From f0ccaa92bce6b0ebb88297b7326f9d0d8fbb298e Mon Sep 17 00:00:00 2001 From: ehouse Date: Sat, 14 Mar 2009 19:27:29 +0000 Subject: [PATCH] Fix so can build iso-8859-2 Polish dicts using make (won't work on BYOD yet): add encoding to emacs mode line and fix the letters, including hard-coding them as decimal numbers until I can figure out how to get perl (in xloc.pm) to emit iso-8859-2 instead of utf8. --- Review notes (ignored by git am): in the Makefile recipe the tr sets are quoted so the shell cannot glob-expand them against files in the build directory, and the locale is passed as LC_ALL=$(LANGCODE).$(ENC) because the codeset separator in a locale name is '.' and LC_ALL, unlike LANG, cannot be overridden by an LC_ALL inherited from the caller's environment. dawg/Polish/Makefile | 21 ++++++++---- dawg/Polish/info.txt | 77 ++++++++++++++++++++++---------------------- 2 files changed, 54 insertions(+), 44 deletions(-) diff --git a/dawg/Polish/Makefile b/dawg/Polish/Makefile index 3cfd60d1f..bee788e10 100644 --- a/dawg/Polish/Makefile +++ b/dawg/Polish/Makefile @@ -1,5 +1,6 @@ -# -*-mode: Makefile -*- -# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved. +# -*- coding: iso-8859-2; mode: Makefile; -*- +# Copyright 2002 - 2009 by Eric House (xwords@eehouse.org). All +# rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -17,16 +18,24 @@ XWLANG=Polish LANGCODE=pl_PL +ENC = ISO-8859-2 -TARGET_TYPE ?= FRANK +# DICT2DAWGARGS = -lang $(LANGCODE) +# DICT2DAWGARGS = -debug + +TARGET_TYPE ?= WINCE include ../Makefile.2to8 include ../Makefile.langcommon -$(XWLANG)Main.dict.gz: slowa.txt.gz - zcat $< | tr -d '\r' | tr [a-z±æê³ñ󶼿] [A-Z¡ÆÊ£ÑÓ¦¬¯] | \ - grep '^[A-PR-UWYZ¡ÆÊ£ÑÓ¦¬¯]*$$' | gzip >$@ +SOURCEDICT ?= $(XWDICTPATH)/Polish/iso-8859-2/slowa.txt.gz + +$(XWLANG)Main.dict.gz: $(SOURCEDICT) + zcat $< | tr -d '\r' \ + | LC_ALL=$(LANGCODE).$(ENC) tr '[a±bcædeêfghijkl³mnñoóprs¶tuwyz¼¿]' '[A¡BCÆDEÊFGHIJKL£MNÑOÓPRS¦TUWYZ¬¯]' \ + | LC_ALL=$(LANGCODE).$(ENC) grep '^[A¡BCÆDEÊFGHIJKL£MNÑOÓPRS¦TUWYZ¬¯]*$$' \ + | gzip > $@ # Everything but creating of the Main.dict file is inherited from the # "parent" Makefile.langcommon in the parent directory.
diff --git a/dawg/Polish/info.txt b/dawg/Polish/info.txt index 4a92d128c..40cea10c9 100644 --- a/dawg/Polish/info.txt +++ b/dawg/Polish/info.txt @@ -1,5 +1,6 @@ -# -*- mode: conf; -*- -# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved. +# -*- coding: iso-8859-2; mode: conf; -*- +# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights +# reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -22,8 +23,8 @@ CHARSET:iso-8859-2 # deal with DOS files LANGFILTER_PRECLIP: tr -d '\r' | -LANGFILTER_POSTCLIP: | tr [a-z±æê³ñ󶼿] [A-Z¡ÆÊ£ÑÓ¦¬¯] -LANGFILTER_POSTCLIP: | grep '^[A-PR-UWYZ¡ÆÊ£ÑÓ¦¬¯]*$' +LANGFILTER_POSTCLIP: | tr [a-z] [A-Z] +LANGFILTER_POSTCLIP: | grep '^[A-Z]*$' LANGFILTER_POSTCLIP: | tr '\n' '\000' NEEDSSORT:true @@ -39,7 +40,7 @@ LANGINFO: href="mailto:xwords@eehouse.org">me know so that I can get LANGINFO: this working.

LANGINFO:

Note that the blank is the last tile here, while with all -LANGINFO: other languages it's the first.

+LANGINFO: other languages it's the first.

LANGINFO:

Also, please note that we currently require the files you LANGINFO: upload to use the iso-8859-2 character encoding.

@@ -48,40 +49,40 @@ LANGINFO: upload to use the iso-8859-2 character encoding.

# Polish==8. Low byte is padding XLOC_HEADER:0x8800 - -9 1 'A' -1 5 '¡' -2 3 'B' -3 2 'C' -1 6 'Æ' -3 2 'D' -7 1 'E' -1 5 'Ê' -1 5 'F' -2 3 'G' -2 3 'H' -8 1 'I' -2 3 'J' -3 2 'K' -3 2 'L' -2 3 '£' -3 2 'M' -5 1 'N' -1 7 'Ñ' -6 1 'O' -1 5 'Ó' -3 2 'P' -4 1 'R' -4 1 'S' -1 5 '¦' -3 2 'T' -2 3 'U' -4 1 'W' -4 2 'Y' -5 1 'Z' -1 9 '¬' -1 5 '¯' +9 1 'A' +1 5 161 # '¡' +2 3 'B' +3 2 'C' +1 6 198 # 'Æ' +3 2 'D' +7 1 'E' +1 5 202 # 'Ê' +1 5 'F' +2 3 'G' +2 3 'H' +8 1 'I' +2 3 'J' +3 2 'K' +3 2 'L' +2 3 163 # '£' +3 2 'M' +5 1 'N' +1 7 209 # 'Ñ' +6 1 'O' +1 5 211 # 'Ó' +3 2 'P' +4 1 'R' +4 1 'S' +1 5 166 # '¦' +3 2 'T' +2 3 'U' +4 1 'W' +4 2 'Y' +5 1 'Z' +1 9 172 # '¬' +1 5 175 # '¯' + # the blank *must* be last here!!! 2 0 {"_"}