diff --git a/xwords4/dawg/Arabic/info.txt b/xwords4/dawg/Arabic/info.txt index 85b5c0423..7fd5ee708 100644 --- a/xwords4/dawg/Arabic/info.txt +++ b/xwords4/dawg/Arabic/info.txt @@ -15,41 +15,45 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. CHARSET: utf-8 -LANGCODE:ar_AR +LANGCODE:ar +LANGNAME:Arabic XLOC_HEADER:0x8500 +# This looks really weird in most text editors because the mix of +# right-to-left and left-to-right scripts confuses them + -2 8 'ﺀ' -2 10 'ﺃ' -8 1 'ﺍ' -4 2 'ﺏ' -4 2 'ﺕ' -3 2 'ﺙ' -4 1 'ﺝ' -3 1 'ﺡ' -3 1 'ﺥ' -3 2 'ﺩ' -3 3 'ﺫ' -3 2 'ﺭ' -3 3 'ﺯ' -3 2 'ﺱ' -3 3 'ﺵ' -3 4 'ﺹ' -3 4 'ﺽ' -2 4 'ﻁ' -2 5 'ﻅ' -3 4 'ﻉ' -2 8 'ﻍ' -3 3 'ﻑ' -3 3 'ﻕ' -3 4 'ﻙ' -4 1 'ﻝ' -3 1 'ﻡ' -3 1 'ﻥ' -3 1 'ﻩ' -2 10 'ﺅ' -3 1 'ﻭ' -3 1 'ﻯ' -2 6 'ﺉ' -2 0 {"_"} +'ﺀ' 8 2 +'ﺃ' 10 2 +'ﺍ' 1 8 +'ﺏ' 2 4 +'ﺕ' 2 4 +'ﺙ' 2 3 +'ﺝ' 1 4 +'ﺡ' 1 3 +'ﺥ' 1 3 +'ﺩ' 2 3 +'ﺫ' 3 3 +'ﺭ' 2 3 +'ﺯ' 3 3 +'ﺱ' 2 3 +'ﺵ' 3 3 +'ﺹ' 4 3 +'ﺽ' 4 3 +'ﻁ' 4 2 +'ﻅ' 5 2 +'ﻉ' 4 3 +'ﻍ' 8 2 +'ﻑ' 3 3 +'ﻕ' 3 3 +'ﻙ' 4 3 +'ﻝ' 1 4 +'ﻡ' 1 3 +'ﻥ' 1 3 +'ﻩ' 1 3 +'ﺅ' 10 2 +'ﻭ' 1 3 +'ﻯ' 1 3 +'ﺉ' 6 2 +{"_"} 0 2 diff --git a/xwords4/dawg/Catalan/info.txt b/xwords4/dawg/Catalan/info.txt index 9eb79de21..f571e98e5 100644 --- a/xwords4/dawg/Catalan/info.txt +++ b/xwords4/dawg/Catalan/info.txt @@ -16,7 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-LANGCODE:ca_ES +LANGCODE:ca +LANGNAME:Catalan CHARSET: utf-8 NEEDSSORT:true @@ -62,33 +63,33 @@ XLOC_HEADER:0x8C00 -2 0 {"_"} -12 1 'A|a' -2 3 'B|b' -3 2 'C|c' -1 10 'Ç|ç' -3 2 'D|d' -13 1 'E|e' -1 4 'F|f' -2 3 'G|g' -1 8 'H|h' -8 1 'I|i' -1 8 'J|j' -4 1 'L|l' -1 10 {"L·L|L-L|ĿL|l·l|l-l|ŀl"} -3 2 'M|m' -6 1 'N|n' -1 10 {"NY|ny|Ny|nY"} -5 1 'O|o' -2 3 'P|p' -1 8 {"QU|qu|Qu|qU"} -8 1 'R|r' -8 1 'S|s' -5 1 'T|t' -4 1 'U|u' -1 4 'V|v' -1 10 'X|x' -1 8 'Z|z' +{"_"} 0 2 +'A|a' 1 12 +'B|b' 3 2 +'C|c' 2 3 +'Ç|ç' 10 1 +'D|d' 2 3 +'E|e' 1 13 +'F|f' 4 1 +'G|g' 3 2 +'H|h' 8 1 +'I|i' 1 8 +'J|j' 8 1 +'L|l' 1 4 +{"L·L|L-L|ĿL|l·l|l-l|ŀl"} 10 1 +'M|m' 2 3 +'N|n' 1 6 +{"NY|ny|Ny|nY"} 10 1 +'O|o' 1 5 +'P|p' 3 2 +{"QU|qu|Qu|qU"} 8 1 +'R|r' 1 8 +'S|s' 1 8 +'T|t' 1 5 +'U|u' 1 4 +'V|v' 4 1 +'X|x' 10 1 +'Z|z' 8 1 # # NOTES: diff --git a/xwords4/dawg/Czech/Makefile b/xwords4/dawg/Czech/Makefile deleted file mode 100644 index 191d8e956..000000000 --- a/xwords4/dawg/Czech/Makefile +++ /dev/null @@ -1,41 +0,0 @@ -# -*-mode: Makefile; coding: utf-8; -*- -# Copyright 2002-2008 by Eric House (xwords@eehouse.org). All rights reserved. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -XWLANG=Czech -LANGCODE=cs_CZ -ENC = UTF-8 - -TARGET_TYPE ?= WINCE - -include ../Makefile.langcommon - -SOURCEDICT ?= $(XWDICTPATH)/Czech/Czech.2-1-6.dict.gz - -$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile - zcat $< | tr -d '\r' | \ - sed 's,.,\U\0,g' | \ - grep '^[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]*$$' | \ - gzip -c > $@ - -# Everything but creating of the Main.dict file is inherited from the -# "parent" Makefile.langcommon in the parent directory. - -clean: clean_common - rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb - -help: - @echo 'make [SOURCEDICT=$(XWDICTPATH)/$(XWLANG)/czech2_5.dict.gz]' diff --git a/xwords4/dawg/Czech/Makefile b/xwords4/dawg/Czech/Makefile new file mode 120000 index 000000000..5bdfd75b5 --- /dev/null +++ b/xwords4/dawg/Czech/Makefile @@ -0,0 +1 @@ +Makefile.blex \ No newline at end of file diff --git a/xwords4/dawg/Czech/Makefile.Czech b/xwords4/dawg/Czech/Makefile.Czech new file mode 100644 index 000000000..191d8e956 --- /dev/null +++ b/xwords4/dawg/Czech/Makefile.Czech @@ -0,0 +1,41 @@ +# -*-mode: Makefile; coding: utf-8; -*- +# Copyright 2002-2008 by Eric House (xwords@eehouse.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +XWLANG=Czech +LANGCODE=cs_CZ +ENC = UTF-8 + +TARGET_TYPE ?= WINCE + +include ../Makefile.langcommon + +SOURCEDICT ?= $(XWDICTPATH)/Czech/Czech.2-1-6.dict.gz + +$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile + zcat $< | tr -d '\r' | \ + sed 's,.,\U\0,g' | \ + grep '^[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]*$$' | \ + gzip -c > $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. + +clean: clean_common + rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb + +help: + @echo 'make [SOURCEDICT=$(XWDICTPATH)/$(XWLANG)/czech2_5.dict.gz]' diff --git a/xwords4/dawg/Czech/blex.mk b/xwords4/dawg/Czech/Makefile.blex similarity index 95% rename from xwords4/dawg/Czech/blex.mk rename to xwords4/dawg/Czech/Makefile.blex index d87277ecd..89b60fb01 100644 --- a/xwords4/dawg/Czech/blex.mk +++ b/xwords4/dawg/Czech/Makefile.blex @@ -25,12 +25,12 @@ TARGET_TYPE ?= WINCE include ../Makefile.langcommon -SOURCEDICT ?= $(XWDICTPATH)/Czech/blex.dict.gz +SOURCEDICT ?= $(XWDICTPATH)/Czech/blex.dict all: $(XWLANG)2to5.xwd $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile - zcat $< | tr -d '\r' | \ + cat $< | tr -d '\r' | \ sed 's,.,\U\0,g' | \ grep '^[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]*$$' | \ gzip -c > $@ diff --git a/xwords4/dawg/Czech/info.txt b/xwords4/dawg/Czech/info.txt index 0a13e1cee..565925608 100644 --- a/xwords4/dawg/Czech/info.txt +++ b/xwords4/dawg/Czech/info.txt @@ -15,7 +15,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:cs_CZ +LANGCODE:cs +LANGNAME:Czech CHARSET:utf-8 # deal with DOS files @@ -39,46 +40,46 @@ LANGINFO: Unicode-aware systems.

# Czech-ISO8859-2==0x11. Low byte is padding. XLOC_HEADER:0x9100 -#COUNT VAL FACE +#FACE VAL COUNT[15] -2 0 {"_"} -5 1 'A' -2 2 'Á' -2 3 'B' -3 2 'C' -1 4 'Č' -3 1 'D' -1 8 'Ď' -5 1 'E' -2 3 'É' -2 3 'Ě' -1 5 'F' -1 5 'G' -3 2 'H' -4 1 'I' -3 2 'Í' -2 2 'J' -3 1 'K' -3 1 'L' -3 2 'M' -5 1 'N' -1 6 'Ň' -6 1 'O' -1 7 'Ó' -3 1 'P' -3 1 'R' -2 4 'Ř' -4 1 'S' -2 4 'Š' -4 1 'T' -1 7 'Ť' -3 2 'U' -1 5 'Ú' -1 4 'Ů' -4 1 'V' -1 10 'X' -2 2 'Y' -2 4 'Ý' -2 2 'Z' -1 4 'Ž' +{"_"} 0 2 +'A' 1 5 +'Á' 2 2 +'B' 3 2 +'C' 2 3 +'Č' 4 1 +'D' 1 3 +'Ď' 8 1 +'E' 1 5 +'É' 3 2 +'Ě' 3 2 +'F' 5 1 +'G' 5 1 +'H' 2 3 +'I' 1 4 +'Í' 2 3 +'J' 2 2 +'K' 1 3 +'L' 1 3 +'M' 2 3 +'N' 1 5 +'Ň' 6 1 +'O' 1 6 +'Ó' 7 1 +'P' 1 3 +'R' 1 3 +'Ř' 4 2 +'S' 1 4 +'Š' 4 2 +'T' 1 4 +'Ť' 7 1 +'U' 2 3 +'Ú' 5 1 +'Ů' 4 1 +'V' 1 4 +'X' 10 1 +'Y' 2 2 +'Ý' 4 2 +'Z' 2 2 +'Ž' 4 1 diff --git a/xwords4/dawg/Danish/info.txt b/xwords4/dawg/Danish/info.txt index 964a67dce..d20365983 100644 --- a/xwords4/dawg/Danish/info.txt +++ b/xwords4/dawg/Danish/info.txt @@ -1,3 +1,4 @@ +# -*- mode: conf; coding: utf-8; -*- # Copyright 2005 by Eric House (xwords@eehouse.org). All rights reserved. # # This program is free software; you can redistribute it and/or @@ -14,7 +15,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:da_DK +LANGCODE:da +LANGNAME:Danish CHARSET: utf-8 # deal with DOS files @@ -42,33 +44,33 @@ LANGINFO: are three non-English letters: 'Å', 'Æ' and 'Ø'.

XLOC_HEADER:0x8900 -2 0 {"_"} -7 1 'A|a' -2 4 'Å|å' -2 4 'Æ|æ' -4 3 'B|b' -2 8 'C|c' -5 2 'D|d' -9 1 'E|e' -3 3 'F|f' -3 3 'G|g' -2 4 'H|h' -4 3 'I|i' -2 4 'J|j' -4 3 'K|k' -5 2 'L|l' -3 3 'M|m' -6 1 'N|n' -5 2 'O|o' -2 4 'Ø|ø' -2 4 'P|p' -6 1 'R|r' -5 2 'S|s' -5 2 'T|t' -3 3 'U|u' -3 3 'V|v' -1 8 'X|x' -2 4 'Y|y' -1 8 'Z|z' +{"_"} 0 2 +'A|a' 1 7 +'Å|å' 4 2 +'Æ|æ' 4 2 +'B|b' 3 4 +'C|c' 8 2 +'D|d' 2 5 +'E|e' 1 9 +'F|f' 3 3 +'G|g' 3 3 +'H|h' 4 2 +'I|i' 3 4 +'J|j' 4 2 +'K|k' 3 4 +'L|l' 2 5 +'M|m' 3 3 +'N|n' 1 6 +'O|o' 2 5 +'Ø|ø' 4 2 +'P|p' 4 2 +'R|r' 1 6 +'S|s' 2 5 +'T|t' 2 5 +'U|u' 3 3 +'V|v' 3 3 +'X|x' 8 1 +'Y|y' 4 2 +'Z|z' 8 1 # should ignore all after the above diff --git a/xwords4/dawg/Dutch/info.txt b/xwords4/dawg/Dutch/info.txt index 0077fd164..cc9bc3df0 100644 --- a/xwords4/dawg/Dutch/info.txt +++ b/xwords4/dawg/Dutch/info.txt @@ -15,8 +15,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:nl_NL - +LANGNAME:Dutch +LANGCODE:nl # deal with DOS files LANGFILTER: tr -d '\r' @@ -41,32 +41,32 @@ XLOC_HEADER:0x8B00 -2 0 {"_"} -6 1 'A|a' -2 3 'B|b' -2 5 'C|c' -5 2 'D|d' -18 1 'E|e' -2 4 'F|f' -3 3 'G|g' -2 4 'H|h' -4 1 'I|i' -2 4 'J|j' -3 3 'K|k' -3 3 'L|l' -3 3 'M|m' -10 1 'N|n' -6 1 'O|o' -2 3 'P|p' -1 10 'Q|q' -5 2 'R|r' -5 2 'S|s' -5 2 'T|t' -3 4 'U|u' -2 4 'V|v' -2 5 'W|w' -1 8 'X|x' -1 8 'Y|y' -2 4 'Z|z' +{"_"} 0 2 +'A|a' 1 6 +'B|b' 3 2 +'C|c' 5 2 +'D|d' 2 5 +'E|e' 1 18 +'F|f' 4 2 +'G|g' 3 3 +'H|h' 4 2 +'I|i' 1 4 +'J|j' 4 2 +'K|k' 3 3 +'L|l' 3 3 +'M|m' 3 3 +'N|n' 1 10 +'O|o' 1 6 +'P|p' 3 2 +'Q|q' 10 1 +'R|r' 2 5 +'S|s' 2 5 +'T|t' 2 5 +'U|u' 4 3 +'V|v' 4 2 +'W|w' 5 2 +'X|x' 8 1 +'Y|y' 8 1 +'Z|z' 4 2 # should ignore all after the above diff --git a/xwords4/dawg/English/info.txt b/xwords4/dawg/English/info.txt index 40593ae95..a7a857f0c 100644 --- a/xwords4/dawg/English/info.txt +++ b/xwords4/dawg/English/info.txt @@ -15,7 +15,8 @@ # along with this program; if
not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:en_US +LANGCODE:en +LANGNAME:English # deal with DOS files LANGFILTER: tr -d '\r' @@ -39,33 +40,33 @@ LANGINFO: will simply be excluded from the dictionary.

XLOC_HEADER:0x8100 -2 0 {"_"} -9 1 'A|a' -2 3 'B|b' -2 3 'C|c' -4 2 'D|d' -12 1 'E|e' -2 4 'F|f' -3 2 'G|g' -2 4 'H|h' -9 1 'I|i' -1 8 'J|j' -1 5 'K|k' -4 1 'L|l' -2 3 'M|m' -6 1 'N|n' -8 1 'O|o' -2 3 'P|p' -1 10 'Q|q' -6 1 'R|r' -4 1 'S|s' -6 1 'T|t' -4 1 'U|u' -2 4 'V|v' -2 4 'W|w' -1 8 'X|x' -2 4 'Y|y' -1 10 'Z|z' +{"_"} 0 2 +'A|a' 1 9 +'B|b' 3 2 +'C|c' 3 2 +'D|d' 2 4 +'E|e' 1 12 +'F|f' 4 2 +'G|g' 2 3 +'H|h' 4 2 +'I|i' 1 9 +'J|j' 8 1 +'K|k' 5 1 +'L|l' 1 4 +'M|m' 3 2 +'N|n' 1 6 +'O|o' 1 8 +'P|p' 3 2 +'Q|q' 10 1 +'R|r' 1 6 +'S|s' 1 4 +'T|t' 1 6 +'U|u' 1 4 +'V|v' 4 2 +'W|w' 4 2 +'X|x' 8 1 +'Y|y' 4 2 +'Z|z' 10 1 # should ignore all after the above diff --git a/xwords4/dawg/Finnish/Makefile b/xwords4/dawg/Finnish/Makefile index a707e9876..e9b547712 100644 --- a/xwords4/dawg/Finnish/Makefile +++ b/xwords4/dawg/Finnish/Makefile @@ -16,7 +16,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -XWLANG=Finish +XWLANG=Kotus_2007 LANGCODE=fi_FI TARGET_TYPE ?= WINCE ENC = UTF-8 diff --git a/xwords4/dawg/Finnish/info.txt b/xwords4/dawg/Finnish/info.txt index e111545c7..918952ab3 100644 --- a/xwords4/dawg/Finnish/info.txt +++ b/xwords4/dawg/Finnish/info.txt @@ -17,6 +17,7 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
LANGCODE:fi +LANGNAME:Finnish CHARSET: utf-8 LANGFILTER: tr -d '\r' @@ -32,30 +33,30 @@ XLOC_HEADER:0x9900 # From wikipedia -2 0 {"_"} -10 1 'A|a' -5 2 'Ä|ä' -1 8 'B|b' -1 10 'C|c' -1 7 'D|d' -8 1 'E|e' -1 8 'F|f' -1 8 'G|g' -2 4 'H|h' -10 1 'I|i' -2 4 'J|j' -5 2 'K|k' -5 2 'L|l' -3 3 'M|m' -9 1 'N|n' -5 2 'O|o' -1 7 'Ö|ö' -2 4 'P|p' -2 4 'R|r' -7 1 'S|s' -9 1 'T|t' -4 3 'U|u' -2 4 'V|v' -1 8 'W|w' -2 4 'Y|y' +{"_"} 0 2 +'A|a' 1 10 +'Ä|ä' 2 5 +'B|b' 8 1 +'C|c' 10 1 +'D|d' 7 1 +'E|e' 1 8 +'F|f' 8 1 +'G|g' 8 1 +'H|h' 4 2 +'I|i' 1 10 +'J|j' 4 2 +'K|k' 2 5 +'L|l' 2 5 +'M|m' 3 3 +'N|n' 1 9 +'O|o' 2 5 +'Ö|ö' 7 1 +'P|p' 4 2 +'R|r' 4 2 +'S|s' 1 7 +'T|t' 1 9 +'U|u' 3 4 +'V|v' 4 2 +'W|w' 8 1 +'Y|y' 4 2 diff --git a/xwords4/dawg/French/Makefile b/xwords4/dawg/French/Makefile deleted file mode 100644 index 9d68c410f..000000000 --- a/xwords4/dawg/French/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# -*-mode: Makefile -*- -# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -XWLANG=French -LANGCODE=fr_FR - -TARGET_TYPE ?= FRANK - -include ../Makefile.langcommon - -$(XWLANG)Main.dict.gz: ods3.txt.gz - zcat $< | sed 's/[[:lower:]]*/\U&/' | gzip >$@ - -# Everything but creating of the Main.dict file is inherited from the -# "parent" Makefile.langcommon in the parent directory. - -clean: clean_common - rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb diff --git a/xwords4/dawg/French/Makefile b/xwords4/dawg/French/Makefile new file mode 120000 index 000000000..26f37c6ac --- /dev/null +++ b/xwords4/dawg/French/Makefile @@ -0,0 +1 @@ +Makefile.ODS7 \ No newline at end of file diff --git a/xwords4/dawg/French/info.txt b/xwords4/dawg/French/info.txt index b48611b91..51f2c3e3d 100755 --- a/xwords4/dawg/French/info.txt +++ b/xwords4/dawg/French/info.txt @@ -14,7 +14,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:fr_FR +LANGCODE:fr +LANGNAME:French # deal with DOS files LANGFILTER: tr -d '\r' @@ -37,31 +38,31 @@ LANGINFO: dropping those words from the list prior to compression.

XLOC_HEADER:0x8200 -2 0 {"_"} -9 1 'A|a' -2 3 'B|b' -2 3 'C|c' -3 2 'D|d' -15 1 'E|e' -2 4 'F|f' -2 2 'G|g' -2 4 'H|h' -8 1 'I|i' -1 8 'J|j' -1 10 'K|k' -5 1 'L|l' -3 2 'M|m' -6 1 'N|n' -6 1 'O|o' -2 3 'P|p' -1 8 'Q|q' -6 1 'R|r' -6 1 'S|s' -6 1 'T|t' -6 1 'U|u' -2 4 'V|v' -1 10 'W|w' -1 10 'X|x' -1 10 'Y|y' -1 10 'Z|z' +{"_"} 0 2 +'A|a' 1 9 +'B|b' 3 2 +'C|c' 3 2 +'D|d' 2 3 +'E|e' 1 15 +'F|f' 4 2 +'G|g' 2 2 +'H|h' 4 2 +'I|i' 1 8 +'J|j' 8 1 +'K|k' 10 1 +'L|l' 1 5 +'M|m' 2 3 +'N|n' 1 6 +'O|o' 1 6 +'P|p' 3 2 +'Q|q' 8 1 +'R|r' 1 6 +'S|s' 1 6 +'T|t' 1 6 +'U|u' 1 6 +'V|v' 4 2 +'W|w' 10 1 +'X|x' 10 1 +'Y|y' 10 1 +'Z|z' 10 1 diff --git a/xwords4/dawg/German/Makefile b/xwords4/dawg/German/Makefile deleted file mode 100644 index e29d7242e..000000000 --- a/xwords4/dawg/German/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -# -*- mode: makefile; coding: utf-8; -*- -# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org). All -# rights reserved. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -XWLANG = German -LANGCODE = de_DE -ENC = UTF-8 - -TARGET_TYPE ?= WINCE - -include ../Makefile.langcommon - -SOURCEDICT ?= $(XWDICTPATH)/German/HansGerman.dict.gz - -$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile - file -z $(SOURCEDICT) | grep -q 'UTF-8 Unicode text' || exit 1 - zcat $< \ - | sed 's,.,\U\0,g' \ - | sed -e 's/ß/SS/g' \ - | grep '^[AÄBCDEFGHIJKLMNOÖPQRSTUÜVWXYZ]*$$' \ - | gzip -c > $@ - -# Everything but creating of the Main.dict file is inherited from the -# "parent" Makefile.langcommon in the parent directory. - -clean: clean_common - rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb - -help: - @echo 'make [SOURCEDICT=HansGerman.dict.gz|deutsch.dict.gz]' diff --git a/xwords4/dawg/German/Makefile b/xwords4/dawg/German/Makefile new file mode 120000 index 000000000..dda6ab36d --- /dev/null +++ b/xwords4/dawg/German/Makefile @@ -0,0 +1 @@ +Makefile.sf \ No newline at end of file diff --git a/xwords4/dawg/German/Makefile.hansGerman b/xwords4/dawg/German/Makefile.hansGerman new file mode 100644 index 000000000..e29d7242e --- /dev/null +++ b/xwords4/dawg/German/Makefile.hansGerman @@ -0,0 +1,44 @@ +# -*- mode: makefile; coding: utf-8; -*- +# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org). All +# rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +XWLANG = German +LANGCODE = de_DE +ENC = UTF-8 + +TARGET_TYPE ?= WINCE + +include ../Makefile.langcommon + +SOURCEDICT ?= $(XWDICTPATH)/German/HansGerman.dict.gz + +$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile + file -z $(SOURCEDICT) | grep -q 'UTF-8 Unicode text' || exit 1 + zcat $< \ + | sed 's,.,\U\0,g' \ + | sed -e 's/ß/SS/g' \ + | grep '^[AÄBCDEFGHIJKLMNOÖPQRSTUÜVWXYZ]*$$' \ + | gzip -c > $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. + +clean: clean_common + rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb + +help: + @echo 'make [SOURCEDICT=HansGerman.dict.gz|deutsch.dict.gz]' diff --git a/xwords4/dawg/German/info.txt b/xwords4/dawg/German/info.txt index 383266afe..901a514a9 100644 --- a/xwords4/dawg/German/info.txt +++ b/xwords4/dawg/German/info.txt @@ -16,7 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:de_DE +LANGCODE:de +LANGNAME:German CHARSET: utf-8 # deal with DOS files @@ -46,37 +47,36 @@ LANGINFO: contain letters not found on tiles.

# German==3. Low byte is padding XLOC_HEADER:0x8300 - -2 0 {"_"} -5 1 'A|a' -1 6 'Ä|ä' -2 3 'B|b' -2 4 'C|c' -4 1 'D|d' -15 1 'E|e' -2 4 'F|f' -3 2 'G|g' -4 2 'H|h' -6 1 'I|i' -1 6 'J|j' -2 4 'K|k' -3 2 'L|l' -4 3 'M|m' -9 1 'N|n' -3 2 'O|o' -1 8 'Ö|ö' -1 4 'P|p' -1 10 'Q|q' -6 1 'R|r' -7 1 'S|s' -6 1 'T|t' -6 1 'U|u' -1 6 'Ü|ü' -1 6 'V|v' -1 3 'W|w' -1 8 'X|x' -1 10 'Y|y' -1 3 'Z|z' +{"_"} 0 2 +'A|a' 1 5 +'Ä|ä' 6 1 +'B|b' 3 2 +'C|c' 4 2 +'D|d' 1 4 +'E|e' 1 15 +'F|f' 4 2 +'G|g' 2 3 +'H|h' 2 4 +'I|i' 1 6 +'J|j' 6 1 +'K|k' 4 2 +'L|l' 2 3 +'M|m' 3 4 +'N|n' 1 9 +'O|o' 2 3 +'Ö|ö' 8 1 +'P|p' 4 1 +'Q|q' 10 1 +'R|r' 1 6 +'S|s' 1 7 +'T|t' 1 6 +'U|u' 1 6 +'Ü|ü' 6 1 +'V|v' 6 1 +'W|w' 3 1 +'X|x' 8 1 +'Y|y' 10 1 +'Z|z' 3 1 # should ignore all after the above diff --git a/xwords4/dawg/Hex/Makefile b/xwords4/dawg/Hex/Makefile index 460e3eea7..23c159a91 100644 --- a/xwords4/dawg/Hex/Makefile +++ b/xwords4/dawg/Hex/Makefile @@ -28,7 +28,7 @@ include ../Makefile.langcommon # Pass in your own dict here by setting DICT # DICT ?= $(XWDICTPATH)/English/CSW.dict.gz -DICT ?= $(XWDICTPATH)/English/COSD.dict.gz +DICT ?= $(XWDICTPATH)/English/CSW15.dict # tr 'AE' 'ÄË' doesn't work, so use sed. Note that although we're # pulling words up to 15 letters in length there are none longer than @@ -36,12 +36,10 @@ DICT ?= $(XWDICTPATH)/English/COSD.dict.gz # only one is worth publishing. 
$(XWLANG)Main.dict.gz: $(DICT) @echo "building $@ from $<" - zcat $< | \ + cat $< | \ sed 's/[[:lower:]]*/\U&/' | \ grep -e '^[A-F]\{2,15\}$$' | \ echo CAFEBABE DEADBEEF $$(cat -) | \ - sed 's/A/Ä/g' | \ - sed 's/E/Ë/g' | \ tr ' ' '\n' | sort | gzip > $@ # Everything but creating of the Main.dict file is inherited from the diff --git a/xwords4/dawg/Hex/info.txt b/xwords4/dawg/Hex/info.txt index 3d5c6be9b..40aedf621 100755 --- a/xwords4/dawg/Hex/info.txt +++ b/xwords4/dawg/Hex/info.txt @@ -16,13 +16,12 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:HEX +LANGCODE:hex +LANGNAME:Hex # uppercase all LANGFILTER: | sed -e 's/[[:lower:]]*/\U&/' LANGFILTER: | grep -x '[A-F]\{2,15\}' -LANGFILTER: | sed 's/A/Ä/' -LANGFILTER: | sed 's/E/Ë/' LANGFILTER: | sort -u D2DARGS: -term 10 @@ -50,12 +49,12 @@ XLOC_HEADER:0xFF00 -4 0 {"_"} -9 1 'Ä|ä' -2 3 'B|b' -2 3 'C|c' -4 2 'D|d' -12 1 'Ë|ë' -2 4 'F|f' +{"_"} 0 4 +'A|a' 1 9 +'B|b' 3 2 +'C|c' 3 2 +'D|d' 2 4 +'E|e' 1 12 +'F|f' 4 2 # should ignore all after the above diff --git a/xwords4/dawg/Hungarian/info.txt b/xwords4/dawg/Hungarian/info.txt index 44c5e975a..b2e53743d 100644 --- a/xwords4/dawg/Hungarian/info.txt +++ b/xwords4/dawg/Hungarian/info.txt @@ -1,6 +1,7 @@ # -*- mode: conf; coding: utf-8; -*- -LANGCODE:hu_HU +LANGCODE:hu +LANGNAME:Hungarian CHARSET: utf-8 BUILD_FLAGS:ALLOWS_DUPLICATES @@ -22,43 +23,43 @@ LANGFILTER: | tr '1234567' '\001\002\003\004\005\006\007' XLOC_HEADER:0x9400 -2 0 {"_"} -6 1 'A|a' -4 1 'Á|á' -3 2 'B|b' -1 5 'C|c' -1 7 {"CS|cs"} -3 2 'D|d' -6 1 'E|e' -3 3 'É|é' -2 4 'F|f' -3 2 'G|g' -2 4 {"GY|gy"} -2 3 'H|h' -3 1 'I|i' -1 5 'Í|í' -2 4 'J|j' -6 1 'K|k' -4 1 'L|l' -1 8 {"LY|ly"} -3 1 'M|m' -4 1 'N|n' -1 5 {"NY|ny"} -3 1 'O|o' -3 2 'Ó|ó' -2 4 'Ö|ö' -1 7 'Ő|ö' -2 4 'P|p' -4 1 'R|r' -3 1 'S|s' -2 3 {"SZ|sz"} -5 1 'T|t' -1 10 {"TY|ty"} -2 4 'U|u' -1 7 'Ú|ú' -2 4 'Ü|ü' -1 7 'Ű|ű' -2 3 'V|v' -2 4 'Z|z' -1 8 
{"ZS|zs"} +{"_"} 0 2 +'A|a' 1 6 +'Á|á' 1 4 +'B|b' 2 3 +'C|c' 5 1 +{"CS|cs"} 7 1 +'D|d' 2 3 +'E|e' 1 6 +'É|é' 3 3 +'F|f' 4 2 +'G|g' 2 3 +{"GY|gy"} 4 2 +'H|h' 3 2 +'I|i' 1 3 +'Í|í' 5 1 +'J|j' 4 2 +'K|k' 1 6 +'L|l' 1 4 +{"LY|ly"} 8 1 +'M|m' 1 3 +'N|n' 1 4 +{"NY|ny"} 5 1 +'O|o' 1 3 +'Ó|ó' 2 3 +'Ö|ö' 4 2 +'Ő|ő' 7 1 +'P|p' 4 2 +'R|r' 1 4 +'S|s' 1 3 +{"SZ|sz"} 3 2 +'T|t' 1 5 +{"TY|ty"} 10 1 +'U|u' 4 2 +'Ú|ú' 7 1 +'Ü|ü' 4 2 +'Ű|ű' 7 1 +'V|v' 3 2 +'Z|z' 4 2 +{"ZS|zs"} 8 1 diff --git a/xwords4/dawg/Hëx/Makefile b/xwords4/dawg/Hëx/Makefile index 22e8ed4f6..89ec688ed 100644 --- a/xwords4/dawg/Hëx/Makefile +++ b/xwords4/dawg/Hëx/Makefile @@ -25,12 +25,12 @@ TARGET_TYPE = WINCE include ../Makefile.langcommon # Pass in your own dict here by setting DICT -DICT ?= $(XWDICTPATH)/English/CSW.dict.gz +DICT ?= $(XWDICTPATH)/English/CSW15.dict # tr 'AE' 'ÄË' doesn't work, so use sed. $(XWLANG)Main.dict.gz: $(DICT) @echo "building $@ from $<" - zcat $< | sed 's/[[:lower:]]*/\U&/' | grep -e '^[A-F]\{2,8\}$$' | \ + cat $< | sed 's/[[:lower:]]*/\U&/' | grep -e '^[A-F]\{2,8\}$$' | \ echo CAFEBABE DEADBEEF $$(cat -) | \ sed 's/A/Ä/g' | \ sed 's/E/Ë/g' | \ diff --git a/xwords4/dawg/Hëx/info.txt b/xwords4/dawg/Hëx/info.txt index bd699aa11..6a047e879 100644 --- a/xwords4/dawg/Hëx/info.txt +++ b/xwords4/dawg/Hëx/info.txt @@ -16,7 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-LANGCODE:HEX +LANGCODE:hë +LANGNAME:Hëx # uppercase all LANGFILTER: | sed -e 's/[[:lower:]]*/\U&/' @@ -50,12 +51,12 @@ XLOC_HEADER:0xFF00 -4 0 {"_"} -9 1 'Ä' -2 3 'B' -2 3 'C' -4 2 'D' -12 1 'Ë' -2 4 'F' +{"_"} 0 4 +'Ä|ä' 1 9 +'B|b' 3 2 +'C|c' 3 2 +'D|d' 2 4 +'Ë|ë' 1 12 +'F|f' 4 2 # should ignore all after the above diff --git a/xwords4/dawg/Italian/info.txt b/xwords4/dawg/Italian/info.txt index eb829b26e..e0f18e57c 100755 --- a/xwords4/dawg/Italian/info.txt +++ b/xwords4/dawg/Italian/info.txt @@ -15,7 +15,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:it_IT +LANGCODE:it +LANGNAME:Italian # deal with DOS files LANGFILTER: tr -d '\r' @@ -35,31 +36,31 @@ XLOC_HEADER:0x8A00 # tile values taken from http://www.gtoal.com/wordgames/details/italian/ -2 0 {"_"} -13 1 'A|a' -3 5 'B|b' -4 4 'C|c' -3 5 'D|d' -13 1 'E|e' -2 8 'F|f' -3 5 'G|g' -2 8 'H|h' -13 1 'I|i' -0 1 'J|j' -0 1 'K|k' -5 3 'L|l' -5 3 'M|m' -6 2 'N|n' -13 1 'O|o' -3 5 'P|p' -1 10 'Q|q' -6 2 'R|r' -6 2 'S|s' -6 2 'T|t' -5 3 'U|u' -4 4 'V|v' -0 1 'W|w' -0 1 'X|x' -0 1 'Y|y' -2 8 'Z|z' +{"_"} 0 2 +'A|a' 1 13 +'B|b' 5 3 +'C|c' 4 4 +'D|d' 5 3 +'E|e' 1 13 +'F|f' 8 2 +'G|g' 5 3 +'H|h' 8 2 +'I|i' 1 13 +'J|j' 1 0 +'K|k' 1 0 +'L|l' 3 5 +'M|m' 3 5 +'N|n' 2 6 +'O|o' 1 13 +'P|p' 5 3 +'Q|q' 10 1 +'R|r' 2 6 +'S|s' 2 6 +'T|t' 2 6 +'U|u' 3 5 +'V|v' 4 4 +'W|w' 1 0 +'X|x' 1 0 +'Y|y' 1 0 +'Z|z' 8 2 diff --git a/xwords4/dawg/Makefile.langcommon b/xwords4/dawg/Makefile.langcommon index 45d26f033..4f9250fb0 100644 --- a/xwords4/dawg/Makefile.langcommon +++ b/xwords4/dawg/Makefile.langcommon @@ -220,7 +220,8 @@ endif frankspecials.bin: ../frank_mkspecials.py $(BMPFILES) $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@ -$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin charcount.bin table.bin values.bin frankspecials.bin +$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin 
$(XWLANG)%_newheader.bin \ + charcount.bin table.bin values.bin frankspecials.bin cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin charcount.bin table.bin values.bin \ frankspecials.bin $(XWLANG)StartLoc.bin \ $$(ls dawg$(XWLANG)$*_*.bin) > $@ @@ -273,6 +274,9 @@ allbins: $(MAKE) TARGET_TYPE=FRANK byodbins rm palmspecials.bin +extraCounts.bin: ../xloc.py info.txt + ../xloc.py -ec -out $@ + table.bin: ../xloc.py ifdef NEWDAWG ../xloc.py $(ENCP) -tn -out $@ @@ -298,6 +302,15 @@ $(XWLANG)%_note.bin: echo -n $(DICTNOTE) > $@ perl -e "print pack(\"c\",0)" >> $@ +langCode.bin: ../xloc.py + ../xloc.py -info LANGCODE -out $@ + +langName.bin: ../xloc.py + ../xloc.py -info LANGNAME -out $@ + +otherCounts.bin: ../xloc.py + ../xloc.py -oc -out otherCounts.bin + $(XWLANG)%_md5sum.bin: cat table.bin values.bin frankspecials.bin $(XWLANG)StartLoc.bin \ dawg$(XWLANG)$*_*.bin | md5sum | awk '{print $$1}' | tr -d '\n' > $@ @@ -308,7 +321,8 @@ $(XWLANG)%_headerFlags.bin: perl -e "print pack(\"n\",$$FLAGS)" > $@ $(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \ - $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin + $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin langCode.bin \ + langName.bin otherCounts.bin SIZ=0; \ for FILE in $+; do \ SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \ diff --git a/xwords4/dawg/Polish/info.txt b/xwords4/dawg/Polish/info.txt index d0e624aaf..97206f3de 100644 --- a/xwords4/dawg/Polish/info.txt +++ b/xwords4/dawg/Polish/info.txt @@ -16,7 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:pl_PL +LANGCODE:pl +LANGNAME:Polish CHARSET:utf-8 # deal with DOS files @@ -50,39 +51,39 @@ LANGINFO: other languages it's the first.

XLOC_HEADER:0x8800 -9 1 'A|a' -1 5 'Ą|ą' -2 3 'B|b' -3 2 'C|c' -1 6 'Ć|ć' -3 2 'D|d' -7 1 'E|e' -1 5 'Ę|ę' -1 5 'F|f' -2 3 'G|g' -2 3 'H|h' -8 1 'I|i' -2 3 'J|j' -3 3 'K|k' -3 2 'L|l' -2 3 'Ł|ł' -3 2 'M|m' -5 1 'N|n' -1 7 'Ń|ń' -6 1 'O|o' -1 5 'Ó|ó' -3 2 'P|p' -4 1 'R|r' -4 1 'S|s' -1 5 'Ś|ś' -3 2 'T|t' -2 3 'U|u' -4 1 'W|w' -4 2 'Y|y' -5 1 'Z|z' -1 9 'Ź|ź' -1 5 'Ż|ż' +'A|a' 1 9 +'Ą|ą' 5 1 +'B|b' 3 2 +'C|c' 2 3 +'Ć|ć' 6 1 +'D|d' 2 3 +'E|e' 1 7 +'Ę|ę' 5 1 +'F|f' 5 1 +'G|g' 3 2 +'H|h' 3 2 +'I|i' 1 8 +'J|j' 3 2 +'K|k' 3 3 +'L|l' 2 3 +'Ł|ł' 3 2 +'M|m' 2 3 +'N|n' 1 5 +'Ń|ń' 7 1 +'O|o' 1 6 +'Ó|ó' 5 1 +'P|p' 2 3 +'R|r' 1 4 +'S|s' 1 4 +'Ś|ś' 5 1 +'T|t' 2 3 +'U|u' 3 2 +'W|w' 1 4 +'Y|y' 2 4 +'Z|z' 1 5 +'Ź|ź' 9 1 +'Ż|ż' 5 1 # the blank *must* be last here!!! -2 0 {"_"} +{"_"} 0 2 diff --git a/xwords4/dawg/Portuguese/info.txt b/xwords4/dawg/Portuguese/info.txt index 1a6b1aecb..a5734c4d1 100644 --- a/xwords4/dawg/Portuguese/info.txt +++ b/xwords4/dawg/Portuguese/info.txt @@ -14,7 +14,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-LANGCODE:pt_PT +LANGCODE:pt +LANGNAME:Portuguese CHARSET: utf-8 # deal with DOS files @@ -41,31 +42,31 @@ XLOC_HEADER:0x8D00 -3 0 {"_"} -14 1 'A|a' -3 3 'B|b' -4 2 'C|c' -2 3 'Ç|ç' -5 2 'D|d' -11 1 'E|e' -2 4 'F|f' -2 4 'G|g' -2 4 'H|h' -10 1 'I|i' -2 5 'J|j' -5 2 'L|l' -6 1 'M|m' -4 3 'N|n' -10 1 'O|o' -4 2 'P|p' -1 6 'Q|q' -6 1 'R|r' -8 1 'S|s' -5 1 'T|t' -7 1 'U|u' -2 4 'V|v' -1 8 'X|x' -1 8 'Z|z' +{"_"} 0 3 +'A|a' 1 14 +'B|b' 3 3 +'C|c' 2 4 +'Ç|ç' 3 2 +'D|d' 2 5 +'E|e' 1 11 +'F|f' 4 2 +'G|g' 4 2 +'H|h' 4 2 +'I|i' 1 10 +'J|j' 5 2 +'L|l' 2 5 +'M|m' 1 6 +'N|n' 3 4 +'O|o' 1 10 +'P|p' 2 4 +'Q|q' 6 1 +'R|r' 1 6 +'S|s' 1 8 +'T|t' 1 5 +'U|u' 1 7 +'V|v' 4 2 +'X|x' 8 1 +'Z|z' 8 1 # should ignore all after the above diff --git a/xwords4/dawg/Romanian/info.txt b/xwords4/dawg/Romanian/info.txt index 6fbbf3055..279e1cfc5 100644 --- a/xwords4/dawg/Romanian/info.txt +++ b/xwords4/dawg/Romanian/info.txt @@ -18,6 +18,7 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. LANGCODE:ro +LANGNAME:Romanian # deal with DOS files LANGFILTER: tr -d '\r' @@ -42,29 +43,29 @@ XLOC_HEADER:0x9500 # Number Value Letter -2 0 {"_"} -11 1 'A|a' -2 9 'B|b' -5 1 'C|c' -4 2 'D|d' -9 1 'E|e' -2 8 'F|f' -2 9 'G|g' -1 10 'H|h' -10 1 'I|i' -1 10 'J|j' -4 1 'L|l' -3 4 'M|m' -6 1 'N|n' -5 1 'O|o' -4 2 'P|p' -7 1 'R|r' -5 1 'S|s' -7 1 'T|t' -6 1 'U|u' -2 8 'V|v' -1 10 'X|x' -1 10 'Z|z' +{"_"} 0 2 +'A|a' 1 11 +'B|b' 9 2 +'C|c' 1 5 +'D|d' 2 4 +'E|e' 1 9 +'F|f' 8 2 +'G|g' 9 2 +'H|h' 10 1 +'I|i' 1 10 +'J|j' 10 1 +'L|l' 1 4 +'M|m' 4 3 +'N|n' 1 6 +'O|o' 1 5 +'P|p' 2 4 +'R|r' 1 7 +'S|s' 1 5 +'T|t' 1 7 +'U|u' 1 6 +'V|v' 8 2 +'X|x' 10 1 +'Z|z' 10 1 # should ignore all after the above diff --git a/xwords4/dawg/Russian/info.txt b/xwords4/dawg/Russian/info.txt index 728210c4b..2c55c1041 100644 --- a/xwords4/dawg/Russian/info.txt +++ b/xwords4/dawg/Russian/info.txt @@ -16,7 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 
330, Boston, MA 02111-1307, USA. -LANGCODE:ru_RU +LANGCODE:ru +LANGNAME:Russian CHARSET:windows-1251 # deal with DOS files @@ -38,38 +39,38 @@ LANGINFO: contain letters not listed below are removed.

XLOC_HEADER:0x8F00 -8 1 'Ŕ' -2 3 'Á' -4 1 'Â' -2 3 'Ă' -2 2 'Ä' -7 1 'Ĺ' -1 4 'Ć' -1 3 'Ç' -7 1 'Č' -1 2 'É' -4 2 'Ę' -4 2 'Ë' -2 3 'Ě' -4 1 'Í' -9 1 'Î' -4 2 'Ď' -5 1 'Đ' -5 1 'Ń' -7 1 'Ň' -4 2 'Ó' -1 5 'Ô' -1 4 'Ő' -1 4 'Ö' -1 3 '×' -1 4 'Ř' -1 5 'Ů' -1 10 'Ú' -2 2 'Ű' -4 1 'Ü' -1 8 'Ý' -1 5 'Ţ' -2 2 'ß' -2 0 {"_"} +'Ŕ' 1 8 +'Á' 3 2 +'Â' 1 4 +'Ă' 3 2 +'Ä' 2 2 +'Ĺ' 1 7 +'Ć' 4 1 +'Ç' 3 1 +'Č' 1 7 +'É' 2 1 +'Ę' 2 4 +'Ë' 2 4 +'Ě' 3 2 +'Í' 1 4 +'Î' 1 9 +'Ď' 2 4 +'Đ' 1 5 +'Ń' 1 5 +'Ň' 1 7 +'Ó' 2 4 +'Ô' 5 1 +'Ő' 4 1 +'Ö' 4 1 +'×' 3 1 +'Ř' 4 1 +'Ů' 5 1 +'Ú' 10 1 +'Ű' 2 2 +'Ü' 1 4 +'Ý' 8 1 +'Ţ' 5 1 +'ß' 2 2 +{"_"} 0 2 # should ignore all after the above diff --git a/xwords4/dawg/Slovak/info.txt b/xwords4/dawg/Slovak/info.txt index 504898d40..730b6e7b2 100644 --- a/xwords4/dawg/Slovak/info.txt +++ b/xwords4/dawg/Slovak/info.txt @@ -16,7 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:sk_SK +LANGCODE:sk +LANGNAME:Slovak CHARSET:utf-8 # deal with DOS files @@ -38,47 +39,47 @@ XLOC_HEADER:0x9300 #COUNT VAL FACE # converted from http://www.deskovehry.info/pravidla/scrabble/scrabble-sk.htm -9 1 'A' -1 4 'Á' -1 10 'Ä' -2 4 'B' -1 4 'C' -1 5 'Č' -3 2 'D' -1 8 'Ď' -8 1 'E' -1 7 'É' -1 8 'F' -1 8 'G' -1 4 'H' -5 1 'I' -1 5 'Í' -2 3 'J' -3 2 'K' -3 2 'L' -1 10 'Ĺ' -1 7 'Ľ' -4 2 'M' -5 1 'N' -1 8 'Ň' -9 1 'O' -1 8 'Ô' -1 10 'Ó' -3 2 'P' -4 1 'R' -1 10 'Ŕ' -4 1 'S' -1 5 'Š' -4 1 'T' -1 7 'Ť' -2 3 'U' -1 7 'Ú' -4 1 'V' -1 10 'X' -1 4 'Y' -1 5 'Ý' -1 4 'Z' -1 5 'Ž' +'A|a' 1 9 +'Á|á' 4 1 +'Ä|ä' 10 1 +'B|b' 4 2 +'C|c' 4 1 +'Č|č' 5 1 +'D|d' 2 3 +'Ď|ď' 8 1 +'E|e' 1 8 +'É|é' 7 1 +'F|f' 8 1 +'G|g' 8 1 +'H|h' 4 1 +'I|i' 1 5 +'Í|í' 5 1 +'J|j' 3 2 +'K|k' 2 3 +'L|l' 2 3 +'Ĺ|ĺ' 10 1 +'Ľ|ľ' 7 1 +'M|m' 2 4 +'N|n' 1 5 +'Ň|ň' 8 1 +'O|o' 1 9 +'Ô|ô' 8 1 +'Ó|ó' 10 1 +'P|p' 2 3 +'R|r' 1 4 +'Ŕ|ŕ' 10 1 +'S|s' 1 4 +'Š|š' 5 1 +'T|t' 1 4 +'Ť|ť' 7 1 +'U|u' 3 2 +'Ú|ú' 7 1 +'V|v' 1 4 +'X|x' 10 1 +'Y|y' 4 
1 +'Ý|ý' 5 1 +'Z|z' 4 1 +'Ž|ž' 5 1 # the blank *must* be last here!!! -2 0 {"_"} +{"_"} 0 2 diff --git a/xwords4/dawg/Spanish/info.txt b/xwords4/dawg/Spanish/info.txt index 3a608a24d..c29c58069 100644 --- a/xwords4/dawg/Spanish/info.txt +++ b/xwords4/dawg/Spanish/info.txt @@ -19,9 +19,12 @@ # no way can unix sort handle the control chars I'm adding to text # below +LANGCODE:es +LANGNAME:Spanish NEEDSSORT:true CHARSET: utf-8 + # MSDos LF chars go bye-bye LANGFILTER: tr -d '\r' @@ -77,34 +80,34 @@ LANGCODE:es_ES XLOC_HEADER:0x8600 -2 0 {"_"} -12 1 'A|a' -2 3 'B|b' -4 3 'C|c' -1 5 {"CH|ch|Ch|cH",true,true} -5 2 'D|d' -12 1 'E|e' -1 4 'F|f' -2 2 'G|g' -2 4 'H|h' -6 1 'I|i' -1 8 'J|j' -4 1 'L|l' -1 8 {"LL|ll|Ll|lL", true, true} -2 3 'M|m' -5 1 'N|n' -1 8 'Ñ|ñ' -9 1 'O|o' -2 3 'P|p' -1 5 'Q|q' -5 1 'R|r' -1 8 {"RR|rr|Rr|rR",true,true} -6 1 'S|s' -4 1 'T|t' -5 1 'U|u' -1 4 'V|v' -1 8 'X|x' -1 4 'Y|y' -1 10 'Z|z' +{"_"} 0 2 +'A|a' 1 12 +'B|b' 3 2 +'C|c' 3 4 +{"CH|ch|Ch|cH",true,true} 5 1 +'D|d' 2 5 +'E|e' 1 12 +'F|f' 4 1 +'G|g' 2 2 +'H|h' 4 2 +'I|i' 1 6 +'J|j' 8 1 +'L|l' 1 4 +{"LL|ll|Ll|lL",true,true} 8 1 +'M|m' 3 2 +'N|n' 1 5 +'Ñ|ñ' 8 1 +'O|o' 1 9 +'P|p' 3 2 +'Q|q' 5 1 +'R|r' 1 5 +{"RR|rr|Rr|rR",true,true} 8 1 +'S|s' 1 6 +'T|t' 1 4 +'U|u' 1 5 +'V|v' 4 1 +'X|x' 8 1 +'Y|y' 4 1 +'Z|z' 10 1 # should ignore all after the above diff --git a/xwords4/dawg/Swedish/Makefile b/xwords4/dawg/Swedish/Makefile index 67fd40af4..c13ce2ad7 100644 --- a/xwords4/dawg/Swedish/Makefile +++ b/xwords4/dawg/Swedish/Makefile @@ -26,13 +26,14 @@ TARGET_TYPE ?= WINCE include ../Makefile.langcommon -SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.utf8.gz +SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.txt # Q and W are not available as tiles, but I'm told there's a custom in # Swedish play of allowing blanks to stand for those letters as well. # So we don't exclude words with those letters from the dictionary. 
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile - zcat $< | sed 's/[[:lower:]]*/\U&/' | \ + cat $< | iconv -f ISO-8859-1 -t $(ENC) | \ + sed 's/[[:lower:]]*/\U&/' | \ LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \ gzip -c > $@ diff --git a/xwords4/dawg/Swedish/info.txt b/xwords4/dawg/Swedish/info.txt index 360d46af7..4a331102e 100644 --- a/xwords4/dawg/Swedish/info.txt +++ b/xwords4/dawg/Swedish/info.txt @@ -16,7 +16,8 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. CHARSET: utf-8 -LANGCODE:sv_SE +LANGCODE:sv +LANGNAME:Swedish LANGFILTER: tr -d '\r' # uppercase all @@ -33,45 +34,44 @@ LANGINFO: and W, and adds Ä, Å, Æ, Ö and Ü.

XLOC_HEADER:0x8700 - -2 0 {"_"} -8 1 'A|a' +{"_"} 0 2 +'A|a' 1 8 # A with two dots -2 3 'Ä|ä' +'Ä|ä' 3 2 # A with circle -2 4 'Å|å' +'Å|å' 4 2 # Æ tile only available for blanks -0 1 'Æ|æ' -2 4 'B|b' -1 8 'C|c' -5 1 'D|d' -7 1 'E|e' -2 3 'F|f' -3 2 'G|g' -2 2 'H|h' -5 1 'I|i' -1 7 'J|j' -3 2 'K|k' -5 1 'L|l' -3 2 'M|m' -6 1 'N|n' -5 2 'O|o' +'Æ|æ' 1 0 +'B|b' 4 2 +'C|c' 8 1 +'D|d' 1 5 +'E|e' 1 7 +'F|f' 3 2 +'G|g' 2 3 +'H|h' 2 2 +'I|i' 1 5 +'J|j' 7 1 +'K|k' 2 3 +'L|l' 1 5 +'M|m' 2 3 +'N|n' 1 6 +'O|o' 2 5 # O with two dots -2 4 'Ö|ö' -2 4 'P|p' +'Ö|ö' 4 2 +'P|p' 4 2 # Q tile only available for blanks -0 1 'Q|q' -8 1 'R|r' -8 1 'S|s' -8 1 'T|t' -3 4 'U|u' +'Q|q' 1 0 +'R|r' 1 8 +'S|s' 1 8 +'T|t' 1 8 +'U|u' 4 3 # Ü tile only available for blanks -0 1 'Ü|ü' -2 3 'V|v' +'Ü|ü' 1 0 +'V|v' 3 2 # W tile only available for blanks -0 1 'W|w' -1 8 'X|x' -1 7 'Y|y' -1 10 'Z|z' +'W|w' 1 0 +'X|x' 8 1 +'Y|y' 7 1 +'Z|z' 10 1 diff --git a/xwords4/dawg/Turkish/info.txt b/xwords4/dawg/Turkish/info.txt index 7b242788d..32f2154bf 100644 --- a/xwords4/dawg/Turkish/info.txt +++ b/xwords4/dawg/Turkish/info.txt @@ -14,7 +14,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:tr_TR +LANGCODE:tr +LANGNAME:Turkish CHARSET: utf-8 LANGINFO:

Turkish is untested with actual wordlists!

@@ -22,34 +23,34 @@ LANGINFO:

Turkish is untested with actual wordlists!

XLOC_HEADER:0x8400 -2 0 {"_"} -12 1 'A' -2 3 'B' -2 4 'C' -2 4 'Ç' -2 3 'D' -8 1 'E' -1 7 'F' -1 5 'G' -1 8 'Ğ' -1 5 'H' -4 2 'I' -7 1 'İ' -1 10 'J' -7 1 'K' -7 1 'L' -4 2 'M' -5 1 'N' -3 2 'O' -1 7 'Ö' -1 5 'P' -6 1 'R' -3 2 'S' -2 4 'Ş' -5 1 'T' -3 2 'U' -2 3 'Ü' -1 7 'V' -2 3 'Y' -2 4 'Z' +{"_"} 0 2 +'A' 1 12 +'B' 3 2 +'C' 4 2 +'Ç' 4 2 +'D' 3 2 +'E' 1 8 +'F' 7 1 +'G' 5 1 +'Ğ' 8 1 +'H' 5 1 +'I' 2 4 +'İ' 1 7 +'J' 10 1 +'K' 1 7 +'L' 1 7 +'M' 2 4 +'N' 1 5 +'O' 2 3 +'Ö' 7 1 +'P' 5 1 +'R' 1 6 +'S' 2 3 +'Ş' 4 2 +'T' 1 5 +'U' 2 3 +'Ü' 3 2 +'V' 7 1 +'Y' 3 2 +'Z' 4 2 diff --git a/xwords4/dawg/dawg2dict.py b/xwords4/dawg/dawg2dict.py index 3c325f188..2e875335e 100755 --- a/xwords4/dawg/dawg2dict.py +++ b/xwords4/dawg/dawg2dict.py @@ -54,12 +54,18 @@ def splitFaces( buf ): return faces -def loadCountsAndValues( fh, numFaces, data ): - twoBytesFmt = struct.Struct('BB') +def loadCountsAndValues( fh, numFaces, extraData, data ): for ii in range(numFaces): - pair = twoBytesFmt.unpack(fh.read(twoBytesFmt.size)) - data[ii]['count'] = int(pair[0]) - data[ii]['val'] = int(pair[1]) + data[ii]['counts'] = {15: int.from_bytes(fh.read(1), 'little')} + data[ii]['val'] = int.from_bytes(fh.read(1), 'little') + if extraData: + buf = io.BytesIO(extraData) + while True: + siz = int.from_bytes(buf.read(1), 'little') + if not siz: break + for ii in range(numFaces): + count = int.from_bytes(buf.read(1), 'little') + data[ii]['counts'][siz] = count def eatBitmap( fh ): nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0]) @@ -142,6 +148,7 @@ def process(args): with open(args.DAWG, "rb") as dawg: nWords = 0 + extraData = None headerFmt = struct.Struct('!HH') (flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size)) @@ -162,7 +169,22 @@ def process(args): sys.exit(0) md5Sum = getNullTermParam(header) print( 'header: read sum: {}'.format(md5Sum), file=sys.stderr ) - except: + + # skip header flags + header.read(2) + print( 'header: skipped flags', file=sys.stderr) + + langCode = 
getNullTermParam(header) + langName = getNullTermParam(header) + print('header: langCode: {}; langName: {}'.format(langCode, langName), + file=sys.stderr) + + extraSize = int.from_bytes(header.read(1), 'little') + print( 'header: extraSize: {}'.format(extraSize), file=sys.stderr ) + extraData = header.read(extraSize) + + except Exception as ex: + print( 'header: exception!! {} '.format(ex) ) md5Sum = None if args.GET_SUM: @@ -214,7 +236,7 @@ def process(args): langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0] dawg.read( oneByteFmt.size ) # skip byte - loadCountsAndValues( dawg, numFaces, data ) + loadCountsAndValues( dawg, numFaces, extraData, data ) loadSpecialData( dawg, data ) offsetStruct = struct.Struct('!L') diff --git a/xwords4/dawg/xloc.py b/xwords4/dawg/xloc.py index c329ebd34..ec895ffa8 100755 --- a/xwords4/dawg/xloc.py +++ b/xwords4/dawg/xloc.py @@ -8,7 +8,12 @@ def errorOut(msg): def mkParser(): parser = argparse.ArgumentParser() parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' ) - parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', help = 'output table file' ) + parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', + help = 'output table file' ) + parser.add_argument('-oc', dest = 'DO_OTHERCOUNTS', action = 'store_true', + help = 'write extra (non-15x15 board) counts' ) + parser.add_argument('-info', dest = 'INFO_KEY', type = str, + help = 'info.txt keyword to write null-terminated' ) # parser.add_argument('-tn', dest = 'UNICODE', default = False, # action = 'store_true', help = 'assume unicode') @@ -53,8 +58,10 @@ def parseTileInfo(infoFile, encoding): if sEndTiles.match(line): break else: - (count, val, face) = line.split(None, 2) - result['_TILES'].append((count, val, face)) + (face, val, counts) = line.split(None, 2) + result['_TILES'].append({'counts': counts, + 'val': val, + 'face': face}) elif sBeginTiles.match(line): inTiles = True else: @@ -71,12 +78,18 @@ 
def printLetters( letters, outfile ): letters = ' '.join(letters) outfile.write(letters.encode('utf8')) +def writeInfoFile(xlocToken, key, outfile): + val = xlocToken[key] + assert val + outfile.write(val.encode('utf8')) + outfile.write(struct.pack('B', 0 )) + def writeMapFile(xlocToken, outfile): - print('writeMapFile()') + print('writeMapFile(out={})'.format(outfile)) tiles = xlocToken['_TILES'] specialCount = 0 for tile in tiles: - face = tile[2] + face = tile['face'] match = sSingleCharMatch.match(face) if match: printLetters( match.group(1), outfile ) @@ -94,13 +107,39 @@ def writeMapFile(xlocToken, outfile): def writeValuesFile(xlocToken, outfile): header = xlocToken.get('XLOC_HEADER') or errorOut('no XLOC_HEADER found') - print('writing header: {}'.format(header)) + print('writeValuesFile(out={}): writing header: {}'.format(outfile, header)) outfile.write(struct.pack('!H', int(header, 16))) + cs = xlocToken.get('COUNT_SIZES', '15').split() + useOffset = cs.index('15') + + nCounts = 0 for tile in xlocToken['_TILES']: - val = int(tile[0]) - count = int(tile[1]) - outfile.write(struct.pack('BB', val, count)) + counts = tile['counts'].split() + assert nCounts == 0 or nCounts == len(counts) + nCounts = len(counts) + assert nCounts == len(cs) + outfile.write(struct.pack('B', int(counts[useOffset]))) + + val = int(tile['val']) + outfile.write(struct.pack('B', val)) + +def writeOtherCounts(xlocToken, outfile): + cs = xlocToken.get('COUNT_SIZES', '15').split() + + tiles = xlocToken['_TILES'] + # Write the size of the data so it can be skipped by the reader, + # which won't know how many faces the tile set has yet. 
+ totalSiz = (len(cs) - 1) * (1 + len(tiles)) + outfile.write(struct.pack('B', totalSiz)) + + for useOffset in range(len(cs)): + siz = int(cs[useOffset]) + if siz == 15: continue + outfile.write(struct.pack('B', siz)) + for tile in tiles: + count = tile['counts'].split()[useOffset] + outfile.write(struct.pack('B', int(count))) def main(): print('{}.main {} called'.format(sys.argv[0], sys.argv[1:])) @@ -111,6 +150,10 @@ def main(): errorOut('{} not found'.format(infoFile)) xlocToken = parseTileInfo(infoFile, args.ENCODING) + if args.INFO_KEY and args.OUTFILE: + with open(args.OUTFILE, 'wb') as outfile: + writeInfoFile(xlocToken, args.INFO_KEY, outfile); + if args.DO_TABLE or args.TABLE_FILE: path = args.TABLE_FILE or args.OUTFILE with open(path, 'wb') as outfile: @@ -127,6 +170,10 @@ def main(): with open(path, 'wb') as outfile: writeValuesFile( xlocToken, outfile ) + if args.DO_OTHERCOUNTS and args.OUTFILE: + with open(args.OUTFILE, 'wb') as outfile: + writeOtherCounts(xlocToken, outfile) + ############################################################################## if __name__ == '__main__': main()