From e89feb62d8e08c0fbd5cf61a77cdc6166ae3329f Mon Sep 17 00:00:00 2001 From: Andy2 Date: Tue, 30 Nov 2010 18:38:05 -0800 Subject: [PATCH] second part of manual merge of unicode_branch's dawg/ directory into this one. This adds the directories and their files created inside dawg. --- xwords4/dawg/Czech/Makefile | 43 ++++++++++++++++++ xwords4/dawg/Czech/info.txt | 84 ++++++++++++++++++++++++++++++++++++ xwords4/dawg/Greek/Makefile | 44 +++++++++++++++++++ xwords4/dawg/Greek/info.txt | 60 ++++++++++++++++++++++++++ xwords4/dawg/Hëx/Makefile | 45 +++++++++++++++++++ xwords4/dawg/Hëx/info.txt | 61 ++++++++++++++++++++++++++ xwords4/dawg/Slovak/Makefile | 43 ++++++++++++++++++ xwords4/dawg/Slovak/info.txt | 84 ++++++++++++++++++++++++++++++++++++ 8 files changed, 464 insertions(+) create mode 100644 xwords4/dawg/Czech/Makefile create mode 100644 xwords4/dawg/Czech/info.txt create mode 100644 xwords4/dawg/Greek/Makefile create mode 100644 xwords4/dawg/Greek/info.txt create mode 100644 xwords4/dawg/Hëx/Makefile create mode 100644 xwords4/dawg/Hëx/info.txt create mode 100644 xwords4/dawg/Slovak/Makefile create mode 100644 xwords4/dawg/Slovak/info.txt diff --git a/xwords4/dawg/Czech/Makefile b/xwords4/dawg/Czech/Makefile new file mode 100644 index 000000000..3f9502292 --- /dev/null +++ b/xwords4/dawg/Czech/Makefile @@ -0,0 +1,43 @@ +# -*-mode: Makefile; coding: utf-8; -*- +# Copyright 2002-2008 by Eric House (xwords@eehouse.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +XWLANG=Czech +LANGCODE=cs_CZ +ENC = UTF-8 + +TARGET_TYPE ?= WINCE + +include ../Makefile.2to8 + +include ../Makefile.langcommon + +SOURCEDICT ?= $(XWDICTPATH)/Czech/Czech.2-1-6.dict.gz + +$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile + zcat $< | tr -d '\r' | \ + tr '[aábcčdďeéěfghiíjklmnňoóprřsštťuúůvxyýzž]' '[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]' | \ + grep '^[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]*$$' | \ + gzip -c > $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. + +clean: clean_common + rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb + +help: + @echo 'make [SOURCEDICT=$(XWDICTPATH)/$(XWLANG)/czech2_5.dict.gz]' diff --git a/xwords4/dawg/Czech/info.txt b/xwords4/dawg/Czech/info.txt new file mode 100644 index 000000000..337164364 --- /dev/null +++ b/xwords4/dawg/Czech/info.txt @@ -0,0 +1,84 @@ +# -*- coding: utf-8; mode: conf; -*- +# Copyright 2002-2008 by Eric House (xwords@eehouse.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +LANGCODE:cs_CZ +CHARSET:utf-8 + +# deal with DOS files +LANGFILTER: tr -d '\r' +# tr seems to work on systems that don't know the Czech locale, but +# grep does not. So don't use grep, e.g. to eliminate words +# containing letters not in our alphabet. Instead, pass the -r flag +# via D2DARGS so they're dropped. +LANGFILTER: | tr '[aábcčdďeéěfghiíjklmnňoóprřsštťuúůvxyýzž]' '[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]' +LANGFILTER: | sort -u + +# presence of high-ascii means we must not pass -nosort +D2DARGS: -r -term 0 -enc UTF-8 +#D2DARGS: -term 10 -r -enc UTF-8 + +LANGINFO:

This BYOD language works on Czech wordlists encoded in +LANGINFO: UTF-8 and produces dictionaries that should work on +LANGINFO: Unicode-aware systems.

+ +# High bit means "official". Next 7 bits are an enum where +# Czech-ISO8859-2==0x11. Low byte is padding. +XLOC_HEADER:0x9100 + +#COUNT VAL FACE + +2 0 {"_"} +5 1 'A' +2 2 'Á' +2 3 'B' +3 2 'C' +1 4 'Č' +3 1 'D' +1 8 'Ď' +5 1 'E' +2 3 'É' +2 3 'Ě' +1 5 'F' +1 5 'G' +3 2 'H' +4 1 'I' +3 2 'Í' +2 2 'J' +3 1 'K' +3 1 'L' +3 2 'M' +5 1 'N' +1 6 'Ň' +6 1 'O' +1 7 'Ó' +3 1 'P' +3 1 'R' +2 4 'Ř' +4 1 'S' +2 4 'Š' +4 1 'T' +1 7 'Ť' +3 2 'U' +1 5 'Ú' +1 4 'Ů' +4 1 'V' +1 10 'X' +2 2 'Y' +2 4 'Ý' +2 2 'Z' +1 4 'Ž' + diff --git a/xwords4/dawg/Greek/Makefile b/xwords4/dawg/Greek/Makefile new file mode 100644 index 000000000..ed542fce0 --- /dev/null +++ b/xwords4/dawg/Greek/Makefile @@ -0,0 +1,44 @@ +# -*- mode: Makefile; -*- +# Copyright 2002 - 2009 by Eric House (xwords@eehouse.org). All +# rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +XWLANG = Greek +LANGCODE = el_GK +ENC = UTF-8 + +# DICT2DAWGARGS = -lang $(LANGCODE) +# DICT2DAWGARGS = -debug + +TARGET_TYPE ?= WINCE + +include ../Makefile.2to8 + +include ../Makefile.langcommon + +SOURCEDICT ?= $(XWDICTPATH)/Greek/greek.dict + +$(XWLANG)Main.dict.gz: $(SOURCEDICT) + cat $< | tr -d '\r' \ + | tr 'αβγδεζηθικλμνξοπρσςτυφχψω' 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩ' \ + | grep '^[ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ]*$$' \ + | gzip > $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. + +clean: clean_common + rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb diff --git a/xwords4/dawg/Greek/info.txt b/xwords4/dawg/Greek/info.txt new file mode 100644 index 000000000..3cc048bd9 --- /dev/null +++ b/xwords4/dawg/Greek/info.txt @@ -0,0 +1,60 @@ +# -*- mode: conf; coding: utf-8; -*- +# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights +# reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +LANGCODE:el_GK +CHARSET: utf-8 + +LANGFILTER: tr -d '\r' +LANGFILTER: | tr 'αβγδεζηθικλμνξοπρσςτυφχψω' 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩ' +LANGFILTER: | grep '^[ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ]*$' + +LANGINFO:

Greek. Uploaded wordlist must be in utf-8 format. +LANGINFO:

+ +D2DARGS: -r -term 0 -enc UTF-8 + +XLOC_HEADER:0x9200 + +# This alphabet's likely correct but counts and values aren't. + +2 0 {"_"} +1 1 'Α' +1 1 'Β' +1 1 'Γ' +1 1 'Δ' +1 1 'Ε' +1 1 'Ζ' +1 1 'Η' +1 1 'Θ' +1 1 'Ι' +1 1 'Κ' +1 1 'Λ' +1 1 'Μ' +1 1 'Ν' +1 1 'Ξ' +1 1 'Ο' +1 1 'Π' +1 1 'Ρ' +1 1 'Σ' +1 1 'Τ' +1 1 'Υ' +1 1 'Φ' +1 1 'Χ' +1 1 'Ψ' +1 1 'Ω' + diff --git a/xwords4/dawg/Hëx/Makefile b/xwords4/dawg/Hëx/Makefile new file mode 100644 index 000000000..0f60e619b --- /dev/null +++ b/xwords4/dawg/Hëx/Makefile @@ -0,0 +1,45 @@ +# -*- mode: makefile; -*- +# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights +# reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +XWLANG = Hëx +LANGCODE = hex +ENC = UTF-8 + +TARGET_TYPE ?= WINCE + +include ../Makefile.2to8 + +include ../Makefile.langcommon + +# Pass in your own dict here by setting DICT +DICT ?= $(XWDICTPATH)/English/CSW.dict.gz + +# tr 'AE' 'ÄË' doesn't work, so use sed. +$(XWLANG)Main.dict.gz: $(DICT) + @echo "building $@ from $<" + zcat $< | tr '[a-f]' '[A-F]' | grep -e '^[A-F]\{2,8\}$$' | \ + echo CAFEBABE DEADBEEF $$(cat -) | \ + sed 's/A/Ä/g' | \ + sed 's/E/Ë/g' | \ + tr ' ' '\n' | sort | gzip > $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. 
+ +clean: clean_common + rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb diff --git a/xwords4/dawg/Hëx/info.txt b/xwords4/dawg/Hëx/info.txt new file mode 100644 index 000000000..526041cf1 --- /dev/null +++ b/xwords4/dawg/Hëx/info.txt @@ -0,0 +1,61 @@ +# -*- mode: conf; -*- +# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights +# reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +LANGCODE:HEX + +# uppercase all, then swap every A/E for the accented tile faces +LANGFILTER: tr '[a-f]' '[A-F]' +LANGFILTER: | grep '^[A-F]*$' +LANGFILTER: | sed 's/A/Ä/g' +LANGFILTER: | sed 's/E/Ë/g' +LANGFILTER: | sort -u + +D2DARGS: -term 10 + +LANGINFO:

The hex "language" is something of a programmers' joke. +LANGINFO: Hex is short for hexadecimal, a base-16 number system whose +LANGINFO: "digits" are the numerals 0-9 plus the letters A-F. Hex +LANGINFO: letters are often used to represent computer data, and +LANGINFO: certain sequences are sometimes used as markers because +LANGINFO: they're easy to pick out in large dumps of otherwise +LANGINFO: meaningless (to humans) garbage. In staring at Mac memory +LANGINFO: dumps, for example, you'd occasionally see the letters +LANGINFO: DEADBEEF and know that memory in that area was probably +LANGINFO: undamaged.

+ +LANGINFO:

I use Hex dictionaries for testing since they have few +LANGINFO: tiles and games play quickly. That's also why the Hex +LANGINFO: tile set has four blanks; that's the largest number +LANGINFO: Crosswords supports and I needed to test at the limit.

+ +# High bit means "official". Next 7 bits are an enum where Hex==127 +# (I just made that up; not sure what it was originally.) Low byte is +# padding +XLOC_HEADER:0xFF00 + + + +4 0 {"_"} +9 1 'Ä' +2 3 'B' +2 3 'C' +4 2 'D' +12 1 'Ë' +2 4 'F' + +# should ignore all after the above diff --git a/xwords4/dawg/Slovak/Makefile b/xwords4/dawg/Slovak/Makefile new file mode 100644 index 000000000..e3ec376e7 --- /dev/null +++ b/xwords4/dawg/Slovak/Makefile @@ -0,0 +1,43 @@ +# -*-mode: Makefile; coding: utf-8; -*- +# Copyright 2002-2010 by Eric House (xwords@eehouse.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +XWLANG=Slovak +LANGCODE=sk_SK +ENC = UTF-8 + +TARGET_TYPE ?= WINCE + +include ../Makefile.2to8 + +include ../Makefile.langcommon + +SOURCEDICT ?= $(XWDICTPATH)/Slovak/slovnik_scrabble.dict.gz + +$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile + zcat $< | tr -d '\r' | \ + tr '[aáäbcčdďeéfghiíjklĺľmnňoôóprŕsštťuúvxyýzž]' '[AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ]' | \ + grep '^[AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ]*$$' | \ + gzip -c > $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. 
+ +clean: clean_common + rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb + +help: + @echo 'make [SOURCEDICT=$(XWDICTPATH)/$(XWLANG)/slovnik_scrabble.dict.gz]' diff --git a/xwords4/dawg/Slovak/info.txt b/xwords4/dawg/Slovak/info.txt new file mode 100644 index 000000000..feac4c248 --- /dev/null +++ b/xwords4/dawg/Slovak/info.txt @@ -0,0 +1,84 @@ +# -*- coding: utf-8; mode: conf; -*- +# Copyright 2002-2010 by Eric House (xwords@eehouse.org). All rights +# reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +LANGCODE:sk_SK +CHARSET:utf-8 + +# deal with DOS files +LANGFILTER: tr -d '\r' + +LANGFILTER: | tr '[aáäbcčdďeéfghiíjklĺľmnňoôóprŕsštťuúvxyýzž]' '[AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ]' +LANGFILTER: | sort -u + +D2DARGS: -r -term 0 -enc UTF-8 + +LANGINFO:

This BYOD language works on Slovak wordlists encoded in +LANGINFO: UTF-8 and produces dictionaries that should work on +LANGINFO: Unicode-aware versions of Crosswords.

+ +# High bit means "official". Next 7 bits are an enum where +# Slovak==0x13. Low byte is padding. +XLOC_HEADER:0x9300 + +#COUNT VAL FACE +# converted from http://www.deskovehry.info/pravidla/scrabble/scrabble-sk.htm + +9 1 'A' +1 4 'Á' +1 10 'Ä' +2 4 'B' +1 4 'C' +1 5 'Č' +1 8 'D' +1 8 'Ď' +8 1 'E' +1 7 'É' +1 8 'F' +1 8 'G' +1 4 'H' +5 1 'I' +1 5 'Í' +2 3 'J' +3 2 'K' +3 2 'L' +1 10 'Ĺ' +1 7 'Ľ' +4 2 'M' +5 1 'N' +1 8 'Ň' +9 1 'O' +1 8 'Ô' +1 10 'Ó' +3 2 'P' +4 1 'R' +1 10 'Ŕ' +4 1 'S' +1 5 'Š' +4 1 'T' +1 7 'Ť' +2 3 'U' +1 7 'Ú' +4 1 'V' +1 10 'X' +1 4 'Y' +1 5 'Ý' +1 4 'Z' +1 5 'Ž' +# the blank *must* be last here!!! +2 0 {"_"} +