second part of manual merge of unicode_branch's dawg/ directory into

this one.  This adds the directories and their files created inside
dawg.
This commit is contained in:
Andy2 2010-11-30 18:38:05 -08:00
parent 79990bc7b1
commit e89feb62d8
8 changed files with 464 additions and 0 deletions

View file

@ -0,0 +1,43 @@
# -*-mode: Makefile; coding: utf-8; -*-
# Copyright 2002-2008 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language name; it prefixes the generated file names
# (e.g. $(XWLANG)Main.dict.gz).
XWLANG=Czech
# Locale-style language code (its consumer is not in this file).
LANGCODE=cs_CZ
# Text encoding setting; presumably read by the included makefiles -- TODO confirm.
ENC = UTF-8
# Default target platform; ?= lets the caller override it.
TARGET_TYPE ?= WINCE
# Shared rules.  They are included after the settings above, presumably so
# those values are already defined when the includes are parsed -- TODO confirm.
include ../Makefile.2to8
include ../Makefile.langcommon
# Gzipped source wordlist; ?= lets the caller supply a different file.
SOURCEDICT ?= $(XWDICTPATH)/Czech/Czech.2-1-6.dict.gz
# Build the filtered, upper-cased wordlist from the gzipped source dictionary:
#  1. strip DOS carriage returns,
#  2. map lower-case letters to upper-case,
#  3. keep only lines made up entirely of the upper-case letters in the
#     grep pattern,
#  4. gzip the result into the target.
# The tr sets are quoted so the shell cannot glob-expand the bracket
# expressions against files in the current directory.  Listing Makefile as a
# prerequisite rebuilds the wordlist whenever this file changes.
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
	zcat $< | tr -d '\r' | \
		tr '[aábcčdďeéěfghiíjklmnňoóprřsštťuúůvxyýzž]' '[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]' | \
		grep '^[AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]*$$' | \
		gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the
# "parent" Makefile.langcommon in the parent directory.
# clean and help name actions, not files; declaring them phony keeps a stray
# file called "clean" or "help" from making them look up to date.
.PHONY: clean help

# Remove this language's generated files.  clean_common is a prerequisite
# that is not defined in this file.
clean: clean_common
	rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb

# Show how to point the build at a different source wordlist.
help:
	@echo 'make [SOURCEDICT=$(XWDICTPATH)/$(XWLANG)/czech2_5.dict.gz]'

View file

@ -0,0 +1,84 @@
# -*- coding: utf-8; mode: conf; -*-
# Copyright 2002-2008 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language code and wordlist character set.
LANGCODE:cs_CZ
CHARSET:utf-8
# The LANGFILTER lines appear to be joined into one shell pipeline: each
# line after the first starts with a pipe.
# deal with DOS files
LANGFILTER: tr -d '\r'
# tr seems to work on systems that don't know the Czech locale, but
# grep does not. So don't use grep, e.g. to eliminate words
# containing letters not in our alphabet. Instead, pass the -r flag
# via D2DARGS so they're dropped.
LANGFILTER: | tr [aábcčdďeéěfghiíjklmnňoóprřsštťuúůvxyýzž] [AÁBCČDĎEÉĚFGHIÍJKLMNŇOÓPRŘSŠTŤUÚŮVXYÝZŽ]
LANGFILTER: | sort -u
# presence of high-ascii means we must not pass -nosort
D2DARGS: -r -term 0 -enc UTF-8
#D2DARGS: -term 10 -r -enc UTF-8
# HTML description of this language.
LANGINFO: <p>This BYOD language works on Czech wordlists encoded in
LANGINFO: UTF-8 and produces dictionaries that should work on
LANGINFO: Unicode-aware systems. </p>
# High bit means "official". Next 7 bits are an enum where
# Czech-ISO8859-2==0x11. Low byte is padding.
# So 0x9100 is 0x80 | 0x11 in the high byte, followed by a zero byte.
XLOC_HEADER:0x9100
# One row per tile face.  The counts below sum to 100 tiles, including the
# two blanks on the first row.
#COUNT VAL FACE
<BEGIN_TILES>
2 0 {"_"}
5 1 'A'
2 2 'Á'
2 3 'B'
3 2 'C'
1 4 'Č'
3 1 'D'
1 8 'Ď'
5 1 'E'
2 3 'É'
2 3 'Ě'
1 5 'F'
1 5 'G'
3 2 'H'
4 1 'I'
3 2 'Í'
2 2 'J'
3 1 'K'
3 1 'L'
3 2 'M'
5 1 'N'
1 6 'Ň'
6 1 'O'
1 7 'Ó'
3 1 'P'
3 1 'R'
2 4 'Ř'
4 1 'S'
2 4 'Š'
4 1 'T'
1 7 'Ť'
3 2 'U'
1 5 'Ú'
1 4 'Ů'
4 1 'V'
1 10 'X'
2 2 'Y'
2 4 'Ý'
2 2 'Z'
1 4 'Ž'
<END_TILES>

View file

@ -0,0 +1,44 @@
# -*- mode: Makefile; -*-
# Copyright 2002 - 2009 by Eric House (xwords@eehouse.org). All
# rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language name; it prefixes the generated file names
# (e.g. $(XWLANG)Main.dict.gz).
XWLANG = Greek
# NOTE(review): the usual locale name for Greek is el_GR; confirm whether
# el_GK is intentional before changing it, since its consumers are not
# visible here.
LANGCODE = el_GK
# Text encoding setting; presumably read by the included makefiles -- TODO confirm.
ENC = UTF-8
# Disabled alternatives for DICT2DAWGARGS, kept for reference.
# DICT2DAWGARGS = -lang $(LANGCODE)
# DICT2DAWGARGS = -debug
# Default target platform; ?= lets the caller override it.
TARGET_TYPE ?= WINCE
# Shared rules, included after the settings above.
include ../Makefile.2to8
include ../Makefile.langcommon
# Uncompressed source wordlist; ?= lets the caller supply a different file.
SOURCEDICT ?= $(XWDICTPATH)/Greek/greek.dict
# Produce the Greek wordlist from the plain-text source: drop carriage
# returns, upper-case the 24 unaccented letters, discard every line that
# contains any other character, and gzip what is left into the target.
# NOTE(review): final sigma (ς) is not in the lower-case set, so lines
# containing it are rejected by the grep -- confirm this is intended.
$(XWLANG)Main.dict.gz: $(SOURCEDICT)
	tr -d '\r' < $< | \
		tr 'αβγδεζηθικλμνξοπρστυφχψω' 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ' | \
		grep '^[ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ]*$$' | \
		gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the
# "parent" Makefile.langcommon in the parent directory.
# clean names an action, not a file; declaring it phony keeps a stray file
# called "clean" from making it look up to date.
.PHONY: clean

# Remove this language's generated files.  clean_common is a prerequisite
# that is not defined in this file.
clean: clean_common
	rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb

View file

@ -0,0 +1,60 @@
# -*- mode: conf; coding: utf-8; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language code and wordlist character set.
# NOTE(review): the usual locale name for Greek is el_GR; confirm whether
# el_GK is intentional.
LANGCODE:el_GK
CHARSET: utf-8
# The LANGFILTER lines appear to be joined into one shell pipeline: strip
# carriage returns, upper-case the unaccented letters, then keep only lines
# made up entirely of those upper-case letters.
LANGFILTER: tr -d '\r'
LANGFILTER: | tr 'αβγδεζηθικλμνξοπρστυφχψω' 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ'
LANGFILTER: | grep '^[ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ]*$'
# HTML description of this language.
LANGINFO: <p>Greek. Uploaded wordlist must be in utf-8 format.
LANGINFO: </p>
D2DARGS: -r -term 0 -enc UTF-8
# 0x9200 is 0x80 | 0x12 in the high byte, followed by a zero byte.
XLOC_HEADER:0x9200
# This alphabet's likely correct but counts and values aren't.
# Every letter currently has the placeholder row "1 1"; with the two blanks
# the table holds 26 tiles.
# Columns: COUNT VAL FACE
<BEGIN_TILES>
2 0 {"_"}
1 1 'Α'
1 1 'Β'
1 1 'Γ'
1 1 'Δ'
1 1 'Ε'
1 1 'Ζ'
1 1 'Η'
1 1 'Θ'
1 1 'Ι'
1 1 'Κ'
1 1 'Λ'
1 1 'Μ'
1 1 'Ν'
1 1 'Ξ'
1 1 'Ο'
1 1 'Π'
1 1 'Ρ'
1 1 'Σ'
1 1 'Τ'
1 1 'Υ'
1 1 'Φ'
1 1 'Χ'
1 1 'Ψ'
1 1 'Ω'
<END_TILES>

View file

@ -0,0 +1,45 @@
# -*- mode: makefile; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language name; it prefixes the generated file names
# (e.g. $(XWLANG)Main.dict.gz).
XWLANG = Hëx
LANGCODE = hex
# Text encoding setting; presumably read by the included makefiles -- TODO confirm.
ENC = UTF-8
# Default target platform.  ?= (rather than =) matches the other language
# makefiles and lets an environment setting override the default.
TARGET_TYPE ?= WINCE
# Shared rules, included after the settings above.
include ../Makefile.2to8
include ../Makefile.langcommon
# Pass in your own dict here by setting DICT
DICT ?= $(XWDICTPATH)/English/CSW.dict.gz
# Build the hex wordlist from an ordinary gzipped dictionary:
#  - upper-case a-f and keep only 2-8 letter words spelled with A-F alone,
#  - prepend the markers CAFEBABE and DEADBEEF,
#  - swap every A and E for the tile faces Ä and Ë,
#  - emit one word per line, sorted and gzipped.
# The tr sets are quoted so the shell cannot glob-expand [a-f] or [A-F]
# against a single-letter file name in the current directory.
# tr 'AE' 'ÄË' doesn't work, so use sed.
$(XWLANG)Main.dict.gz: $(DICT)
	@echo "building $@ from $<"
	zcat $< | tr '[a-f]' '[A-F]' | grep -e '^[A-F]\{2,8\}$$' | \
		echo CAFEBABE DEADBEEF $$(cat -) | \
		sed 's/A/Ä/g' | \
		sed 's/E/Ë/g' | \
		tr ' ' '\n' | sort | gzip > $@
# Everything but creating of the Main.dict file is inherited from the
# "parent" Makefile.langcommon in the parent directory.
# clean names an action, not a file; declaring it phony keeps a stray file
# called "clean" from making it look up to date.
.PHONY: clean

# Remove this language's generated files.  clean_common is a prerequisite
# that is not defined in this file.
clean: clean_common
	rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb

View file

@ -0,0 +1,61 @@
# -*- mode: conf; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
LANGCODE:HEX
# The LANGFILTER lines appear to be joined into one shell pipeline.
# uppercase all
LANGFILTER: tr '[a-f]' '[A-F]'
LANGFILTER: | grep '^[A-F]*$'
# Replace every A and E (hence the g flag), not just the first one in each
# word: the tile set in this file has no plain A or E faces.
LANGFILTER: | sed 's/A/Ä/g'
LANGFILTER: | sed 's/E/Ë/g'
LANGFILTER: | sort -u
# NOTE(review): the tile faces Ä and Ë are non-ASCII, yet there is no
# CHARSET line or -enc argument here -- confirm whether they are needed.
D2DARGS: -term 10
LANGINFO: <p>The hex "language" is something of a programmers' joke.
LANGINFO: Hex is short for hexadecimal, a 16-base number system whose
LANGINFO: "digits" are the numerals 0-9 plus the letters A-F. Hex
LANGINFO: letters are often used to represent computer data, and
LANGINFO: certain sequences are sometimes used as markers because
LANGINFO: they're easy to pick out in large dumps of otherwise
LANGINFO: meaningless (to humans) garbage. In staring at Mac memory
LANGINFO: dumps, for example, you'd occasionally see the letters
LANGINFO: DEADBEEF and know that memory in that area was probably
LANGINFO: undamaged.</p>
LANGINFO: <p>I use Hex dictionaries for testing since they have few
LANGINFO: tiles and games play quickly. That's also why the Hex
LANGINFO: tile set has four blanks; that's the largest number
LANGINFO: Crosswords supports and I needed to test at the limit.</p>
# High bit means "official". Next 7 bits are an enum where Hex==127
# (I just made that up; not sure what it was originally.) Low byte is
# padding
XLOC_HEADER:0xFF00
# Columns: COUNT VAL FACE.  The counts sum to 35 tiles, four of them blanks.
<BEGIN_TILES>
4 0 {"_"}
9 1 'Ä'
2 3 'B'
2 3 'C'
4 2 'D'
12 1 'Ë'
2 4 'F'
<END_TILES>
# should ignore all after the <END_TILES> above

View file

@ -0,0 +1,43 @@
# -*-mode: Makefile; coding: utf-8; -*-
# Copyright 2002-2010 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language name; it prefixes the generated file names
# (e.g. $(XWLANG)Main.dict.gz).
XWLANG=Slovak
# Locale-style language code (its consumer is not in this file).
LANGCODE=sk_SK
# Text encoding setting; presumably read by the included makefiles -- TODO confirm.
ENC = UTF-8
# Default target platform; ?= lets the caller override it.
TARGET_TYPE ?= WINCE
# Shared rules, included after the settings above.
include ../Makefile.2to8
include ../Makefile.langcommon
# Gzipped source wordlist; ?= lets the caller supply a different file.
SOURCEDICT ?= $(XWDICTPATH)/Slovak/slovnik_scrabble.dict.gz
# Build the filtered, upper-cased wordlist from the gzipped source dictionary:
#  1. strip DOS carriage returns,
#  2. map lower-case letters to upper-case,
#  3. keep only lines made up entirely of the upper-case letters in the
#     grep pattern,
#  4. gzip the result into the target.
# The tr sets are quoted so the shell cannot glob-expand the bracket
# expressions against files in the current directory.  Listing Makefile as a
# prerequisite rebuilds the wordlist whenever this file changes.
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
	zcat $< | tr -d '\r' | \
		tr '[aáäbcčdďeéfghiíjklĺľmnňoôóprŕsštťuúvxyýzž]' '[AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ]' | \
		grep '^[AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ]*$$' | \
		gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the
# "parent" Makefile.langcommon in the parent directory.
# clean and help name actions, not files; declaring them phony keeps a stray
# file called "clean" or "help" from making them look up to date.
.PHONY: clean help

# Remove this language's generated files.  clean_common is a prerequisite
# that is not defined in this file.
clean: clean_common
	rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb

# Show how to point the build at a different source wordlist.  The example
# names the Slovak default file rather than the Czech one it was copied from.
help:
	@echo 'make [SOURCEDICT=$(XWDICTPATH)/$(XWLANG)/slovnik_scrabble.dict.gz]'

View file

@ -0,0 +1,84 @@
# -*- coding: utf-8; mode: conf; -*-
# Copyright 2002-2010 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language code and wordlist character set.
LANGCODE:sk_SK
CHARSET:utf-8
# The LANGFILTER lines appear to be joined into one shell pipeline: each
# line after the first starts with a pipe.
# deal with DOS files
LANGFILTER: tr -d '\r'
LANGFILTER: | tr [aáäbcčdďeéfghiíjklĺľmnňoôóprŕsštťuúvxyýzž] [AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ]
LANGFILTER: | sort -u
D2DARGS: -r -term 0 -enc UTF-8
# HTML description of this language.
LANGINFO: <p>This BYOD language works on Slovak wordlists encoded in
LANGINFO: UTF-8 and produces dictionaries that should work on
LANGINFO: Unicode-aware versions of Crosswords. </p>
# High bit means "official". Next 7 bits are an enum where
# Slovak==0x13. Low byte is padding.
# So 0x9300 is 0x80 | 0x13 in the high byte, followed by a zero byte.
XLOC_HEADER:0x9300
# One row per tile face.  The counts sum to 100 tiles, including the two
# blanks on the last row.  D is three tiles worth 2 points; it previously
# duplicated the "1 8" row of Ď, which left the set two tiles short.
#COUNT VAL FACE
# converted from http://www.deskovehry.info/pravidla/scrabble/scrabble-sk.htm
<BEGIN_TILES>
9 1 'A'
1 4 'Á'
1 10 'Ä'
2 4 'B'
1 4 'C'
1 5 'Č'
3 2 'D'
1 8 'Ď'
8 1 'E'
1 7 'É'
1 8 'F'
1 8 'G'
1 4 'H'
5 1 'I'
1 5 'Í'
2 3 'J'
3 2 'K'
3 2 'L'
1 10 'Ĺ'
1 7 'Ľ'
4 2 'M'
5 1 'N'
1 8 'Ň'
9 1 'O'
1 8 'Ô'
1 10 'Ó'
3 2 'P'
4 1 'R'
1 10 'Ŕ'
4 1 'S'
1 5 'Š'
4 1 'T'
1 7 'Ť'
2 3 'U'
1 7 'Ú'
4 1 'V'
1 10 'X'
1 4 'Y'
1 5 'Ý'
1 4 'Z'
1 5 'Ž'
# the blank *must* be last here!!!
2 0 {"_"}
<END_TILES>