add umlaut to filenames as additional test

This commit is contained in:
ehouse 2009-03-29 15:38:17 +00:00
parent f66fa25e93
commit 1ac648f3e6
3 changed files with 110 additions and 0 deletions

4
dawg/Hëx/.cvsignore Normal file
View file

@ -0,0 +1,4 @@
*.bin
*.pdb
*.xwd
*.seb

45
dawg/Hëx/Makefile Normal file
View file

@ -0,0 +1,45 @@
# -*- mode: makefile; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Language name; also the filename prefix for the files this directory
# generates (see the $(XWLANG)Main.dict.gz rule and the clean target below).
# The non-ASCII "ë" is deliberate: it exercises umlauts in filenames.
XWLANG = Hëx
# NOTE(review): LANGCODE, ENC and TARGET_TYPE are not referenced anywhere in
# this file; presumably the included makefiles consume them -- confirm.
LANGCODE = hex
ENC = UTF-8
TARGET_TYPE = WINCE
# Makefiles shared from the parent directory. Per the note further down,
# everything except building Main.dict is inherited from Makefile.langcommon.
include ../Makefile.2to8
include ../Makefile.langcommon
# Pass in your own dict here by setting DICT
DICT ?= $(XWDICTPATH)/English/CSW.dict.gz

# Build the Hex word list from $(DICT):
#   1. upper-case a-f and keep only lines made entirely of 2 to 8 letters A-F;
#   2. seed the list with CAFEBABE and DEADBEEF;
#   3. rewrite A as Ä and E as Ë (tr 'AE' 'ÄË' doesn't work, so use sed);
#   4. sort, drop duplicates and gzip the result into the target.
# The tr sets are quoted so the shell cannot glob-expand them against a
# one-letter filename in the current directory.  The seed words are emitted
# with printf, ahead of the filtered words, so the word list is streamed
# through the pipeline instead of being passed as command-line arguments.
$(XWLANG)Main.dict.gz: $(DICT)
	@echo "building $@ from $<"
	{ printf 'CAFEBABE\nDEADBEEF\n'; \
	  zcat $< | tr '[a-f]' '[A-F]' | grep -e '^[A-F]\{2,8\}$$'; } | \
	  sed -e 's/A/Ä/g' -e 's/E/Ë/g' | \
	  sort -u | gzip > $@
# Everything but creating of the Main.dict file is inherited from the
# "parent" Makefile.langcommon in the parent directory.

# clean is a command, not a file: declare it phony so a stray file named
# "clean" in this directory cannot make the target look up to date.
.PHONY: clean
# Runs clean_common first (not defined in this file; presumably provided by
# the included makefiles), then removes the files generated in this directory.
clean: clean_common
	rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb

61
dawg/Hëx/info.txt Executable file
View file

@ -0,0 +1,61 @@
# -*- mode: conf; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
LANGCODE:HEX
# Upper-case a-f, keep only words made entirely of A-F, then rewrite every
# A as Ä and every E as Ë to match the tile faces below, and sort/de-duplicate.
# The sed substitutions need the g flag: without it only the first A and the
# first E on each line are replaced, leaving letters that have no tile.
# The tr sets are quoted so a shell cannot glob-expand them.
# NOTE(review): the grep runs before the substitutions, so this assumes the
# incoming words use plain A-F -- confirm against how LANGFILTER is applied.
LANGFILTER: tr '[a-f]' '[A-F]'
LANGFILTER: | grep '^[A-F]*$'
LANGFILTER: | sed 's/A/Ä/g'
LANGFILTER: | sed 's/E/Ë/g'
LANGFILTER: | sort -u
D2DARGS: -term 10
LANGINFO: <p>The hex "language" is something of a programmers' joke.
LANGINFO: Hex is short for hexadecimal, a base-16 number system whose
LANGINFO: "digits" are the numerals 0-9 plus the letters A-F. Hex
LANGINFO: letters are often used to represent computer data, and
LANGINFO: certain sequences are sometimes used as markers because
LANGINFO: they're easy to pick out in large dumps of otherwise
LANGINFO: meaningless (to humans) garbage. In staring at Mac memory
LANGINFO: dumps, for example, you'd occasionally see the letters
LANGINFO: DEADBEEF and know that memory in that area was probably
LANGINFO: undamaged.</p>
LANGINFO: <p>I use Hex dictionaries for testing since they have few
LANGINFO: tiles and games play quickly. That's also why the Hex
LANGINFO: tile set has four blanks; that's the largest number
LANGINFO: Crosswords supports and I needed to test at the limit.</p>
# High bit means "official". Next 7 bits are an enum where Hex==127
# (I just made that up; not sure what it was originally.) Low byte is
# padding
XLOC_HEADER:0xFF00
# Tile table, one tile per line.  NOTE(review): the columns appear to be
# count, point value and face -- the first row's count of 4 matches the
# "four blanks" mentioned in LANGINFO above; confirm against the parser.
<BEGIN_TILES>
4 0 {"_"}
9 1 'Ä'
2 3 'B'
2 3 'C'
4 2 'D'
12 1 'Ë'
2 4 'F'
<END_TILES>
# Everything after the <END_TILES> line above should be ignored.