Replace A and E with umlaut'd unicode chars for a better test case

This commit is contained in:
ehouse 2009-03-20 03:45:36 +00:00
parent eae332764d
commit b0ae16c5e4
2 changed files with 16 additions and 12 deletions

View file

@ -1,4 +1,6 @@
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved. # -*- mode: makefile; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
# #
# This program is free software; you can redistribute it and/or # This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License # modify it under the terms of the GNU General Public License
@ -16,6 +18,7 @@
XWLANG = Hex XWLANG = Hex
LANGCODE = hex LANGCODE = hex
ENC = UTF-8
TARGET_TYPE = WINCE TARGET_TYPE = WINCE
@ -24,14 +27,15 @@ include ../Makefile.2to8
include ../Makefile.langcommon include ../Makefile.langcommon
# Pass in your own dict here by setting DICT # Pass in your own dict here by setting DICT
DICT ?= $(XWDICTPATH)/English/SOWPODS_official.txt.gz DICT ?= $(XWDICTPATH)/English/CSW.dict.gz
# Feel free to base this on whatever dictionary you have at hand. I'm # tr 'AE' 'ÄË' doesn't work, so use sed.
# using CollegeEng for no particular reason.
$(XWLANG)Main.dict.gz: $(DICT) $(XWLANG)Main.dict.gz: $(DICT)
@echo "building $@ from $<" @echo "building $@ from $<"
zcat $< | tr [a-f] [A-F] | grep -e '^[A-F]\{2,8\}$$' | \ zcat $< | tr [a-f] [A-F] | grep -e '^[A-F]\{2,8\}$$' | \
echo CAFEBABE DEADBEEF $$(cat -) | \ echo CAFEBABE DEADBEEF $$(cat -) | \
sed 's/A/Ä/g' | \
sed 's/E/Ë/g' | \
tr ' ' '\n' | sort | gzip > $@ tr ' ' '\n' | sort | gzip > $@
# Everything but creating of the Main.dict file is inherited from the # Everything but creating of the Main.dict file is inherited from the

View file

@ -1,4 +1,6 @@
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved. # -*- mode: conf; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
# #
# This program is free software; you can redistribute it and/or # This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License # modify it under the terms of the GNU General Public License
@ -16,14 +18,14 @@
LANGCODE:HEX LANGCODE:HEX
# uppercase all # uppercase all
LANGFILTER: tr [a-f] [A-F] LANGFILTER: tr [a-f] [A-F]
LANGFILTER: | grep '^[A-F]*$' LANGFILTER: | grep '^[A-F]*$'
# Map every A and E to the umlauted tile faces. The /g flag is required:
# without it sed rewrites only the first match on each line, leaving plain
# A/E letters that have no entry in the tile table below.
LANGFILTER: | sed 's/A/Ä/g'
LANGFILTER: | sed 's/E/Ë/g'
LANGFILTER: | sort -u LANGFILTER: | sort -u
D2DARGS: -nosort -term 10 D2DARGS: -term 10
LANGINFO: <p>The hex "language" is something of a programmers' joke. LANGINFO: <p>The hex "language" is something of a programmers' joke.
LANGINFO: Hex is short for hexadecimal, a 16-base number system whose LANGINFO: Hex is short for hexadecimal, a 16-base number system whose
@ -41,8 +43,6 @@ LANGINFO: tiles and games play quickly. That's also why the Hex
LANGINFO: tile set has four blanks; that's the largest number LANGINFO: tile set has four blanks; that's the largest number
LANGINFO: Crosswords supports and I needed to test at the limit.</p> LANGINFO: Crosswords supports and I needed to test at the limit.</p>
# High bit means "official". Next 7 bits are an enum where Hex==127 # High bit means "official". Next 7 bits are an enum where Hex==127
# (I just made that up; not sure what it was originally.) Low byte is # (I just made that up; not sure what it was originally.) Low byte is
# padding # padding
@ -51,11 +51,11 @@ XLOC_HEADER:0xFF00
<BEGIN_TILES> <BEGIN_TILES>
4 0 {"_"} 4 0 {"_"}
9 1 'A' 9 1 'Ä'
2 3 'B' 2 3 'B'
2 3 'C' 2 3 'C'
4 2 'D' 4 2 'D'
12 1 'E' 12 1 'Ë'
2 4 'F' 2 4 'F'
<END_TILES> <END_TILES>
# should ignore all after the <END_TILES> above # should ignore all after the <END_TILES> above