clean up Hex wordlist

Remove it from the known set so it can be used for testing. Get rid of
filenames containing an umlaut, since that screws up URLs between Android
and nginx (not sure whose fault, and not going there now). The lang name
should be able to have an umlaut, but it is used as the filename for now,
so fix that later.
This commit is contained in:
Eric House 2022-06-05 14:38:33 -07:00
parent 20df5fec3a
commit d24099f52b
8 changed files with 16 additions and 120 deletions

View file

@ -261,7 +261,6 @@
<item>hu</item><item>@string/lang_name_hungarian</item>
<item>ro</item><item>@string/lang_name_romanian</item>
<item>fi</item><item>@string/lang_name_finnish</item>
<item>hex</item><item>Hex</item>
</string-array>
<!-- Triples of Name, supported codes, and URL format string -->

View file

@ -571,10 +571,10 @@ getCountsFor( const DictionaryCtxt* dict, XP_U16 nCols )
if ( 50 < (count * pct) % 100 ) {
++newCount;
}
XP_ASSERT( tile != blank || newCount <= MAX_NUM_BLANKS );
if ( tile == blank && newCount > MAX_NUM_BLANKS ) {
newCount = MAX_NUM_BLANKS;
}
XP_ASSERT( tile != blank || newCount <= MAX_NUM_BLANKS );
counts[tile] = newCount;
}
}

View file

@ -443,7 +443,6 @@ static struct {
{ .lc = 0x14, .isoCode = "hu", },
{ .lc = 0x15, .isoCode = "ro", },
{ .lc = 0x19, .isoCode = "fi", },
{ .lc = 0x7f, .isoCode = "hex", },
};
XP_Bool

View file

@ -17,10 +17,10 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
XWLANG = Hex
LANGCODE = hex
LANGCODE = hx
ENC = UTF-8
DICTNOTE = "Toy/debugging wordlist comprising words from a large English list using only the letters A-F"
DICTNOTE = "Toy/debugging wordlist comprising words from a large English list using only the letters A-F, with umlauts thrown in"
TARGET_TYPE = WINCE
@ -40,7 +40,9 @@ $(XWLANG)Main.dict.gz: $(DICT)
sed 's/[[:lower:]]*/\U&/' | \
grep -e '^[A-F]\{2,15\}$$' | \
echo CAFEBABE DEADBEEF $$(cat -) | \
tr ' ' '\n' | sort | gzip > $@
sed 's/A/Ä/g' | \
sed 's/E/Ë/g' | \
tr ' ' '\n' | sort -u | gzip > $@
# Everything except the creation of the Main.dict file is inherited from
# the "parent" Makefile.langcommon in the parent directory.

View file

@ -16,12 +16,14 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
LANGCODE:hex
LANGCODE:hx
LANGNAME:Hex
# uppercase all
LANGFILTER: | sed -e 's/[[:lower:]]*/\U&/'
LANGFILTER: | grep -x '[A-F]\{2,15\}'
LANGFILTER: | sed 's/A/Ä/'
LANGFILTER: | sed 's/E/Ë/'
LANGFILTER: | sort -u
D2DARGS: -term 10
@ -50,11 +52,11 @@ XLOC_HEADER:0xFF00
<BEGIN_TILES>
{"_"} 0 4
'A|a' 1 9
'Ä|ä' 1 9
'B|b' 3 2
'C|c' 3 2
'D|d' 2 4
'E|e' 1 12
'Ë|ë' 1 12
'F|f' 4 2
<END_TILES>
# should ignore all after the <END_TILES> above

View file

@ -1 +0,0 @@
*.xwd

View file

@ -1,43 +0,0 @@
# -*- mode: makefile; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
XWLANG = Hëx
LANGCODE = hex
ENC = UTF-8
TARGET_TYPE = WINCE
include ../Makefile.langcommon
# Pass in your own dict here by setting DICT
DICT ?= $(XWDICTPATH)/English/CSW15.dict
# tr 'AE' 'ÄË' doesn't work (GNU tr handles only single-byte characters,
# and Ä and Ë are multibyte in UTF-8), so use sed.
$(XWLANG)Main.dict.gz: $(DICT)
@echo "building $@ from $<"
cat $< | sed 's/[[:lower:]]*/\U&/' | grep -e '^[A-F]\{2,8\}$$' | \
echo CAFEBABE DEADBEEF $$(cat -) | \
sed 's/A/Ä/g' | \
sed 's/E/Ë/g' | \
tr ' ' '\n' | sort | gzip > $@
# Everything except the creation of the Main.dict file is inherited from
# the "parent" Makefile.langcommon in the parent directory.
clean: clean_common
rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb

View file

@ -1,62 +0,0 @@
# -*- mode: conf; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
LANGCODE:hë
LANGNAME:Hëx
# uppercase all
LANGFILTER: | sed -e 's/[[:lower:]]*/\U&/'
LANGFILTER: | grep -x '[A-F]\{2,15\}'
LANGFILTER: | sed 's/A/Ä/'
LANGFILTER: | sed 's/E/Ë/'
LANGFILTER: | sort -u
D2DARGS: -term 10
LANGINFO: <p>The hex "language" is something of a programmers' joke.
LANGINFO: Hex is short for hexadecimal, a 16-base number system whose
LANGINFO: "digits" are the numerals 0-9 plus the letters A-F. Hex
LANGINFO: letters are often used to represent computer data, and
LANGINFO: certain sequences are sometimes used as markers because
LANGINFO: they're easy to pick out in large dumps of otherwise
LANGINFO: meaningless (to humans) garbage. In staring at Mac memory
LANGINFO: dumps, for example, you'd occasionally see the letters
LANGINFO: DEADBEEF and know that memory in that area was probably
LANGINFO: undamaged.</p>
LANGINFO: <p>I use Hex dictionaries for testing since they have few
LANGINFO: tiles and games play quickly. That's also why the Hex
LANGINFO: tile set has four blanks; that's the largest number
LANGINFO: Crosswords supports and I needed to test at the limit.</p>
# The high bit means "official". The next 7 bits are an enum value, where
# Hex == 127 (I just made that up; not sure what it was originally). The
# low byte is padding.
XLOC_HEADER:0xFF00
<BEGIN_TILES>
{"_"} 0 4
'Ä|ä' 1 9
'B|b' 3 2
'C|c' 3 2
'D|d' 2 4
'Ë|ë' 1 12
'F|f' 4 2
<END_TILES>
# should ignore all after the <END_TILES> above