From d24099f52b5cd0bcc646e07b50ffc773cbd88557 Mon Sep 17 00:00:00 2001 From: Eric House Date: Sun, 5 Jun 2022 14:38:33 -0700 Subject: [PATCH] clean up Hex wordlist Remove it from known set so it can be used to test. Get rid of filenames having umlaut since that screws up URLs between Android and nginx (not sure whose fault and not going there now.) Lang name should be able to have an umlaut, but it gets used for filename for now so fix later.... --- .../app/src/main/res/values/common_rsrc.xml | 1 - xwords4/common/dictnry.c | 2 +- xwords4/common/strutils.c | 1 - xwords4/dawg/Hex/Makefile | 8 ++- xwords4/dawg/Hex/info.txt | 18 +++--- xwords4/dawg/Hëx/.gitignore | 1 - xwords4/dawg/Hëx/Makefile | 43 ------------- xwords4/dawg/Hëx/info.txt | 62 ------------------- 8 files changed, 16 insertions(+), 120 deletions(-) delete mode 100644 xwords4/dawg/Hëx/.gitignore delete mode 100644 xwords4/dawg/Hëx/Makefile delete mode 100644 xwords4/dawg/Hëx/info.txt diff --git a/xwords4/android/app/src/main/res/values/common_rsrc.xml b/xwords4/android/app/src/main/res/values/common_rsrc.xml index 8e8fd4005..08ada7f02 100644 --- a/xwords4/android/app/src/main/res/values/common_rsrc.xml +++ b/xwords4/android/app/src/main/res/values/common_rsrc.xml @@ -261,7 +261,6 @@ hu@string/lang_name_hungarian ro@string/lang_name_romanian fi@string/lang_name_finnish - hexHex diff --git a/xwords4/common/dictnry.c b/xwords4/common/dictnry.c index e94b68b36..cf02bf2f8 100644 --- a/xwords4/common/dictnry.c +++ b/xwords4/common/dictnry.c @@ -571,10 +571,10 @@ getCountsFor( const DictionaryCtxt* dict, XP_U16 nCols ) if ( 50 < (count * pct) % 100 ) { ++newCount; } - XP_ASSERT( tile != blank || newCount <= MAX_NUM_BLANKS ); if ( tile == blank && newCount > MAX_NUM_BLANKS ) { newCount = MAX_NUM_BLANKS; } + XP_ASSERT( tile != blank || newCount <= MAX_NUM_BLANKS ); counts[tile] = newCount; } } diff --git a/xwords4/common/strutils.c b/xwords4/common/strutils.c index d767d814d..eaad25442 100644 --- a/xwords4/common/strutils.c +++ b/xwords4/common/strutils.c @@ -443,7 +443,6 @@ static struct { { .lc = 0x14, .isoCode = "hu", }, { .lc = 0x15, .isoCode = "ro", }, { .lc = 0x19, .isoCode = "fi", }, - { .lc = 0x7f, .isoCode = "hex", }, }; XP_Bool diff --git a/xwords4/dawg/Hex/Makefile b/xwords4/dawg/Hex/Makefile index 23c159a91..cc9e29ec0 100644 --- a/xwords4/dawg/Hex/Makefile +++ b/xwords4/dawg/Hex/Makefile @@ -17,10 +17,10 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. XWLANG = Hex -LANGCODE = hex +LANGCODE = hx ENC = UTF-8 -DICTNOTE = "Toy/debugging wordlist comprising words from a large English list using only the letters A-F" +DICTNOTE = "Toy/debugging wordlist comprising words from a large English list using only the letters A-F, with umlauts thrown in" TARGET_TYPE = WINCE @@ -40,7 +40,9 @@ $(XWLANG)Main.dict.gz: $(DICT) sed 's/[[:lower:]]*/\U&/' | \ grep -e '^[A-F]\{2,15\}$$' | \ echo CAFEBABE DEADBEEF $$(cat -) | \ - tr ' ' '\n' | sort | gzip > $@ + sed 's/A/Ä/g' | \ + sed 's/E/Ë/g' | \ + tr ' ' '\n' | sort -u | gzip > $@ # Everything but creating of the Main.dict file is inherited from the # "parent" Makefile.langcommon in the parent directory. diff --git a/xwords4/dawg/Hex/info.txt b/xwords4/dawg/Hex/info.txt index 40aedf621..f8415ca6c 100755 --- a/xwords4/dawg/Hex/info.txt +++ b/xwords4/dawg/Hex/info.txt @@ -16,12 +16,14 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:hex +LANGCODE:hx LANGNAME:Hex # uppercase all LANGFILTER: | sed -e 's/[[:lower:]]*/\U&/' LANGFILTER: | grep -x '[A-F]\{2,15\}' +LANGFILTER: | sed 's/A/Ä/' +LANGFILTER: | sed 's/E/Ë/' LANGFILTER: | sort -u D2DARGS: -term 10 @@ -49,12 +51,12 @@ XLOC_HEADER:0xFF00 -{"_"} 0 4 -'A|a' 1 9 -'B|b' 3 2 -'C|c' 3 2 -'D|d' 2 4 -'E|e' 1 12 -'F|f' 4 2 +{"_"} 0 4 +'Ä|ä' 1 9 +'B|b' 3 2 +'C|c' 3 2 +'D|d' 2 4 +'Ë|ë' 1 12 +'F|f' 4 2 # should ignore all after the above diff --git a/xwords4/dawg/Hëx/.gitignore b/xwords4/dawg/Hëx/.gitignore deleted file mode 100644 index 3b9a3cef8..000000000 --- a/xwords4/dawg/Hëx/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.xwd diff --git a/xwords4/dawg/Hëx/Makefile b/xwords4/dawg/Hëx/Makefile deleted file mode 100644 index 89ec688ed..000000000 --- a/xwords4/dawg/Hëx/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -# -*- mode: makefile; -*- -# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights -# reserved. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -XWLANG = Hëx -LANGCODE = hex -ENC = UTF-8 - -TARGET_TYPE = WINCE - -include ../Makefile.langcommon - -# Pass in your own dict here by setting DICT -DICT ?= $(XWDICTPATH)/English/CSW15.dict - -# tr 'AE' 'ÄË' doesn't work, so use sed. -$(XWLANG)Main.dict.gz: $(DICT) - @echo "building $@ from $<" - cat $< | sed 's/[[:lower:]]*/\U&/' | grep -e '^[A-F]\{2,8\}$$' | \ - echo CAFEBABE DEADBEEF $$(cat -) | \ - sed 's/A/Ä/g' | \ - sed 's/E/Ë/g' | \ - tr ' ' '\n' | sort | gzip > $@ - -# Everything but creating of the Main.dict file is inherited from the -# "parent" Makefile.langcommon in the parent directory. - -clean: clean_common - rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb diff --git a/xwords4/dawg/Hëx/info.txt b/xwords4/dawg/Hëx/info.txt deleted file mode 100644 index 6a047e879..000000000 --- a/xwords4/dawg/Hëx/info.txt +++ /dev/null @@ -1,62 +0,0 @@ -# -*- mode: conf; -*- -# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights -# reserved. -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -LANGCODE:hë -LANGNAME:Hëx - -# uppercase all -LANGFILTER: | sed -e 's/[[:lower:]]*/\U&/' -LANGFILTER: | grep -x '[A-F]\{2,15\}' -LANGFILTER: | sed 's/A/Ä/' -LANGFILTER: | sed 's/E/Ë/' -LANGFILTER: | sort -u - -D2DARGS: -term 10 - -LANGINFO:

The hex "language" is something of a programmers' joke. -LANGINFO: Hex is short for hexadecimal, a 16-base number system whose -LANGINFO: "digits" are the numerals 0-9 plus the letters A-F. Hex -LANGINFO: letters are often used to represent computer data, and -LANGINFO: certain sequences are sometimes used as markers because -LANGINFO: they're easy to pick out in large dumps of otherwise -LANGINFO: meaningless (to humans) garbage. In staring at Mac memory -LANGINFO: dumps, for example, you'd occasionally see the letters -LANGINFO: DEADBEEF and know that memory in that area was probably -LANGINFO: undamaged.

- -LANGINFO:

I use Hex dictionaries for testing since they have few -LANGINFO: tiles and games play quickly. That's also why the Hex -LANGINFO: tile set has four blanks; that's the largest number -LANGINFO: Crosswords supports and I needed to test at the limit.

- -# High bit means "official". Next 7 bits are an enum where Hex==127 -# (I just made that up; not sure what it was originally.) Low byte is -# padding -XLOC_HEADER:0xFF00 - - - -{"_"} 0 4 -'Ä|ä' 1 9 -'B|b' 3 2 -'C|c' 3 2 -'D|d' 2 4 -'Ë|ë' 1 12 -'F|f' 4 2 - -# should ignore all after the above