From 79990bc7b108dd7e009d89eea9f914bdc717a313 Mon Sep 17 00:00:00 2001
From: Andy2 <andy2@trand.private.network>
Date: Tue, 30 Nov 2010 18:35:11 -0800
Subject: [PATCH] first set of changes formed by applying diff of
 android_branch's dawg/ directory against unicode_branch's.  The two branches
 seem to have no common ancestor -- probably didn't survive translation from
 svn -- so this is the best I can do.

This checkin is all the files that were modified by the patch plus a
couple of simple additions.  Next I'll be adding directories that the
patch created.  It also reintroduced a bunch of .cvsignore files; I
won't check those in.
---
 xwords4/dawg/Catalan/Makefile            |   2 +-
 xwords4/dawg/Catalan/info.txt            |   2 +-
 xwords4/dawg/English/Makefile            |   2 +-
 xwords4/dawg/English/Makefile.CollegeEng |  36 ++++
 xwords4/dawg/English/Makefile.OWL2       |   4 +-
 xwords4/dawg/English/Makefile.TWL06      |  37 ++++
 xwords4/dawg/German/Makefile             |  17 +-
 xwords4/dawg/German/info.txt             |  24 +--
 xwords4/dawg/Hex/Makefile                |  12 +-
 xwords4/dawg/Hex/info.txt                |  16 +-
 xwords4/dawg/Makefile.langcommon         |  29 ++--
 xwords4/dawg/Polish/Makefile             |  14 +-
 xwords4/dawg/Polish/info.txt             |  30 ++--
 xwords4/dawg/Spanish/Makefile            |  11 +-
 xwords4/dawg/Spanish/info.txt            |  12 +-
 xwords4/dawg/Swedish/Makefile            |  10 +-
 xwords4/dawg/Swedish/info.txt            |  24 +--
 xwords4/dawg/dawg2dict.pl                | 103 +++++++++--
 xwords4/dawg/dict2dawg.cpp               | 207 +++++++++++++----------
 xwords4/dawg/dictstats.pl                |  28 +--
 xwords4/dawg/xloc.pl                     |  15 +-
 xwords4/dawg/xloc.pm                     |  14 +-
 22 files changed, 423 insertions(+), 226 deletions(-)
 create mode 100644 xwords4/dawg/English/Makefile.CollegeEng
 create mode 100644 xwords4/dawg/English/Makefile.TWL06

diff --git a/xwords4/dawg/Catalan/Makefile b/xwords4/dawg/Catalan/Makefile
index 46815e993..da54778d7 100644
--- a/xwords4/dawg/Catalan/Makefile
+++ b/xwords4/dawg/Catalan/Makefile
@@ -33,7 +33,7 @@ endif
 endif
 
 LANG_SPECIAL_INFO = \
-		"L-L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
+		"LÂ·L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
 		"NY" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \
 		"QU" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \
 
diff --git a/xwords4/dawg/Catalan/info.txt b/xwords4/dawg/Catalan/info.txt
index 1cb56442d..9954db826 100644
--- a/xwords4/dawg/Catalan/info.txt
+++ b/xwords4/dawg/Catalan/info.txt
@@ -75,7 +75,7 @@ XLOC_HEADER:0x8C00
 8	1     'I'
 1	8     'J'
 4	1     'L'
-1	10    {"L-L"}
+1	10    {"LÂ·L"}
 3	2     'M'
 6	1     'N'
 1	10    {"NY"}
diff --git a/xwords4/dawg/English/Makefile b/xwords4/dawg/English/Makefile
index 95b975643..21c4b89ac 100644
--- a/xwords4/dawg/English/Makefile
+++ b/xwords4/dawg/English/Makefile
@@ -15,7 +15,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
-TARGET_TYPE ?= FRANK
+TARGET_TYPE ?= WINCE
 
 include ../Makefile.langcommon
 
diff --git a/xwords4/dawg/English/Makefile.CollegeEng b/xwords4/dawg/English/Makefile.CollegeEng
new file mode 100644
index 000000000..80d4cbd24
--- /dev/null
+++ b/xwords4/dawg/English/Makefile.CollegeEng
@@ -0,0 +1,36 @@
+# -*- mode: makefile; compile-command: "make -f Makefile.COSD"; -*-
+# Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+XWLANG=CollegeEng
+LANGCODE=en_US
+TARGET_TYPE=WINCE
+
+include ../Makefile.2to8
+
+include ../Makefile.langcommon
+
+SOURCEDICT ?= $(XWDICTPATH)/English/CollegeEng.dict.gz
+
+$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
+	zcat $< | tr -d '\r' | tr [a-z] [A-Z] | grep -e "^[A-Z]\{2,15\}$$" | \
+		gzip -c > $@
+
+# Everything but creating of the Main.dict file is inherited from the
+# "parent" Makefile.langcommon in the parent directory.
+
+clean: clean_common
+	rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb 
diff --git a/xwords4/dawg/English/Makefile.OWL2 b/xwords4/dawg/English/Makefile.OWL2
index 41479a843..ce7988a5a 100644
--- a/xwords4/dawg/English/Makefile.OWL2
+++ b/xwords4/dawg/English/Makefile.OWL2
@@ -15,9 +15,9 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
-XWLANG=OWL2_
+XWLANG=OWL2
 LANGCODE=en_US
-TARGET_TYPE=PALM
+TARGET_TYPE?=PALM
 
 include ../Makefile.2to8
 
diff --git a/xwords4/dawg/English/Makefile.TWL06 b/xwords4/dawg/English/Makefile.TWL06
new file mode 100644
index 000000000..28dd43ca1
--- /dev/null
+++ b/xwords4/dawg/English/Makefile.TWL06
@@ -0,0 +1,37 @@
+# -*- mode: makefile; compile-command: "make -f Makefile.COSD"; -*-
+# Copyright 2002-2010 by Eric House (xwords@eehouse.org).  All rights
+# reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+XWLANG=TWL06
+LANGCODE=en_US
+TARGET_TYPE=WINCE
+
+include ../Makefile.2to8
+
+include ../Makefile.langcommon
+
+# from http://www.3zsoftware.com/en/wordmagic/lists.php
+SOURCEDICT ?= $(XWDICTPATH)/English/twl06.zip
+
+$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
+	zcat $< | grep -e "^[A-Z]\{2,15\}$$" | gzip -c > $@
+
+# Everything but creating of the Main.dict file is inherited from the
+# "parent" Makefile.langcommon in the parent directory.
+
+clean: clean_common
+	rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb 
diff --git a/xwords4/dawg/German/Makefile b/xwords4/dawg/German/Makefile
index 62ec0be1e..b5e453d5a 100644
--- a/xwords4/dawg/German/Makefile
+++ b/xwords4/dawg/German/Makefile
@@ -1,5 +1,6 @@
-# -*- mode: makefile; coding: iso-8859-1; -*-
-# Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
+# -*- mode: makefile; coding: utf-8; -*-
+# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org).  All
+# rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -17,7 +18,7 @@
 
 XWLANG = German
 LANGCODE = de_DE
-ENC = ISO-8859-1
+ENC = UTF-8
 
 TARGET_TYPE ?= WINCE
 
@@ -28,11 +29,11 @@ include ../Makefile.langcommon
 SOURCEDICT ?= $(XWDICTPATH)/German/HansGerman.dict.gz
 
 $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
-	zcat $< | tr [a-zäöü] [A-ZÄÖÜ] | \
-		LANG=$(LANGCODE):$(ENC) sed -e 's/ß/SS/g' | \
-		LANG=$(LANGCODE):$(ENC) grep '[AEIOUÄÖÜ]' | \
-		LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÖÜ]\+$$' | \
-		gzip -c > $@
+	zcat $< \
+		| tr [a-zÃ¤Ã¶Ã¼] [A-ZÃ„Ã–Ãœ] \
+		| sed -e 's/ÃŸ/SS/g' \
+		| grep '^[A-ZÃ„Ã–Ãœ]*$$' \
+		| gzip -c > $@
 
 # Everything but creating of the Main.dict file is inherited from the
 # "parent" Makefile.langcommon in the parent directory.
diff --git a/xwords4/dawg/German/info.txt b/xwords4/dawg/German/info.txt
index f6321981d..7c3ee588f 100644
--- a/xwords4/dawg/German/info.txt
+++ b/xwords4/dawg/German/info.txt
@@ -1,4 +1,6 @@
-# Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
+# -*- mode: conf; coding: utf-8; -*-
+# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org).  All
+# rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -15,17 +17,18 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
 LANGCODE:de_DE
+CHARSET: utf-8
 
 # deal with DOS files
 LANGFILTER: tr -d '\r'
 # substitute for sharfes-s
-LANGFILTER: | sed -e 's/ß/SS/g'
+LANGFILTER: | sed -e 's/ÃŸ/SS/g'
 # uppercase all
-LANGFILTER: | tr [a-zäöü] [A-ZÄÖÜ]
+LANGFILTER: | tr [a-zÃ¤Ã¶Ã¼] [A-ZÃ„Ã–Ãœ]
 # no words not containing a vowel
-LANGFILTER: | grep '[AEIOUÄÖÜ]' 
+LANGFILTER: | grep '[AEIOUÃ„Ã–Ãœ]' 
 # none with illegal chars
-LANGFILTER: | grep '^[A-ZÄÖÜ]\+$'
+LANGFILTER: | grep '^[A-ZÃ„Ã–Ãœ]\+$'
 
 # Until I can figure out how to force sort to use a locale's collation
 # rules we can't trust sort in the filtering rules above and so must
@@ -46,9 +49,8 @@ XLOC_HEADER:0x8300
 
 <BEGIN_TILES>
 2			0		{"_"}
-5		        1		'A'
-# A mit umlaut
-1			6		196
+5	        1		'A'
+1			6		'Ã„'
 2			3		'B'
 2			4		'C'
 4			1		'D'
@@ -63,16 +65,14 @@ XLOC_HEADER:0x8300
 4			3		'M'
 9			1		'N'
 3			2		'O'
-# O mit umlaut
-1			8		214
+1			8		'Ã–'
 1			4		'P'
 1			10		'Q'
 6			1		'R'
 7			1		'S'
 6			1		'T'
 6			1		'U'
-# U mit umlaut
-1			6		220
+1			6		'Ãœ'
 1			6		'V'
 1			3		'W'
 1			8		'X'
diff --git a/xwords4/dawg/Hex/Makefile b/xwords4/dawg/Hex/Makefile
index cf710fa59..96d54ff1b 100644
--- a/xwords4/dawg/Hex/Makefile
+++ b/xwords4/dawg/Hex/Makefile
@@ -1,4 +1,6 @@
-# Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
+# -*- mode: makefile; -*-
+# Copyright 2002-2009 by Eric House (xwords@eehouse.org).  All rights
+# reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -16,6 +18,7 @@
 
 XWLANG = Hex
 LANGCODE = hex
+ENC = UTF-8
 
 TARGET_TYPE = WINCE
 
@@ -24,14 +27,15 @@ include ../Makefile.2to8
 include ../Makefile.langcommon
 
 # Pass in your own dict here by setting DICT
-DICT ?= $(XWDICTPATH)/English/SOWPODS_official.txt.gz
+DICT ?= $(XWDICTPATH)/English/CSW.dict.gz
 
-# Feel free to base this on whatever dictionary you have at hand.  I'm
-# using CollegeEng for no particular reason.
+# tr 'AE' 'Ã„Ã‹' doesn't work, so use sed.
 $(XWLANG)Main.dict.gz: $(DICT)
 	@echo "building $@ from $<"
 	zcat $< | tr [a-f] [A-F] | grep -e '^[A-F]\{2,8\}$$' | \
 		echo CAFEBABE DEADBEEF $$(cat -) | \
+		sed 's/A/Ã„/g' | \
+		sed 's/E/Ã‹/g' | \
 		tr ' ' '\n' | sort | gzip > $@
 
 # Everything but creating of the Main.dict file is inherited from the
diff --git a/xwords4/dawg/Hex/info.txt b/xwords4/dawg/Hex/info.txt
index fcd4f6baf..526041cf1 100755
--- a/xwords4/dawg/Hex/info.txt
+++ b/xwords4/dawg/Hex/info.txt
@@ -1,4 +1,6 @@
-# Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
+# -*- mode: conf; -*-
+# Copyright 2002-2009 by Eric House (xwords@eehouse.org).  All rights
+# reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -16,14 +18,14 @@
 
 LANGCODE:HEX
 
-
-
 # uppercase all
 LANGFILTER: tr [a-f] [A-F]
 LANGFILTER: | grep '^[A-F]*$'
+LANGFILTER: | sed 's/A/Ã„/'
+LANGFILTER: | sed 's/E/Ã‹/'
 LANGFILTER: | sort -u
 
-D2DARGS: -nosort -term 10
+D2DARGS: -term 10
 
 LANGINFO: <p>The hex "language" is something of a programmers' joke.
 LANGINFO: Hex is short for hexadecimal, a 16-base number system whose
@@ -41,8 +43,6 @@ LANGINFO: tiles and games play quickly.  That's also why the Hex
 LANGINFO: tile set has four blanks; that's the largest number
 LANGINFO: Crosswords supports and I needed to test at the limit.</p>
 
-
-
 # High bit means "official".  Next 7 bits are an enum where Hex==127
 # (I just made that up; not sure what it was originally.)  Low byte is
 # padding
@@ -51,11 +51,11 @@ XLOC_HEADER:0xFF00
 
 <BEGIN_TILES>
 4			0		{"_"}
-9			1		'A'
+9			1		'Ã„'
 2			3		'B'
 2			3		'C'
 4			2		'D'
-12			1		'E'
+12			1		'Ã‹'
 2			4		'F'
 <END_TILES>
 # should ignore all after the <END_TILES> above
diff --git a/xwords4/dawg/Makefile.langcommon b/xwords4/dawg/Makefile.langcommon
index a0206cc72..e5b5b64d8 100644
--- a/xwords4/dawg/Makefile.langcommon
+++ b/xwords4/dawg/Makefile.langcommon
@@ -204,16 +204,6 @@ endif
 frankspecials.bin: ../frank_mkspecials.pl  $(BMPFILES)
 	$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
 
-# a binary file (one byte) giving the number of tiles in the dict
-charcount.bin: table.bin
-ifdef NEWDAWG
-	siz=$$(ls -l $< | awk '{print $$5}'); \
-	perl -e "print pack(\"c\",$$siz/2)" > $@
-else
-	siz=$$(wc -c $< | sed -e 's/$<//'); \
-	perl -e "print pack(\"c\",$$siz)" > $@
-endif
-
 $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin 
 	cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
 		frankspecials.bin $(XWLANG)StartLoc.bin  \
@@ -233,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
 $(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
 ifdef NEWDAWG
 	if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \
+		then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
 	elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \
+		then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
 	elif true; \
 		then echo "Unexpected node size"; exit 1; \
 	fi
@@ -272,8 +262,19 @@ else
 	perl -I../ ../xloc.pl -t -out $@
 endif
 
-values.bin:  ../xloc.pl 
-	perl -I../ ../xloc.pl -v -out $@ $(ENCP)
+values.bin: ../xloc.pl
+	perl -I../ ../xloc.pl -v -out $@
+
+# a binary file, two bytes, one giving the size of tiles data and the
+#  other the number of tiles in the dict.  Tiles data is utf-8 and so
+#  number is not derivable from size.
+charcount.bin: table.bin ../xloc.pl
+	SIZ=$$(ls -l $< | awk '{print $$5}'); \
+	perl -e "print pack(\"c\",$$SIZ)" > $@
+	TMP=/tmp/tmp$$$$; \
+	perl -I../ ../xloc.pl -s -out $$TMP; \
+	cat $$TMP >> $@; \
+	rm -f $$TMP
 
 %.dict: %.dict.gz
 	zcat $< > $@
diff --git a/xwords4/dawg/Polish/Makefile b/xwords4/dawg/Polish/Makefile
index bee788e10..3254bfefb 100644
--- a/xwords4/dawg/Polish/Makefile
+++ b/xwords4/dawg/Polish/Makefile
@@ -1,4 +1,4 @@
-# -*- coding: iso-8859-2; mode: Makefile; -*-
+# -*- mode: Makefile; -*-
 # Copyright 2002 - 2009 by Eric House (xwords@eehouse.org).  All
 # rights reserved.
 #
@@ -16,9 +16,9 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
-XWLANG=Polish
-LANGCODE=pl_PL
-ENC = ISO-8859-2
+XWLANG = Polish
+LANGCODE = pl_PL
+ENC = UTF-8
 
 # DICT2DAWGARGS = -lang $(LANGCODE)
 # DICT2DAWGARGS = -debug
@@ -29,12 +29,12 @@ include ../Makefile.2to8
 
 include ../Makefile.langcommon
 
-SOURCEDICT ?= $(XWDICTPATH)/Polish/iso-8859-2/slowa.txt.gz
+SOURCEDICT ?= $(XWDICTPATH)/Polish/slowa.txt.gz
 
 $(XWLANG)Main.dict.gz: $(SOURCEDICT)
 	zcat $< | tr -d '\r' \
-	| LANG=$(LANGCODE):$(ENC) tr [a±bcædeêfghijkl³mnñoóprs¶tuwyz¼¿] [A¡BCÆDEÊFGHIJKL£MNÑOÓPRS¦TUWYZ¬¯] \
-    | LANG=$(LANGCODE):$(ENC) grep '^[A¡BCÆDEÊFGHIJKL£MNÑOÓPRS¦TUWYZ¬¯]*$$' \
+	| tr [aÄ…bcÄ‡deÄ™fghijklÅ‚mnÅ„oÃ³prsÅ›tuwyzÅºÅ¼] [AÄ„BCÄ†DEÄ˜FGHIJKLÅMNÅƒOÃ“PRSÅšTUWYZÅ¹Å»] \
+    | grep '^[AÄ„BCÄ†DEÄ˜FGHIJKLÅMNÅƒOÃ“PRSÅšTUWYZÅ¹Å»]*$$' \
     | gzip > $@
 
 # Everything but creating of the Main.dict file is inherited from the
diff --git a/xwords4/dawg/Polish/info.txt b/xwords4/dawg/Polish/info.txt
index 657006233..35eed9ac0 100644
--- a/xwords4/dawg/Polish/info.txt
+++ b/xwords4/dawg/Polish/info.txt
@@ -1,4 +1,4 @@
-# -*- coding: iso-8859-2; mode: conf; -*-
+# -*- mode: conf; -*-
 # Copyright 2002-2009 by Eric House (xwords@eehouse.org).  All rights
 # reserved.
 #
@@ -17,12 +17,12 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
 LANGCODE:pl_PL
-CHARSET:iso-8859-2
+CHARSET:utf-8
 
 # deal with DOS files
 LANGFILTER: tr -d '\r'
-LANGFILTER: | tr [a-pr-uwyz±æê³ñó¶¼¿] [A-PR-UWYZ¡ÆÊ£ÑÓ¦¬¯]
-LANGFILTER: | grep '^[A-PR-UWYZ¡ÆÊ£ÑÓ¦¬¯]*$'
+LANGFILTER: | tr [a-pr-uwyzÄ…Ä‡Ä™Å‚Å„Ã³Å›ÅºÅ¼] [A-PR-UWYZÄ„Ä†Ä˜ÅÅƒÃ“ÅšÅ¹Å»]
+LANGFILTER: | grep '^[A-PR-UWYZÄ„Ä†Ä˜ÅÅƒÃ“ÅšÅ¹Å»]*$'
 LANGFILTER: | tr '\n' '\000'
 
 D2DARGS: -r -term 0
@@ -42,8 +42,8 @@ LANGINFO: this working.</p>
 LANGINFO: <p>Note that the blank is the last tile here, while with all
 LANGINFO: other languages it&apos;s the first.</p>
 
-LANGINFO: <p>Also, please note that we currently require the files you
-LANGINFO: upload to use the iso-8859-2 character encoding.</p>
+# LANGINFO: <p>Also, please note that we currently require the files you
+# LANGINFO: upload to use the iso-8859-2 character encoding.</p>
 
 # High bit means "official".  Next 7 bits are an enum where
 # Polish==8.  Low byte is padding
@@ -51,13 +51,13 @@ XLOC_HEADER:0x8800
 
 <BEGIN_TILES>
 9           1       'A'
-1           5       161 # '¡'
+1           5       'Ä„'
 2           3       'B'
 3           2       'C'
-1           6       198 # 'Æ'
+1           6       'Ä†'
 3           2       'D'
 7           1       'E'
-1           5       202 # 'Ê'
+1           5       'Ä˜'
 1           5       'F'
 2           3       'G'
 2           3       'H'
@@ -65,23 +65,23 @@ XLOC_HEADER:0x8800
 2           3       'J'
 3           3  	    'K'
 3           2       'L'
-2           3       163 # '£'
+2           3       'Å'
 3           2       'M'
 5           1       'N'
-1           7       209 # 'Ñ'
+1           7       'Åƒ'
 6           1       'O'
-1           5       211 # 'Ó'
+1           5       'Ã“'
 3           2       'P'
 4           1       'R'
 4           1       'S'
-1           5       166 # '¦'
+1           5       'Åš'
 3           2       'T'
 2           3       'U'
 4           1       'W'
 4           2  	    'Y'
 5           1       'Z'
-1           9       172 # '¬'
-1           5       175 # '¯'
+1           9       'Å¹'
+1           5       'Å»'
 
 # the blank *must* be last here!!!
 2           0       {"_"}
diff --git a/xwords4/dawg/Spanish/Makefile b/xwords4/dawg/Spanish/Makefile
index 591a150c1..455a8c825 100644
--- a/xwords4/dawg/Spanish/Makefile
+++ b/xwords4/dawg/Spanish/Makefile
@@ -1,4 +1,4 @@
-# -*-mode: Makefile; compile-command: "make all"; coding: iso-8859-1; -*-
+# -*-mode: Makefile; compile-command: "make all"; coding: utf-8; -*-
 # Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
@@ -18,7 +18,7 @@
 XWLANG = SpanishFAA41
 LANGCODE = es_ES
 TARGET_TYPE ?= WINCE
-ENC = ISO-8859-1
+ENC = UTF-8
 
 ifeq ($(TARGET_TYPE),PALM)
 PBITMS = ./bmps/palm
@@ -44,14 +44,13 @@ include ../Makefile.langcommon
 #$(LANG)Main.dict.gz: SpanishMain.dict.gz
 #	ln -s $< $@
 
-SOURCEDICT ?= $(XWDICTPATH)/Spanish/FAA_4.1.txt.gz
+SOURCEDICT ?= $(XWDICTPATH)/Spanish/FAA_4.1.utf8.gz
 
 $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
 	zcat $< \
 	| tr -d '\r' \
-	| tr '\207\216\222\227\234\237\226' 'aeiouu\321' \
-	| tr [a-zñ] [A-ZÑ] \
-	| LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VX-ZÑ]*$$' \
+	| tr [a-zÃ±] [A-ZÃ‘] \
+	| LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VX-ZÃ‘]*$$' \
 	| sed 's/CH/1/g' \
 	| sed 's/LL/2/g' \
 	| sed 's/RR/3/g' \
diff --git a/xwords4/dawg/Spanish/info.txt b/xwords4/dawg/Spanish/info.txt
index 7a086e2b4..03a4ec1ff 100644
--- a/xwords4/dawg/Spanish/info.txt
+++ b/xwords4/dawg/Spanish/info.txt
@@ -1,4 +1,4 @@
-# -*- mode: conf; coding: iso-8859-1; -*-
+# -*- mode: conf; coding: utf-8; -*-
 # Copyright 2002-2006 by Eric House (xwords@eehouse.org).  All rights
 # reserved.
 #
@@ -20,6 +20,7 @@
 # below
 
 NEEDSSORT:true
+CHARSET: utf-8
 
 # MSDos LF chars go bye-bye
 LANGFILTER: tr -d '\r'
@@ -27,9 +28,9 @@ LANGFILTER: tr -d '\r'
 # convert accented vowels
 LANGFILTER: | tr '\207\216\222\227\234\237\226' 'aeiouu\321'
 # uppercase
-LANGFILTER: | tr [a-zñ] [A-ZÑ]
+LANGFILTER: | tr [a-zÃƒÂ±] [A-ZÃƒÂ‘]
 # remove words with illegal letters
-LANGFILTER: | grep '^[[A-JL-VX-ZÑ]*$'
+LANGFILTER: | grep '^[[A-JL-VX-ZÃƒÂ‘]*$'
 # substitute pairs (can't figure out how to use octal values)
 LANGFILTER: | sed 's/CH/1/g'
 LANGFILTER: | sed 's/LL/2/g'
@@ -43,7 +44,7 @@ LANGFILTER: | sort -u -z
 D2DARGS: -r -term 0
 
 LANGINFO: <p>Spanish words include all letters in the English alphabet
-LANGINFO: except "K" and "W", and with "Ñ" added. Since there are no
+LANGINFO: except "K" and "W", and with "ÃƒÂ‘" added. Since there are no
 LANGINFO: tiles for accented vowels, these are replaced by the
 LANGINFO: unaccented forms.</p>
 
@@ -92,8 +93,7 @@ XLOC_HEADER:0x8600
 1			8	{"LL", true, true}
 2			3	'M'
 5			1	'N'
-#	/*'N~'*/
-1			8	 209
+1			8	'Ã‘'
 9			1	'O'
 2			3	'P'
 1			5	'Q'
diff --git a/xwords4/dawg/Swedish/Makefile b/xwords4/dawg/Swedish/Makefile
index d830ba744..56f363240 100644
--- a/xwords4/dawg/Swedish/Makefile
+++ b/xwords4/dawg/Swedish/Makefile
@@ -1,4 +1,4 @@
-# -*-mode: Makefile; coding: iso-8859-1; -*-
+# -*-mode: Makefile; coding: utf-8; -*-
 # Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
@@ -17,7 +17,7 @@
 
 XWLANG=Swedish
 LANGCODE=sv_SE
-ENC = ISO-8859-1
+ENC = UTF-8
 
 # Swedish has too many chars for the old format.
 NEWDAWG=whatever
@@ -28,14 +28,14 @@ include ../Makefile.2to8
 
 include ../Makefile.langcommon
 
-SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.dict.gz
+SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.utf8.gz
 
 # Q and W are not available as tiles, but I'm told there's a custom in
 # Swedish play of allowing blanks to stand for those letters as well.
 # So we don't exclude words with those letters from the dictionary.
 $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
-	zcat $< | tr [a-zäåæöü] [A-ZÄÅÆÖÜ] | \
-		LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \
+	zcat $< | tr [a-zÃ¤Ã¥Ã¦Ã¶Ã¼] [A-ZÃ„Ã…Ã†Ã–Ãœ] | \
+		LANG=$(LANGCODE):$(ENC) grep '^[A-ZÃ„Ã…Ã†Ã–Ãœ]\{2,15\}$$' | \
 		gzip -c > $@
 
 # Everything but creating of the Main.dict file is inherited from the
diff --git a/xwords4/dawg/Swedish/info.txt b/xwords4/dawg/Swedish/info.txt
index 40dde73ef..7f95ee3c6 100644
--- a/xwords4/dawg/Swedish/info.txt
+++ b/xwords4/dawg/Swedish/info.txt
@@ -1,4 +1,4 @@
-# -*- mode: conf; coding: iso-8859-1; -*-
+# -*- mode: conf; coding: utf-8; -*-
 # Copyright 2002 by Eric House (xwords@eehouse.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
@@ -15,16 +15,17 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
+CHARSET: utf-8
 LANGCODE:sv_SE
 
 LANGFILTER: tr -d '\r'
-LANGFILTER: | tr [a-zäåæöü] [A-ZÄÅÆÖÜ]
-LANGFILTER: | grep '^[A-ZÄÅÆÖÜ]*$'
+LANGFILTER: | tr [a-zÃ¤Ã¥Ã¦Ã¶Ã¼] [A-ZÃ„Ã…Ã†Ã–Ãœ]
+LANGFILTER: | grep '^[A-ZÃ„Ã…Ã†Ã–Ãœ]*$'
 
 D2DARGS: -r -term 10
 
 LANGINFO: <p>From an English-speaker&apos;s perspective, Swedish drops Q
-LANGINFO: and W, and adds Ä, Å, Æ, Ö and Ü.</p>
+LANGINFO: and W, and adds Ã„, Ã…, Ã†, Ã– and Ãœ.</p>
 
 # High bit means "official".  Next 7 bits are an enum where
 # Swedish==7.  Low byte is padding
@@ -36,11 +37,11 @@ XLOC_HEADER:0x8700
 2			0		{"_"}
 8			1		'A'
 # A with two dots
-2			3		'Ä'
+2			3		'Ã„'
 # A with circle
-2			4		'Å'
-# Æ tile only available for blanks
-0			1		'Æ'
+2			4		'Ã…'
+# Ã† tile only available for blanks
+0			1		'Ã†'
 2			4		'B'
 1			8		'C'
 5			1		'D'
@@ -56,7 +57,7 @@ XLOC_HEADER:0x8700
 6			1		'N'
 5			2		'O'
 # O with two dots
-2			4		'Ö'
+2			4		'Ã–'
 2			4		'P'
 # Q tile only available for blanks
 0			1		'Q'
@@ -64,13 +65,12 @@ XLOC_HEADER:0x8700
 8			1		'S'
 8			1		'T'
 3			4		'U'
-# Ü tile only available for blanks
-0			1		'Ü'
+# Ãœ tile only available for blanks
+0			1		'Ãœ'
 2			3		'V'
 # W tile only available for blanks
 0			1		'W'
 1			8		'X'
 1			7		'Y'
 1			10		'Z'
-
 <END_TILES>
diff --git a/xwords4/dawg/dawg2dict.pl b/xwords4/dawg/dawg2dict.pl
index b4565cd34..70d93343a 100755
--- a/xwords4/dawg/dawg2dict.pl
+++ b/xwords4/dawg/dawg2dict.pl
@@ -1,6 +1,6 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -CS
 #
-# Copyright 2004 by Eric House (xwords@eehouse.org)
+# Copyright 2004 - 2009 by Eric House (xwords@eehouse.org)
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -22,9 +22,12 @@
 
 use strict;
 use Fcntl;
+use Encode 'from_to';
+use Encode;
 
 my $gInFile;
 my $gDoRaw = 0;
+my $gDoJSON = 0;
 my $gFileType;
 my $gNodeSize;
 
@@ -33,7 +36,7 @@ sub systell { sysseek($_[0], 0, SEEK_CUR) }
 
 sub usage() {
     print STDERR "USAGE: $0 "
-        . "[-raw] "
+        . "[-raw | -json] "
         . "-dict <xwdORpdb>"
         . "\n"
         . "\t(Takes a .pdb or .xwd and prints its words to stdout)\n";
@@ -45,6 +48,8 @@ sub parseARGV() {
     while ( my $parm = shift(@ARGV) ) {
         if ( $parm eq "-raw" ) {
             $gDoRaw = 1;
+        } elsif ( $parm eq "-json" ) {
+            $gDoJSON = 1;
         } elsif ( $parm eq "-dict" ) {
             $gInFile = shift(@ARGV);
         } else {
@@ -72,18 +77,32 @@ sub countSpecials($) {
 sub readXWDFaces($$$) {
     my ( $fh, $facRef, $nSpecials ) = @_;
 
-    my $buf;
-    my $nRead = sysread( $fh, $buf, 1 );
-    my $nChars = unpack( 'c', $buf );
+    my ( $buf, $nRead, $nChars, $nBytes );
+    $nRead = sysread( $fh, $buf, 1 );
+    $nBytes = unpack( 'c', $buf );
+    printf STDERR "nBytes of faces: %d\n", $nBytes;
+    $nRead = sysread( $fh, $buf, 1 );
+    $nChars = unpack( 'c', $buf );
+    printf STDERR "nChars of faces: %d\n", $nChars;
 
+    binmode( $fh, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
+    sysread( $fh, $buf, $nChars );
+    length($buf) == $nChars or die "didn't read expected number of bytes\n";
+    binmode( $fh ) or die "binmode failed\n";
+
+    print STDERR "string now: $buf\n";
     my @faces;
-    for ( my $i = 0; $i < $nChars; ++$i ) {
-        my $nRead = sysread( $fh, $buf, 2 );
-        push( @faces, chr(unpack( "n", $buf ) ) );
+    for ( my $ii = 0; $ii < $nChars; ++$ii ) {
+        my $chr = substr( $buf, $ii, 1 );
+        print STDERR "pushing $chr \n";
+        push( @faces, $chr );
     }
 
+    printf STDERR "at 0x%x after reading faces\n", systell($fh);
+
     ${$nSpecials} = countSpecials( \@faces );
     @{$facRef} = @faces;
+    printf STDERR "readXWDFaces=>%d\n", $nChars;
     return $nChars;
 } # readXWDFaces
 
@@ -99,6 +118,7 @@ sub skipBitmap($) {
 
         sysread( $fh, $buf, $nBytes );
     }
+    printf STDERR "skipBitmap\n";
 } # skipBitmap
 
 sub getSpecials($$$) {
@@ -138,9 +158,9 @@ sub readNodesToEnd($) {
 
 sub nodeSizeFromFlags($) {
     my ( $flags ) = @_;
-    if ( $flags == 2 ) {
+    if ( $flags == 4 ) {
         return 3;
-    } elsif ( $flags == 3 ) {
+    } elsif ( $flags == 5 ) {
         return 4;
     } else {
         die "invalid dict flags $flags";
@@ -161,6 +181,7 @@ sub mergeSpecials($$) {
 sub prepXWD($$$$) {
     my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
 
+    printf STDERR "at 0x%x at start\n", systell($fh);
     my $buf;
     my $nRead = sysread( $fh, $buf, 2 );
     my $flags = unpack( "n", $buf );
@@ -170,24 +191,30 @@ sub prepXWD($$$$) {
     my $nSpecials;
     my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
 
+    printf STDERR "at 0x%x before header read\n", systell($fh);
     # skip xloc header
     $nRead = sysread( $fh, $buf, 2 );
 
     # skip values info.
+    printf STDERR "at 0x%x before reading %d values\n", systell($fh), $faceCount;
     sysread( $fh, $buf, $faceCount * 2 );
+    printf STDERR "at 0x%x after values read\n", systell($fh);
 
+    printf STDERR "at 0x%x before specials read\n", systell($fh);
     my @specials;
     getSpecials( $fh, $nSpecials, \@specials );
     mergeSpecials( $facRef, \@specials );
+    printf STDERR "at 0x%x after specials read\n", systell($fh);
 
-#    printf STDERR "at 0x%x before offset read\n", systell($fh);
+    printf STDERR "at 0x%x before offset read\n", systell($fh);
     sysread( $fh, $buf, 4 );
     $$startRef = unpack( 'N', $buf );
-#    print STDERR "startRef=$$startRef\n";
+    print STDERR "startRef=$$startRef\n";
 
     my @nodes = readNodesToEnd( $fh );
 
     @$nodesRef = @nodes;
+    print STDERR "prepXWD done\n";
 } # prepXWD
 
 sub readPDBSpecials($$$$$) {
@@ -342,10 +369,52 @@ sub printNodes($$) {
     }
 }
 
+sub printStartJson($) {
+    my ( $startIndex ) = @_;
+    printf( "  start: 0x%.8x,\n", $startIndex );
+}
+
+sub printCharsJson($) {
+    my ( $fr ) = @_;
+    print "  chars: [ ";
+    foreach my $char (@$fr) {
+        print "\"$char\", "
+    }
+    print "],\n"
+}
+
+sub printNodesJson($) {
+    my ( $nr ) = @_;
+    print "  dawg: [\n";
+
+    my $len = @$nr;
+    my $newLine = 1;
+    for ( my $ii = 0; $ii < $len; ++$ii ) {
+        my $node = $$nr[$ii];
+
+        if ( $newLine ) {
+            printf( "    /*%.6x*/ ", $ii );
+            $newLine = 0;
+        }
+
+        printf "0x%.8x, ", $node;
+
+        my ( $chrIndex, $nextEdge, $accepting, $lastEdge );
+        parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge );
+        if ( $lastEdge ) {
+            print "\n";
+            $newLine = 1;
+        }
+    }
+
+    print "\n  ],\n"
+}
+
 #################################################################
 # main
 #################################################################
 
+binmode( STDERR, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
 
 parseARGV();
 
@@ -364,9 +433,17 @@ if ( $gFileType eq "xwd" ){
 close INFILE;
 
 die "no nodes!!!" if 0 == @nodes;
+
 if ( $gDoRaw ) {
     printNodes( \@nodes, \@faces );
+} elsif ( $gDoJSON ) {
+    print "dict = {\n";
+    printStartJson( $startIndex );
+    printCharsJson( \@faces );
+    printNodesJson( \@nodes );
+    print "}\n";
 } else {
+    binmode( STDOUT, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
     printDAWG( [], \@nodes, $startIndex, \@faces );
 }
 
diff --git a/xwords4/dawg/dict2dawg.cpp b/xwords4/dawg/dict2dawg.cpp
index 873593397..c48f5655e 100644
--- a/xwords4/dawg/dict2dawg.cpp
+++ b/xwords4/dawg/dict2dawg.cpp
@@ -78,7 +78,7 @@ static void (*gReadWordProc)(void) = NULL;
 static NodeList gNodes;       // final array of nodes
 static unsigned int gNBytesPerOutfile = 0xFFFFFFFF;
 static char* gTableFile = NULL;
-static bool gIsMultibyte = false;
+static bool gIsMultibyte = true; // always true
 static const char* gEncoding = NULL;
 static char* gOutFileBase = NULL;
 static char* gStartNodeOut = NULL;
@@ -91,9 +91,9 @@ static const char* gLang = NULL;
 static char* gBytesPerNodeFile = NULL;        // where to write whether node
                                        // size 3 or 4
 int gWordCount = 0;
-std::map<Letter,wchar_t> gTableHash;
+std::map<wchar_t,Letter> gTableHash;
 int gBlankIndex;
-std::vector<char> gRevMap;
+std::vector<wchar_t> gRevMap;
 #ifdef DEBUG
 bool gDebug = false;
 #endif
@@ -107,17 +107,19 @@ int gLimHigh = MAX_WORD_LEN;
 
 
 // OWL is 1.7M
-#define MAX_POOL_SIZE (10 * 0x100000)
+#define MAX_POOL_SIZE (10 * 0x100000 * sizeof(wchar_t))
 #define ERROR_EXIT(...) error_exit( __LINE__, __VA_ARGS__ );
+#define VSIZE(a) (sizeof(a)/sizeof(a[0]))
 
 static char* parseARGV( int argc, char** argv, const char** inFileName );
 static void usage( const char* name );
 static void error_exit( int line, const char* fmt, ... );
 static void makeTableHash( void );
+static void printTableHash( void );
 static WordList* parseAndSort( void );
 static void printWords( WordList* strings );
 static bool firstBeforeSecond( const Letter* lhs, const Letter* rhs );
-static char* tileToAscii( char* out, int outSize, const Letter* in );
+static wchar_t* tilesToText( wchar_t* out, int outLen, const Letter* in );
 static int buildNode( int depth );
 static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling );
 static int addNodes( NodeList& newedgesR );
@@ -178,6 +180,7 @@ main( int argc, char** argv )
     }
     
     makeTableHash();
+    printTableHash();
 
     // Do I need this stupid thing?  Better to move the first row to
     // the front of the array and patch everything else.  Or fix the
@@ -451,9 +454,9 @@ readFromSortedArray( void )
             }
 #ifdef DEBUG
             if ( gDebug ) {
-                char buf[T2ABUFLEN(MAX_WORD_LEN)];
-                fprintf( stderr, "%s: got word: %s\n", __func__,
-                         tileToAscii( buf, sizeof(buf), word ) );
+                wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
+                fprintf( stderr, "%s: got word: %ls\n", __func__,
+                         tilesToText( buf, VSIZE(buf), word ) );
             }
 #endif
         }
@@ -473,13 +476,13 @@ readFromSortedArray( void )
              && !firstBeforeSecond( gCurrentWord, word ) ) {
 #ifdef DEBUG
             if ( gDebug ) {
-                char buf1[T2ABUFLEN(MAX_WORD_LEN)];
-                char buf2[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
                 fprintf( stderr,
-                         "%s: words %s and %s are the same or out of order\n",
+                         "%s: words %ls and %ls are the same or out of order\n",
                          __func__, 
-                         tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
-                         tileToAscii( buf2, sizeof(buf2), word ) );
+                         tilesToText( buf1, VSIZE(buf1), gCurrentWord ),
+                         tilesToText( buf2, VSIZE(buf2), word ) );
             }
 #endif
             continue;
@@ -492,9 +495,9 @@ readFromSortedArray( void )
 
 #ifdef DEBUG
     if ( gDebug ) {
-        char buf[T2ABUFLEN(MAX_WORD_LEN)];
-        fprintf( stderr, "gCurrentWord now %s\n", 
-                 tileToAscii( buf, sizeof(buf), gCurrentWord) );
+        wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
+        fprintf( stderr, "gCurrentWord now %ls\n", 
+                 tilesToText( buf, VSIZE(buf), gCurrentWord) );
     }
 #endif
 } // readFromSortedArray
@@ -516,6 +519,9 @@ getWideChar( FILE* file )
             assert( 0 == ii );
             dest = byt;
             break;
+        } else if ( byt < ' ' && 0 == ii ) {
+            dest = byt;
+            break;
         }
 
         assert( ii < 4 );
@@ -533,7 +539,7 @@ getWideChar( FILE* file )
 } // getWideChar
 
 static Letter*
-readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
+readOneWord( Letter* wordBuf, const int bufLen, int* lenp, bool* gotEOF )
 {
     Letter* result = NULL;
     int count = 0;
@@ -545,7 +551,7 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
     // return it.  If no, start over ONLY IF the terminator was not
     // EOF.
     for ( ; ; ) {
-        wchar_t byt = gIsMultibyte? getWideChar( gInFile ) : getc( gInFile );
+        wchar_t byt = getWideChar( gInFile );
 
         // EOF is special: we don't try for another word even if
         // dropWord is true; we must leave now.
@@ -557,6 +563,13 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
             if ( !dropWord && (count >= gLimLow) && (count <= gLimHigh) ) {
                 assert( count < bufLen );
                 wordBuf[count] = '\0';
+#ifdef DEBUG
+                if ( gDebug ) {
+                    wchar_t buf[T2ABUFLEN(count)];
+                    fprintf( stderr, "%s: adding word: %ls\n", 
+                             __func__, tilesToText( buf, VSIZE(buf), wordBuf ) );
+                }
+#endif
                 result = wordBuf;
                 *lenp = count;
                 ++gWordCount;
@@ -567,11 +580,12 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
             } 
 #ifdef DEBUG
             if ( gDebug ) {
-                char buf[T2ABUFLEN(count)];
+                wchar_t buf[T2ABUFLEN(count)];
                 wordBuf[count] = '\0';
-                fprintf( stderr, "%s: dropping word (len %d>=%d): %s\n", 
-                         __func__, count, gLimHigh, 
-                         tileToAscii( buf, sizeof(buf), wordBuf ) );
+                fprintf( stderr, "%s: dropping word (len %d >%d or <%d or "
+                         "dropWord:%d): %ls\n", __func__, count, gLimHigh, 
+                         gLimLow, (int)dropWord,
+                         tilesToText( buf, VSIZE(buf), wordBuf ) );
             }
 #endif
             count = 0;  // we'll start over
@@ -579,43 +593,43 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
 
         } else if ( count >= bufLen ) {
             // Just drop it...
+            assert(0);          // Fix this -- but need to warn when out of
+                                // memory!!!
             dropWord = true;
 
             // Don't call into the hashtable twice here!!
-        } else if ( gTableHash.find(byt) != gTableHash.end() ) {
-            assert( count < bufLen );
-            wordBuf[count++] = gTableHash[byt];
-            if ( count >= bufLen ) {
-                dropWord = true;
-            }
-        } else if ( gKillIfMissing || !dropWord ) {
-            char buf[T2ABUFLEN(count)];
-            wordBuf[count] = '\0';
-
-            tileToAscii( buf, sizeof(buf), wordBuf );
-
-            if ( gKillIfMissing ) {
-                ERROR_EXIT( "chr %lc (%d/0x%x) not in map file %s\n"
-                            "last word was %s\n",
-                            byt, (int)byt, (int)byt, gTableFile, buf );
-            } else if ( !dropWord ) {
-#ifdef DEBUG
-                if ( gDebug ) {
-                    fprintf( stderr, "%s: chr %c (%d) not in map file %s\n"
-                             "dropping partial word %s\n", __func__,
-                             (char)byt, (int)byt, gTableFile, buf );
+        } else {
+            std::map<wchar_t,Letter>::iterator iter = gTableHash.find(byt);
+            if ( iter != gTableHash.end() ) {
+                assert( count < bufLen );
+                wordBuf[count++] = iter->second;
+                if ( count >= bufLen ) {
+                    dropWord = true;
                 }
+            } else if ( gKillIfMissing || !dropWord ) {
+                wchar_t buf[T2ABUFLEN(count)];
+                wordBuf[count] = '\0';
+
+                tilesToText( buf, VSIZE(buf), wordBuf );
+
+                if ( gKillIfMissing ) {
+                    ERROR_EXIT( "chr %lc (%d/0x%x) not in map file %s\n"
+                                "last word was %ls\n",
+                                byt, (int)byt, (int)byt, gTableFile, buf );
+                } else if ( !dropWord ) {
+#ifdef DEBUG
+                    if ( gDebug ) {
+                        fprintf( stderr, "%s: chr %lc (%d) not in map file %s\n"
+                                 "dropping partial word %ls\n", __func__,
+                                 byt, (int)byt, gTableFile, buf );
+                    }
 #endif
-                dropWord = true;
+                    dropWord = true;
+                }
             }
         }
-    }
+    } // for
 
-//     if ( NULL != result ) {
-//         char buf[T2ABUFLEN(MAX_WORD_LEN)];
-//         fprintf( stderr, "%s returning %s\n", __func__,
-//                  tileToAscii( buf, sizeof(buf), result ) );
-//     }
     return result;
 } // readOneWord
 
@@ -635,7 +649,7 @@ readFromFile( void )
     // during the sort.  This seems easier.
     for ( ; ; ) {
         if ( !gDone ) {
-            word = readOneWord( wordBuf, sizeof(wordBuf), &len, &s_eof );
+            word = readOneWord( wordBuf, VSIZE(wordBuf), &len, &s_eof );
             gDone = NULL == word;
         }
         if ( gDone ) {
@@ -658,13 +672,13 @@ readFromFile( void )
              && !firstBeforeSecond( gCurrentWord, word ) ) {
 #ifdef DEBUG
             if ( gDebug ) {
-                char buf1[T2ABUFLEN(MAX_WORD_LEN)];
-                char buf2[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
                 fprintf( stderr,
-                         "%s: words %s and %s are the smae or out of order\n",
+                         "%s: words %ls and %ls are the smae or out of order\n",
                          __func__, 
-                         tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
-                         tileToAscii( buf2, sizeof(buf2), word ) );
+                         tilesToText( buf1, VSIZE(buf1), gCurrentWord ),
+                         tilesToText( buf2, VSIZE(buf2), word ) );
             }
 #endif
             continue;
@@ -676,9 +690,9 @@ readFromFile( void )
 
 #ifdef DEBUG
     if ( gDebug ) {
-        char buf[T2ABUFLEN(MAX_WORD_LEN)];
-        fprintf( stderr, "gCurrentWord now %s\n", 
-                 tileToAscii( buf, sizeof(buf), gCurrentWord) );
+        wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
+        fprintf( stderr, "gCurrentWord now %ls\n", 
+                 tilesToText( buf, VSIZE(buf), gCurrentWord) );
     }
 #endif
 } // readFromFile
@@ -690,14 +704,15 @@ firstBeforeSecond( const Letter* lhs, const Letter* rhs )
     return gt;
 }
 
-static char*
-tileToAscii( char* out, int outSize, const Letter* in )
+static wchar_t*
+tilesToText( wchar_t* out, int outSize, const Letter* in )
 {
-    char tiles[outSize];
-    int tilesLen = 1;
-    tiles[0] = '[';
+    wchar_t tiles[outSize];
+    wchar_t* orig = out;
+    int tilesLen = 0;
+
+    tiles[tilesLen++] = L'[';
 
-    char* orig = out;
     for ( ; ; ) {
         Letter ch = *in++;
         if ( '\0' == ch ) {
@@ -705,14 +720,15 @@ tileToAscii( char* out, int outSize, const Letter* in )
         }
         assert( ch < gRevMap.size() );
         *out++ = gRevMap[ch];
-        tilesLen += sprintf( &tiles[tilesLen], "%d,", ch );
+
+        tilesLen += swprintf( &tiles[tilesLen], outSize-tilesLen, L"%d,", ch );
         assert( (out - orig) < outSize );
     }
 
     assert( tilesLen+1 < outSize );
-    tiles[tilesLen] = ']';
-    tiles[tilesLen+1] = '\0';
-    strcpy( out, tiles );
+    tiles[tilesLen] = L']';
+    tiles[tilesLen+1] = L'\0';
+    wcscpy( out, tiles );
 
     return orig;
 }
@@ -777,9 +793,9 @@ printWords( WordList* strings )
 {
     std::vector<Letter*>::iterator iter = strings->begin();
     while ( iter != strings->end() ) {
-        char buf[T2ABUFLEN(MAX_WORD_LEN)];
-        tileToAscii( buf, sizeof(buf), *iter );
-        fprintf( stderr, "%s\n", buf );
+        wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
+        tilesToText( buf, VSIZE(buf), *iter );
+        fprintf( stderr, "%ls\n", buf );
         ++iter;
     }
 }
@@ -906,18 +922,12 @@ makeTableHash( void )
     gRevMap.push_back(0);
 
     for ( ii = 0; ; ++ii ) {
-        int ch = getc(TABLEFILE);
-        if ( ch == EOF ) {
-            break;
-        }
-
-        if ( gUseUnicode ) {   // skip the first byte each time: tmp HACK!!!
-            ch = getc(TABLEFILE);
-        }
-        if ( ch == EOF ) {
+        wchar_t ch = getWideChar( TABLEFILE );
+        if ( EOF == ch ) {
             break;
         }
 
+        fprintf( stderr, "adding %lc/%x\n", ch, ch );
         gRevMap.push_back(ch);
 
         if ( ch == 0 ) {	// blank
@@ -940,6 +950,26 @@ makeTableHash( void )
     fclose( TABLEFILE );
 } // makeTableHash
 
+// Debug aid: dump gRevMap and check that gTableHash maps each face back to
+// its index.  Empty unless DEBUG is defined, since gDebug only exists then.
+static void
+printTableHash( void )
+{
+#ifdef DEBUG
+    for ( size_t ii = 1; gDebug && ii < gRevMap.size(); ++ii ) { // [0] is 0
+        const wchar_t ch = gRevMap[ii];
+        // find(), not operator[]: a lookup must not insert a missing face
+        std::map<wchar_t,Letter>::iterator iter = gTableHash.find( ch );
+        if ( 0 != ch && iter != gTableHash.end() ) {
+            fprintf( stderr, "%s: gRevMap[%d]: %lc\n", __func__, (int)ii, ch );
+            fprintf( stderr, "%s: gTableHash[%lc]: %d\n", __func__, ch,
+                     (int)iter->second );
+        }
+        assert( 0 == ch || (iter != gTableHash.end() && iter->second == ii) );
+    }
+#endif
+}
+
 // emitNodes. "input" is $gNodes.  From it we write up to
 // $nBytesPerOutfile to files named $outFileBase0..n, mapping the
 // letter field down to 5 bits with a hash built from $tableFile.  If
@@ -1065,6 +1095,9 @@ outputNode( Node node, int nBytes, FILE* outfile )
     unsigned int fco = TrieNodeGetFirstChildOffset(node);
     unsigned int fourthByte = 0;
 
+    assert( ((3 == nBytes) && (fco < (1<<17)))
+            || ((4 == nBytes) && (fco < (1<<24))) );
+
     if ( nBytes == 4 ) {
         fourthByte = fco >> 16;
         if ( fourthByte > 0xFF ) {
@@ -1085,7 +1118,7 @@ outputNode( Node node, int nBytes, FILE* outfile )
     //                                                  |  |  |    
     //                                accepting bit  ---+  |  |
     //                                 last edge bit ------+  |
-    //         ---- last bit (17th on next node addr)---------+
+    //         ---- last bit (17th of next node addr)---------+
 
     // The four-byte format adds a byte at the right end for
     // addressing, but removes the extra bit (5) in order to let the
@@ -1247,13 +1280,13 @@ parseARGV( int argc, char** argv, const char** inFileName )
 
     if ( !!enc ) {
         if ( !strcasecmp( enc, "UTF-8" ) ) {
-            gIsMultibyte = true;
+//             gIsMultibyte = true;
         } else if ( !strcasecmp( enc, "iso-8859-1" ) ) {
-            gIsMultibyte = false;
+//             gIsMultibyte = false;
         } else if ( !strcasecmp( enc, "iso-latin-1" ) ) {
-            gIsMultibyte = false;
+//             gIsMultibyte = false;
         } else if ( !strcasecmp( enc, "ISO-8859-2" ) ) {
-            gIsMultibyte = false;
+//             gIsMultibyte = false;
         } else {
             ERROR_EXIT( "%s: unknown encoding %s", __func__, enc );
         }
diff --git a/xwords4/dawg/dictstats.pl b/xwords4/dawg/dictstats.pl
index 567a77f16..8465ca8df 100755
--- a/xwords4/dawg/dictstats.pl
+++ b/xwords4/dawg/dictstats.pl
@@ -15,9 +15,15 @@
 use strict;
 
 my @wordSizeCounts;
-my @letterCounts;
+my %letterCounts;
 my $wordCount;
 my $letterCount;
+my $enc = "utf8";               # this could be a cmdline arg....
+
+if ( $enc ) {
+    binmode( STDOUT, ":encoding($enc)" ) ;
+    binmode( STDIN, ":encoding($enc)" ) ;
+}
 
 while (<>) {
 
@@ -27,10 +33,10 @@ while (<>) {
     ++$wordCount;
 
     foreach my $letter (split( / */ ) ) {
-        my $i = ord($letter);
+        my $ii = ord($letter);
         # special-case the bogus chars we add for "specials"
-        die "$0: this is a letter?: $i" if $i <= 32 && $i >= 4 && $i != 0; 
-        ++$letterCounts[$i];
+        die "$0: this is a letter?: $ii" if $ii <= 32 && $ii >= 4 && $ii != 0; 
+        ++$letterCounts{$letter};
         ++$letterCount;
     }
 }
@@ -54,14 +60,12 @@ for ( my $i = 1 ; $i <= 99; ++$i ) {
 print "\n\n**** Letter counts ****\n";
 print "     ASCII ORD  HEX     PCT (of $letterCount)\n";
 my $lineNo = 1;
-for ( my $i = 0; $i < 255; ++$i ) {
-    my $count = $letterCounts[$i];
-    if ( $count > 0 ) {
-        my $pct = (100.00 * $count) / $letterCount;
-        printf( "%2d: %3s   %3d  %x    %5.2f (%d)\n",
-                $lineNo, chr($i), $i, $i, $pct, $count );
-        ++$lineNo;
-    }
+foreach my $key (sort keys %letterCounts) {
+    my $count = $letterCounts{$key};
+    my $pct = (100.00 * $count) / $letterCount;
+    printf( "%2d: %3s   %3d  %x    %5.2f (%d)\n",
+            $lineNo, $key, ord($key), ord($key), $pct, $count );
+    ++$lineNo;
 }
 
 print "\n";
diff --git a/xwords4/dawg/xloc.pl b/xwords4/dawg/xloc.pl
index 20b72fcc9..23ef0ca43 100755
--- a/xwords4/dawg/xloc.pl
+++ b/xwords4/dawg/xloc.pl
@@ -23,6 +23,7 @@ use xloc;
 
 my $unicode = -1;
 my $doval = 0;
+my $dosize = 0;
 my $enc;
 my $outfile;
 
@@ -37,6 +38,8 @@ while ( $arg = $ARGV[0] ) {
         $unicode = 0;
     } elsif ( $arg eq "-v" ) {
         $doval = 1;
+    } elsif ( $arg eq "-s" ) {
+        $dosize = 1;
     } elsif ( $arg eq '-out' ) {
         $outfile = $ARGV[1];
         shift @ARGV;
@@ -52,12 +55,20 @@ die "info file $infoFile not found\n" if ! -s $infoFile;
 
 my $xlocToken = xloc::ParseTileInfo($infoFile, $enc);
 
-open OUTFILE, "> $outfile";
+# Three-arg open so $outfile is never parsed for mode characters; the
+# explicit check matters because 3-arg open of undef makes a temp file.
+die "no output file given" unless defined $outfile;
+my $outMode = $enc ? ">:encoding($enc)" : ">";
+open OUTFILE, $outMode, $outfile
+    or die "couldn't open $outfile: $!";
 # For f*cking windoze linefeeds
-binmode( OUTFILE );
+# binmode( OUTFILE );
 
 if ( $unicode ne -1 ) {
     xloc::WriteMapFile( $xlocToken, $unicode, \*OUTFILE );
+} elsif ( $dosize ) {
+    my $count = xloc::GetNTiles( $xlocToken );
+    print OUTFILE pack("c", $count );
 } elsif ( $doval ) {
     xloc::WriteValuesFile( $xlocToken, \*OUTFILE );
 }
diff --git a/xwords4/dawg/xloc.pm b/xwords4/dawg/xloc.pm
index 741968e76..6e25fa138 100644
--- a/xwords4/dawg/xloc.pm
+++ b/xwords4/dawg/xloc.pm
@@ -103,13 +103,6 @@ sub GetValue($$) {
 sub WriteMapFile($$$) {
     my ( $hashR, $unicode, $fhr ) = @_;
 
-    my $packStr;
-    if ( $unicode ) {
-        $packStr = "n";
-    } else {
-        $packStr = "C";
-    }
-
     my $count = GetNTiles($hashR);
     my $specialCount = 0;
     for ( my $i = 0; $i < $count; ++$i ) {
@@ -117,11 +110,12 @@ sub WriteMapFile($$$) {
         my $str = ${$tileR}[2];
 
         if ( $str =~ /\'(.)\'/ ) {
-            print $fhr pack($packStr, ord($1) );
+            print $fhr pack( "U", ord($1) );
+#            printf STDERR "ord: %x ($1)\n", ord($1);
         } elsif ( $str =~ /\"(.+)\"/ ) {
-            print $fhr pack($packStr, $specialCount++ );
+            print $fhr pack( "c", $specialCount++ );
         } elsif ( $str =~ /(\d+)/ ) {
-            print $fhr pack( $packStr, $1 );
+            print $fhr pack( "n", $1 );
         } else {
             die "WriteMapFile: unrecognized face format $str, elem $i";
         }