diff --git a/xwords4/dawg/Russian/Makefile b/xwords4/dawg/Russian/Makefile index d982c89c8..585a59707 100644 --- a/xwords4/dawg/Russian/Makefile +++ b/xwords4/dawg/Russian/Makefile @@ -1,4 +1,4 @@ -# -*- mode: makefile -*- +# -*- coding: utf-8; -*- # Copyright 2002-2007 by Eric House (xwords@eehouse.org). All rights reserved. # # This program is free software; you can redistribute it and/or @@ -15,21 +15,23 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -XWLANG=Russian -LANGCODE=ru_RU +XWLANG = Russian +LANGCODE = ru_RU +ENC = UTF-8 DICT2DAWGARGS = -r TARGET_TYPE ?= WINCE include ../Makefile.langcommon -SOURCEDICT ?= $(XWDICTPATH)/$(XWLANG)/RU5000.txt.gz +SOURCEDICT ?= $(XWDICTPATH)/Russian/RU5000.txt.gz $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile zcat $< | tr -d '\r' | \ - tr [] [] | \ - gzip -c > $@ - + iconv -f ISO_8859-2 -t utf8 | \ + sed 's,.,\U\0,g' | \ + grep '^[ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß]*$$' | \ + gzip -c > $@ # Everything but creating of the Main.dict file is inherited from the # "parent" Makefile.langcommon in the parent directory. diff --git a/xwords4/dawg/Russian/info.txt b/xwords4/dawg/Russian/info.txt index 912f508f4..b00af9d43 100644 --- a/xwords4/dawg/Russian/info.txt +++ b/xwords4/dawg/Russian/info.txt @@ -1,3 +1,4 @@ +# -*- mode: conf; coding: utf-8; -*- # Copyright 2002,2007 by Eric House (xwords@eehouse.org). All rights # reserved. # @@ -21,56 +22,54 @@ CHARSET:windows-1251 # deal with DOS files LANGFILTER: tr -d '\r' # uppercase all -LANGFILTER: | tr [] [] +LANGFILTER: | tr [ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůÚűüýţ˙] [ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß] # LANGFILTER: | tr -s '\n' '\000' # note: don't turn off sorting! Can't do it with GNU 'sort' without # setting LANG D2DARGS: -r -term 10 -LANGINFO:
Russian wordlists must be in the Windows-1251 -LANGINFO: codepage. Lower-case letters are converted to upper case and -LANGINFO: any words that contain letters not listed below are -LANGINFO: removed.
+LANGINFO:Russian wordlists must be UTF-8 encoded. Lower-case +LANGINFO: letters are converted to upper case and any words that +LANGINFO: contain letters not listed below are removed.
# High bit means "official". Next 7 bits are an enum where # Russian==0x0F. Low byte is padding. XLOC_HEADER:0x8F00 -