Cleanup to run on a machine that's utf8: specify iso-8859-1 when needed.

This commit is contained in:
ehouse 2009-01-25 18:57:05 +00:00
parent 64407edd26
commit 4661ac8227
15 changed files with 88 additions and 62 deletions

View file

@ -17,38 +17,44 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
LANGCODE:ca_ES
CHARSET: utf-8
NEEDSSORT:true
LANGINFO: <p>Catalan includes several special tiles, "L.L", "NY" and
LANGINFO: <p>Catalan includes several special tiles, "L·L", "NY" and
LANGINFO: "QU" in addition to Ç. There are no "Y" or "Q" tiles,
LANGINFO: and all words containing either of these letters not in
LANGINFO: combination with a "N" or "U" will be excluded from the
LANGINFO: dictionary. </p>
LANGINFO: <p>"L" is legal by itself, as are words in which two "L"s
LANGINFO: appear side-by-side. If you want your dictionary to include
LANGINFO: the "L.L" tile you will need to make sure that the exact
LANGINFO: string "L.L" (or "l.l") appears in the wordlist you
LANGINFO: upload.</p>
LANGINFO: appear side-by-side. The "L·L" tile is used whenever any of
LANGINFO: these three strings appears in the wordlist you upload:
LANGINFO: "L-L", "L.L" or "L·L". (And of course "l-l", "l.l" or
LANGINFO: "l·l".)</p>
LANGINFO: <p>In addition to the special multi-letter tiles discussed
LANGINFO: above, the following letters are allowed: A-J, L-V, X, Z and
LANGINFO: Ç. Lowercase letters will be converted to uppercase, then
LANGINFO: words containing letters not listed here will be excluded.</p>
LANGINFO: <p>The file you upload should be encoded in UTF-8.</p>
LANGFILTER_PRECLIP: tr 'ça-z' 'ÇA-Z' |
LANGFILTER_PRECLIP: grep -v 'Q[^U]' |
LANGFILTER_PRECLIP: grep -v '[^N]Y' |
LANGFILTER_PRECLIP: grep -v '^Y' |
LANGFILTER_PRECLIP: grep '^[ÇA-JL-VXYZ\.]*$' |
LANGFILTER_PRECLIP: sed -e 's/L\.L/1/g' -e 's/NY/2/g' -e 's/QU/3/g' |
# MS-DOS CR (carriage return) chars go bye-bye
LANGFILTER: tr -d '\r'
LANGFILTER_POSTCLIP: | tr -d '\r'
LANGFILTER_POSTCLIP: | sort -u
LANGFILTER_POSTCLIP: | tr -s '\n' '\000'
LANGFILTER: | tr 'a-zç' 'A-ZÇ'
LANGFILTER: | sed -e 's/L·L/1/g' -e 's/L\.L/1/g' -e 's/L-L/1/g'
LANGFILTER: | sed -e 's/NY/2/g' -e 's/QU/3/g'
LANGFILTER: | grep '^[Ç1-3A-JL-VXZ\.]*$'
#LANGFILTER_PRECLIP: sed 's/NY/2/g' |
#LANGFILTER_PRECLIP: sed 's/QU/3/g' |
# substitute in the octal control character values
LANGFILTER: | tr '123' '\001\002\003'
LANGFILTER: | tr -s '\n' '\000'
D2DARGS: -r -term 0 -enc UTF-8
LANGFILTER_POSTCLIP: | tr '123' '\001\002\003'
# High bit means "official". Next 7 bits are an enum where
# Catalan==c. Low byte is padding

View file

@ -1,4 +1,4 @@
# -*- mode: makefile -*-
# -*- mode: Makefile; coding: iso-8859-1; -*-
# Copyright 2002-2005 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
@ -15,8 +15,9 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
XWLANG=Danish
LANGCODE=da_DK
XWLANG = Danish
LANGCODE = da_DK
ENC = ISO-8859-1
TARGET_TYPE ?= PALM
@ -24,12 +25,14 @@ include ../Makefile.2to8
include ../Makefile.langcommon
SOURCEDICT ?= $(XWDICTPATH)/$(XWLANG)/LarsDanish.dict.gz
SOURCEDICT ?= $(XWDICTPATH)/Danish/LarsDanish.dict.gz
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
zcat $< | tr -d '\r' | tr [a-zåæø] [A-ZÅÆØ] | \
grep '[AEIOUÅÆØ]' | \
grep '^[A-PR-VX-ZÅÆØ]\+$$' | sort -u | \
zcat $< | tr -d '\r' | \
LANG=$(LANGCODE):$(ENC) tr [a-zåæø] [A-ZÅÆØ] | \
LANG=$(LANGCODE):$(ENC) grep '[AEIOUÅÆØ]' | \
LANG=$(LANGCODE):$(ENC) grep '^[A-PR-VX-ZÅÆØ]\+$$' | \
LANG=$(LANGCODE):$(ENC) sort -u | \
gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -1,5 +1,5 @@
# -*- mode: makefile -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
# -*- mode: makefile; coding: iso-8859-1 -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@ -17,18 +17,25 @@
XWLANG=Dutch
LANGCODE=nl_NL
ENC = ISO-8859-1
TARGET_TYPE ?= PALM
TARGET_TYPE ?= WINCE
include ../Makefile.2to8
include ../Makefile.langcommon
SOURCEDICT ?= $(XWDICTPATH)/$(XWLANG)/Dutch__unofficial_alphabetical.dict.gz
SOURCEDICT ?= $(XWDICTPATH)/Dutch/Dutch__unofficial_alphabetical.dict.gz
# This is weird. We're keeping umlaut letters even though they're not
# on tiles. Do they get translated to non-umlaut equivalents, or are
# they allowed to drop this way? Need to confirm the tile set and conversion.
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
zcat $< | tr -d '\r' | tr [a-zäöü] [A-ZÄÖÜ] | \
grep '^[A-Z]\+$$' | sort -u | \
zcat $< | tr -d '\r' | \
tr [a-zäöü] [A-ZÄÖÜ] | \
LANG=$(LANGCODE):$(ENC) grep '^[A-Z]\+$$' | \
LANG=$(LANGCODE):$(ENC) sort -u | \
gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -1,4 +1,4 @@
# -*- mode:conf; -*-
# -*- mode:conf; coding: iso-8859-1; -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or

View file

@ -1,3 +1,4 @@
# -*- mode: conf; -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or

View file

@ -24,7 +24,7 @@ include ../Makefile.2to8
include ../Makefile.langcommon
$(XWLANG)Main.dict.gz: $(XWDICTPATH)/$(XWLANG)/ods4c.txt.gz
$(XWLANG)Main.dict.gz: $(XWDICTPATH)/French/ods4c.txt.gz
zcat $< | tr a-z A-Z | tr -d '\r' | gzip >$@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -1,4 +1,4 @@
# -*- mode: makefile -*-
# -*- mode: makefile; coding: iso-8859-1; -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
@ -15,21 +15,23 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
XWLANG=German
LANGCODE=de_DE
XWLANG = German
LANGCODE = de_DE
ENC = ISO-8859-1
TARGET_TYPE ?= PALM
TARGET_TYPE ?= WINCE
include ../Makefile.2to8
include ../Makefile.langcommon
SOURCEDICT ?= $(XWDICTPATH)/$(XWLANG)/HansGerman.dict.gz
SOURCEDICT ?= $(XWDICTPATH)/German/HansGerman.dict.gz
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
zcat $< | tr [a-zäöü] [A-ZÄÖÜ] | \
sed -e 's/ß/SS/g' | \
grep '[AEIOUÄÖÜ]' | grep '^[A-ZÄÖÜ]\+$$' | \
LANG=$(LANGCODE):$(ENC) sed -e 's/ß/SS/g' | \
LANG=$(LANGCODE):$(ENC) grep '[AEIOUÄÖÜ]' | \
LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÖÜ]\+$$' | \
gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -24,7 +24,7 @@ include ../Makefile.2to8
include ../Makefile.langcommon
$(XWLANG)Main.dict.gz: $(XWDICTPATH)/$(XWLANG)/ITALIANO.txt.gz
$(XWLANG)Main.dict.gz: $(XWDICTPATH)/Italian/ITALIANO.txt.gz
zcat $< | tr a-z A-Z | grep '^[A-IL-VZ]*$$' | gzip >$@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -1,3 +1,4 @@
# -*- mode: conf; -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or

View file

@ -1,4 +1,4 @@
# -*- mode: makefile -*-
# -*- mode: makefile; coding: iso-8859-1 -*-
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
@ -16,10 +16,11 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
LANG=PortugueseBR
LANGCODE=pt_PT
XWLANG = PortugueseBR
LANGCODE = pt_PT
ENC = ISO-8859-1
TARGET_TYPE ?= PALM
TARGET_TYPE ?= WINCE
include ../Makefile.2to8
@ -27,9 +28,9 @@ include ../Makefile.langcommon
SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portugueseBR.txt.gz
$(LANG)Main.dict.gz: $(SOURCEDICT) Makefile.BrOffice
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.BrOffice
zcat $< | tr [a-zç] [A-ZÇ] | \
grep '^[A-JL-VXZÇ]\+$$' | \
LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VXZÇ]\+$$' | \
gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -1,4 +1,5 @@
# -*- mode: makefile -*-
# -*- mode: makefile; coding: iso-8859-1 -*-
#
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
# reserved.
#
@ -17,9 +18,10 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
XWLANG=PortuguesePT
LANGCODE=pt_PT
LANGCODE = pt_PT
ENC = ISO-8859-1
TARGET_TYPE ?= PALM
TARGET_TYPE ?= WINCE
include ../Makefile.2to8
@ -29,7 +31,7 @@ SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portuguese_pt.bz2
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.Minho
bzcat $< | tr [a-zç] [A-ZÇ] | \
grep '^[A-JL-VXZÇ]\+$$' | \
LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VXZÇ]\+$$' | \
gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -1,4 +1,4 @@
# -*-mode: Makefile; compile-command: "make all"; -*-
# -*-mode: Makefile; compile-command: "make all"; coding: iso-8859-1; -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
@ -15,9 +15,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
XWLANG=SpanishFAA41
LANGCODE=es_ES
TARGET_TYPE ?= PALM
XWLANG = SpanishFAA41
LANGCODE = es_ES
TARGET_TYPE ?= WINCE
ENC = ISO-8859-1
ifeq ($(TARGET_TYPE),PALM)
PBITMS = ./bmps/palm
@ -49,8 +50,8 @@ $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
zcat $< \
| tr -d '\r' \
| tr '\207\216\222\227\234\237\226' 'aeiouu\321' \
| tr [a-zñ] [A-ZÑ] \
| grep '^[[A-JL-VX-ZÑ]*$$' \
| tr [a-zñ] [A-ZÑ] \
| LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VX-ZÑ]*$$' \
| sed 's/CH/1/g' \
| sed 's/LL/2/g' \
| sed 's/RR/3/g' \

View file

@ -1,4 +1,4 @@
# -*- mode: conf; -*-
# -*- mode: conf; coding: iso-8859-1; -*-
# Copyright 2002-2006 by Eric House (xwords@eehouse.org). All rights
# reserved.
#

View file

@ -1,4 +1,4 @@
# -*-mode: Makefile -*-
# -*-mode: Makefile; coding: iso-8859-1; -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
@ -17,24 +17,25 @@
XWLANG=Swedish
LANGCODE=sv_SE
ENC = ISO-8859-1
# Swedish has too many chars for the old format.
NEWDAWG=whatever
TARGET_TYPE ?= FRANK
TARGET_TYPE ?= WINCE
include ../Makefile.2to8
include ../Makefile.langcommon
SOURCEDICT ?= $(XWDICTPATH)/$(XWLANG)/swedish15.dict.gz
SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.dict.gz
# Q and W are not available as tiles, but I'm told there's a custom in
# Swedish play of allowing blanks to stand for those letters as well.
# So we don't exclude words with those letters from the dictionary.
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
zcat $< | tr [a-zäåæöü] [A-ZÄÅÆÖÜ] | \
grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \
LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \
gzip -c > $@
# Everything but creating of the Main.dict file is inherited from the

View file

@ -1,3 +1,4 @@
# -*- mode: conf; coding: iso-8859-1; -*-
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
#
# This program is free software; you can redistribute it and/or
@ -22,7 +23,7 @@ LANGFILTER: | grep '^[A-Z
D2DARGS: -r -term 10
LANGINFO: <p>From an English-speaker's perspective, Swedish drops Q
LANGINFO: <p>From an English-speaker&apos;s perspective, Swedish drops Q
LANGINFO: and W, and adds Ä, Å, Æ, Ö and Ü.</p>
# High bit means "official". Next 7 bits are an enum where