From ec464cf57f52047f7e39333408b0d10ca76588d1 Mon Sep 17 00:00:00 2001 From: Eric House Date: Sun, 14 Nov 2010 13:43:16 -0800 Subject: [PATCH] switch german to utf-8 --- dawg/German/Makefile | 17 +++++++++-------- dawg/German/info.txt | 24 ++++++++++++------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/dawg/German/Makefile b/dawg/German/Makefile index 62ec0be1e..b5e453d5a 100644 --- a/dawg/German/Makefile +++ b/dawg/German/Makefile @@ -1,5 +1,6 @@ -# -*- mode: makefile; coding: iso-8859-1; -*- -# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved. +# -*- mode: makefile; coding: utf-8; -*- +# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org). All +# rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -17,7 +18,7 @@ XWLANG = German LANGCODE = de_DE -ENC = ISO-8859-1 +ENC = UTF-8 TARGET_TYPE ?= WINCE @@ -28,11 +29,11 @@ include ../Makefile.langcommon SOURCEDICT ?= $(XWDICTPATH)/German/HansGerman.dict.gz $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile - zcat $< | tr [a-zäöü] [A-ZÄÖÜ] | \ - LANG=$(LANGCODE):$(ENC) sed -e 's/ß/SS/g' | \ - LANG=$(LANGCODE):$(ENC) grep '[AEIOUÄÖÜ]' | \ - LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÖÜ]\+$$' | \ - gzip -c > $@ + zcat $< \ + | tr [a-zäöü] [A-ZÄÖÜ] \ + | sed -e 's/ß/SS/g' \ + | grep '^[A-ZÄÖÜ]*$$' \ + | gzip -c > $@ # Everything but creating of the Main.dict file is inherited from the # "parent" Makefile.langcommon in the parent directory. diff --git a/dawg/German/info.txt b/dawg/German/info.txt index f6321981d..7c3ee588f 100644 --- a/dawg/German/info.txt +++ b/dawg/German/info.txt @@ -1,4 +1,6 @@ -# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved. +# -*- mode: conf; coding: utf-8; -*- +# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org). All +# rights reserved. 
# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -15,17 +17,18 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. LANGCODE:de_DE +CHARSET: utf-8 # deal with DOS files LANGFILTER: tr -d '\r' # substitute for sharfes-s -LANGFILTER: | sed -e 's/ß/SS/g' +LANGFILTER: | sed -e 's/ß/SS/g' # uppercase all -LANGFILTER: | tr [a-zäöü] [A-ZÄÖÜ] +LANGFILTER: | tr [a-zäöü] [A-ZÄÖÜ] # no words not containing a vowel -LANGFILTER: | grep '[AEIOUÄÖÜ]' +LANGFILTER: | grep '[AEIOUÄÖÜ]' # none with illegal chars -LANGFILTER: | grep '^[A-ZÄÖÜ]\+$' +LANGFILTER: | grep '^[A-ZÄÖÜ]\+$' # Until I can figure out how to force sort to use a locale's collation # rules we can't trust sort in the filtering rules above and so must @@ -46,9 +49,8 @@ XLOC_HEADER:0x8300 2 0 {"_"} -5 1 'A' -# A mit umlaut -1 6 196 +5 1 'A' +1 6 'Ä' 2 3 'B' 2 4 'C' 4 1 'D' @@ -63,16 +65,14 @@ XLOC_HEADER:0x8300 4 3 'M' 9 1 'N' 3 2 'O' -# O mit umlaut -1 8 214 +1 8 'Ö' 1 4 'P' 1 10 'Q' 6 1 'R' 7 1 'S' 6 1 'T' 6 1 'U' -# U mit umlaut -1 6 220 +1 6 'Ü' 1 6 'V' 1 3 'W' 1 8 'X'