mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2024-12-26 09:58:20 +01:00
iso-8859-1 -> utf8 for files and the dicts they build
This commit is contained in:
parent
73bd9be80a
commit
6f9ba42e21
4 changed files with 28 additions and 29 deletions
|
@ -1,4 +1,4 @@
|
|||
# -*-mode: Makefile; compile-command: "make all"; coding: iso-8859-1; -*-
|
||||
# -*-mode: Makefile; compile-command: "make all"; coding: utf-8; -*-
|
||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
|
@ -18,7 +18,7 @@
|
|||
XWLANG = SpanishFAA41
|
||||
LANGCODE = es_ES
|
||||
TARGET_TYPE ?= WINCE
|
||||
ENC = ISO-8859-1
|
||||
ENC = UTF-8
|
||||
|
||||
ifeq ($(TARGET_TYPE),PALM)
|
||||
PBITMS = ./bmps/palm
|
||||
|
@ -44,14 +44,13 @@ include ../Makefile.langcommon
|
|||
#$(LANG)Main.dict.gz: SpanishMain.dict.gz
|
||||
# ln -s $< $@
|
||||
|
||||
SOURCEDICT ?= $(XWDICTPATH)/Spanish/FAA_4.1.txt.gz
|
||||
SOURCEDICT ?= $(XWDICTPATH)/Spanish/FAA_4.1.utf8.gz
|
||||
|
||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
||||
zcat $< \
|
||||
| tr -d '\r' \
|
||||
| tr '\207\216\222\227\234\237\226' 'aeiouu\321' \
|
||||
| tr [a-zñ] [A-ZÑ] \
|
||||
| LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VX-ZÑ]*$$' \
|
||||
| tr [a-zñ] [A-ZÑ] \
|
||||
| LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VX-ZÑ]*$$' \
|
||||
| sed 's/CH/1/g' \
|
||||
| sed 's/LL/2/g' \
|
||||
| sed 's/RR/3/g' \
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# -*- mode: conf; coding: iso-8859-1; -*-
|
||||
# -*- mode: conf; coding: utf-8; -*-
|
||||
# Copyright 2002-2006 by Eric House (xwords@eehouse.org). All rights
|
||||
# reserved.
|
||||
#
|
||||
|
@ -20,6 +20,7 @@
|
|||
# below
|
||||
|
||||
NEEDSSORT:true
|
||||
CHARSET: utf-8
|
||||
|
||||
# MS-DOS CR chars go bye-bye
|
||||
LANGFILTER: tr -d '\r'
|
||||
|
@ -27,9 +28,9 @@ LANGFILTER: tr -d '\r'
|
|||
# convert accented vowels
|
||||
LANGFILTER: | tr '\207\216\222\227\234\237\226' 'aeiouu\321'
|
||||
# uppercase
|
||||
LANGFILTER: | tr [a-zñ] [A-ZÑ]
|
||||
LANGFILTER: | tr [a-zñ] [A-ZÑ]
|
||||
# remove words with illegal letters
|
||||
LANGFILTER: | grep '^[[A-JL-VX-ZÑ]*$'
|
||||
LANGFILTER: | grep '^[[A-JL-VX-ZÑ]*$'
|
||||
# substitute pairs (can't figure out how to use octal values)
|
||||
LANGFILTER: | sed 's/CH/1/g'
|
||||
LANGFILTER: | sed 's/LL/2/g'
|
||||
|
@ -43,7 +44,7 @@ LANGFILTER: | sort -u -z
|
|||
D2DARGS: -r -term 0
|
||||
|
||||
LANGINFO: <p>Spanish words include all letters in the English alphabet
|
||||
LANGINFO: except "K" and "W", and with "Ñ" added. Since there are no
|
||||
LANGINFO: except "K" and "W", and with "Ñ" added. Since there are no
|
||||
LANGINFO: tiles for accented vowels, these are replaced by the
|
||||
LANGINFO: unaccented forms.</p>
|
||||
|
||||
|
@ -92,8 +93,7 @@ XLOC_HEADER:0x8600
|
|||
1 8 {"LL", true, true}
|
||||
2 3 'M'
|
||||
5 1 'N'
|
||||
# /*'N~'*/
|
||||
1 8 209
|
||||
1 8 'Ñ'
|
||||
9 1 'O'
|
||||
2 3 'P'
|
||||
1 5 'Q'
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# -*-mode: Makefile; coding: iso-8859-1; -*-
|
||||
# -*-mode: Makefile; coding: utf-8; -*-
|
||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
|
@ -17,7 +17,7 @@
|
|||
|
||||
XWLANG=Swedish
|
||||
LANGCODE=sv_SE
|
||||
ENC = ISO-8859-1
|
||||
ENC = UTF-8
|
||||
|
||||
# Swedish has too many chars for the old format.
|
||||
NEWDAWG=whatever
|
||||
|
@ -28,14 +28,14 @@ include ../Makefile.2to8
|
|||
|
||||
include ../Makefile.langcommon
|
||||
|
||||
SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.dict.gz
|
||||
SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.utf8.gz
|
||||
|
||||
# Q and W are not available as tiles, but I'm told there's a custom in
|
||||
# Swedish play of allowing blanks to stand for those letters as well.
|
||||
# So we don't exclude words with those letters from the dictionary.
|
||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
||||
zcat $< | tr [a-zäåæöü] [A-ZÄÅÆÖÜ] | \
|
||||
LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \
|
||||
zcat $< | tr [a-zäåæöü] [A-ZÄÅÆÖÜ] | \
|
||||
LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \
|
||||
gzip -c > $@
|
||||
|
||||
# Everything but creating of the Main.dict file is inherited from the
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# -*- mode: conf; coding: iso-8859-1; -*-
|
||||
# -*- mode: conf; coding: utf-8; -*-
|
||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
|
@ -15,16 +15,17 @@
|
|||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
CHARSET: utf-8
|
||||
LANGCODE:sv_SE
|
||||
|
||||
LANGFILTER: tr -d '\r'
|
||||
LANGFILTER: | tr [a-zäåæöü] [A-ZÄÅÆÖÜ]
|
||||
LANGFILTER: | grep '^[A-ZÄÅÆÖÜ]*$'
|
||||
LANGFILTER: | tr [a-zäåæöü] [A-ZÄÅÆÖÜ]
|
||||
LANGFILTER: | grep '^[A-ZÄÅÆÖÜ]*$'
|
||||
|
||||
D2DARGS: -r -term 10
|
||||
|
||||
LANGINFO: <p>From an English-speaker's perspective, Swedish drops Q
|
||||
LANGINFO: and W, and adds Ä, Å, Æ, Ö and Ü.</p>
|
||||
LANGINFO: and W, and adds Ä, Å, Æ, Ö and Ü.</p>
|
||||
|
||||
# High bit means "official". Next 7 bits are an enum where
|
||||
# Swedish==7. Low byte is padding
|
||||
|
@ -36,11 +37,11 @@ XLOC_HEADER:0x8700
|
|||
2 0 {"_"}
|
||||
8 1 'A'
|
||||
# A with two dots
|
||||
2 3 'Ä'
|
||||
2 3 'Ä'
|
||||
# A with circle
|
||||
2 4 'Å'
|
||||
# Æ tile only available for blanks
|
||||
0 1 'Æ'
|
||||
2 4 'Å'
|
||||
# Æ tile only available for blanks
|
||||
0 1 'Æ'
|
||||
2 4 'B'
|
||||
1 8 'C'
|
||||
5 1 'D'
|
||||
|
@ -56,7 +57,7 @@ XLOC_HEADER:0x8700
|
|||
6 1 'N'
|
||||
5 2 'O'
|
||||
# O with two dots
|
||||
2 4 'Ö'
|
||||
2 4 'Ö'
|
||||
2 4 'P'
|
||||
# Q tile only available for blanks
|
||||
0 1 'Q'
|
||||
|
@ -64,13 +65,12 @@ XLOC_HEADER:0x8700
|
|||
8 1 'S'
|
||||
8 1 'T'
|
||||
3 4 'U'
|
||||
# Ü tile only available for blanks
|
||||
0 1 'Ü'
|
||||
# Ü tile only available for blanks
|
||||
0 1 'Ü'
|
||||
2 3 'V'
|
||||
# W tile only available for blanks
|
||||
0 1 'W'
|
||||
1 8 'X'
|
||||
1 7 'Y'
|
||||
1 10 'Z'
|
||||
|
||||
<END_TILES>
|
||||
|
|
Loading…
Reference in a new issue