mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2024-12-30 10:26:58 +01:00
switch to utf-8, adding an iconv call to translate the wordlists.
This commit is contained in:
parent
32fccca995
commit
18f8b0d4e4
3 changed files with 18 additions and 13 deletions
|
@@ -1,4 +1,4 @@
|
||||||
# -*- mode: makefile; coding: iso-8859-1 -*-
|
# -*- mode: Makefile; coding: utf-8; -*-
|
||||||
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
|
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
|
||||||
# reserved.
|
# reserved.
|
||||||
#
|
#
|
||||||
|
@@ -18,7 +18,7 @@
|
||||||
|
|
||||||
XWLANG = PortugueseBR
|
XWLANG = PortugueseBR
|
||||||
LANGCODE = pt_PT
|
LANGCODE = pt_PT
|
||||||
ENC = ISO-8859-1
|
ENC = UTF-8
|
||||||
|
|
||||||
TARGET_TYPE ?= WINCE
|
TARGET_TYPE ?= WINCE
|
||||||
|
|
||||||
|
@@ -29,8 +29,10 @@ include ../Makefile.langcommon
|
||||||
SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portugueseBR.txt.gz
|
SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portugueseBR.txt.gz
|
||||||
|
|
||||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.BrOffice
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.BrOffice
|
||||||
zcat $< | tr [a-zç] [A-ZÇ] | \
|
zcat $< | \
|
||||||
LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VXZÇ]\+$$' | \
|
iconv -f iso88591 -t utf8 | \
|
||||||
|
sed 's,.,\U\0,g' | \
|
||||||
|
grep '^[ABCÇDEFGHIJLMNOPQRSTUVXZ]\+$$' | \
|
||||||
gzip -c > $@
|
gzip -c > $@
|
||||||
|
|
||||||
# Everything but creating of the Main.dict file is inherited from the
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
|
|
@@ -1,4 +1,4 @@
|
||||||
# -*- mode: makefile; coding: iso-8859-1 -*-
|
# -*- mode: makefile; coding: utf-8 -*-
|
||||||
#
|
#
|
||||||
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
|
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
|
||||||
# reserved.
|
# reserved.
|
||||||
|
@@ -19,7 +19,7 @@
|
||||||
|
|
||||||
XWLANG=PortuguesePT
|
XWLANG=PortuguesePT
|
||||||
LANGCODE = pt_PT
|
LANGCODE = pt_PT
|
||||||
ENC = ISO-8859-1
|
ENC = UTF-8
|
||||||
|
|
||||||
TARGET_TYPE ?= WINCE
|
TARGET_TYPE ?= WINCE
|
||||||
|
|
||||||
|
@@ -30,8 +30,10 @@ include ../Makefile.langcommon
|
||||||
SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portuguese_pt.bz2
|
SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portuguese_pt.bz2
|
||||||
|
|
||||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.Minho
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.Minho
|
||||||
bzcat $< | tr [a-zç] [A-ZÇ] | \
|
bzcat $< | \
|
||||||
LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VXZÇ]\+$$' | \
|
iconv -f iso88591 -t utf8 | \
|
||||||
|
sed 's,.,\U\0,g' | \
|
||||||
|
grep '^[ABCÇDEFGHIJLMNOPQRSTUVXZ]\+$$' | \
|
||||||
gzip -c > $@
|
gzip -c > $@
|
||||||
|
|
||||||
# Everything but creating of the Main.dict file is inherited from the
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
|
|
@@ -15,15 +15,16 @@
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
LANGCODE:pt_PT
|
LANGCODE:pt_PT
|
||||||
|
CHARSET: utf-8
|
||||||
|
|
||||||
# deal with DOS files
|
# deal with DOS files
|
||||||
LANGFILTER: tr -d '\r'
|
LANGFILTER: tr -d '\r'
|
||||||
# uppercase all
|
# uppercase all
|
||||||
LANGFILTER: | tr [a-zç] [A-ZÇ]
|
LANGFILTER: | tr [a-zç] [A-ZÇ]
|
||||||
# no words not containing a vowel
|
# no words not containing a vowel
|
||||||
LANGFILTER: | grep '[AEIOU]'
|
LANGFILTER: | grep '[AEIOU]'
|
||||||
# none with illegal chars
|
# none with illegal chars
|
||||||
LANGFILTER: | grep '^[A-JL-VXZÇ]\+$'
|
LANGFILTER: | grep '^[A-JL-VXZÇ]\+$'
|
||||||
|
|
||||||
# Until I can figure out how to force sort to use a locale's collation
|
# Until I can figure out how to force sort to use a locale's collation
|
||||||
# rules we can't trust sort in the filtering rules above and so must
|
# rules we can't trust sort in the filtering rules above and so must
|
||||||
|
@@ -31,8 +32,8 @@ LANGFILTER: | grep '^[A-JL-VXZ
|
||||||
D2DARGS: -r -term 10
|
D2DARGS: -r -term 10
|
||||||
|
|
||||||
|
|
||||||
LANGINFO: <p>Portugese uses the letter A-Z, excluding K, W and Y, and adds
|
LANGINFO: <p>Portuguese uses the letter A-Z, excluding K, W and Y, and adds
|
||||||
LANGINFO: Ç. Words containing any other letters are dropped. </p>
|
LANGINFO: Ç. Words containing any other letters are dropped. </p>
|
||||||
|
|
||||||
# High bit means "official". Next 7 bits are an enum where
|
# High bit means "official". Next 7 bits are an enum where
|
||||||
# Portuguese==D. Low byte is padding
|
# Portuguese==D. Low byte is padding
|
||||||
|
@@ -44,7 +45,7 @@ XLOC_HEADER:0x8D00
|
||||||
14 1 'A'
|
14 1 'A'
|
||||||
3 3 'B'
|
3 3 'B'
|
||||||
4 2 'C'
|
4 2 'C'
|
||||||
2 3 'Ç'
|
2 3 'Ç'
|
||||||
5 2 'D'
|
5 2 'D'
|
||||||
11 1 'E'
|
11 1 'E'
|
||||||
2 4 'F'
|
2 4 'F'
|
||||||
|
|
Loading…
Reference in a new issue