mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2024-12-28 09:58:30 +01:00
Switch to UTF-8, adding an iconv call to convert the wordlists from ISO-8859-1.
This commit is contained in:
parent
32fccca995
commit
18f8b0d4e4
3 changed files with 18 additions and 13 deletions
|
@ -1,4 +1,4 @@
|
|||
# -*- mode: makefile; coding: iso-8859-1 -*-
|
||||
# -*- mode: Makefile; coding: utf-8; -*-
|
||||
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
|
||||
# reserved.
|
||||
#
|
||||
|
@ -18,7 +18,7 @@
|
|||
|
||||
XWLANG = PortugueseBR
|
||||
LANGCODE = pt_PT
|
||||
ENC = ISO-8859-1
|
||||
ENC = UTF-8
|
||||
|
||||
TARGET_TYPE ?= WINCE
|
||||
|
||||
|
@ -29,8 +29,10 @@ include ../Makefile.langcommon
|
|||
SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portugueseBR.txt.gz
|
||||
|
||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.BrOffice
|
||||
zcat $< | tr [a-zç] [A-ZÇ] | \
|
||||
LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VXZÇ]\+$$' | \
|
||||
zcat $< | \
|
||||
iconv -f iso88591 -t utf8 | \
|
||||
sed 's,.,\U\0,g' | \
|
||||
grep '^[ABCÇDEFGHIJLMNOPQRSTUVXZ]\+$$' | \
|
||||
gzip -c > $@
|
||||
|
||||
# Everything but creating of the Main.dict file is inherited from the
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# -*- mode: makefile; coding: iso-8859-1 -*-
|
||||
# -*- mode: makefile; coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2002, 2006 by Eric House (xwords@eehouse.org). All rights
|
||||
# reserved.
|
||||
|
@ -19,7 +19,7 @@
|
|||
|
||||
XWLANG=PortuguesePT
|
||||
LANGCODE = pt_PT
|
||||
ENC = ISO-8859-1
|
||||
ENC = UTF-8
|
||||
|
||||
TARGET_TYPE ?= WINCE
|
||||
|
||||
|
@ -30,8 +30,10 @@ include ../Makefile.langcommon
|
|||
SOURCEDICT ?= $(XWDICTPATH)/Portuguese/portuguese_pt.bz2
|
||||
|
||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile.Minho
|
||||
bzcat $< | tr [a-zç] [A-ZÇ] | \
|
||||
LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VXZÇ]\+$$' | \
|
||||
bzcat $< | \
|
||||
iconv -f iso88591 -t utf8 | \
|
||||
sed 's,.,\U\0,g' | \
|
||||
grep '^[ABCÇDEFGHIJLMNOPQRSTUVXZ]\+$$' | \
|
||||
gzip -c > $@
|
||||
|
||||
# Everything but creating of the Main.dict file is inherited from the
|
||||
|
|
|
@ -15,15 +15,16 @@
|
|||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
LANGCODE:pt_PT
|
||||
CHARSET: utf-8
|
||||
|
||||
# deal with DOS files
|
||||
LANGFILTER: tr -d '\r'
|
||||
# uppercase all
|
||||
LANGFILTER: | tr [a-zç] [A-ZÇ]
|
||||
LANGFILTER: | tr [a-zç] [A-ZÇ]
|
||||
# drop words that contain no vowel
|
||||
LANGFILTER: | grep '[AEIOU]'
|
||||
# none with illegal chars
|
||||
LANGFILTER: | grep '^[A-JL-VXZÇ]\+$'
|
||||
LANGFILTER: | grep '^[A-JL-VXZÇ]\+$'
|
||||
|
||||
# Until I can figure out how to force sort to use a locale's collation
|
||||
# rules we can't trust sort in the filtering rules above and so must
|
||||
|
@ -31,8 +32,8 @@ LANGFILTER: | grep '^[A-JL-VXZ
|
|||
D2DARGS: -r -term 10
|
||||
|
||||
|
||||
LANGINFO: <p>Portugese uses the letter A-Z, excluding K, W and Y, and adds
|
||||
LANGINFO: Ç. Words containing any other letters are dropped. </p>
|
||||
LANGINFO: <p>Portuguese uses the letters A-Z, excluding K, W and Y, and adds
|
||||
LANGINFO: Ç. Words containing any other letters are dropped. </p>
|
||||
|
||||
# High bit means "official". Next 7 bits are an enum where
|
||||
# Portuguese==D. Low byte is padding
|
||||
|
@ -44,7 +45,7 @@ XLOC_HEADER:0x8D00
|
|||
14 1 'A'
|
||||
3 3 'B'
|
||||
4 2 'C'
|
||||
2 3 'Ç'
|
||||
2 3 'Ç'
|
||||
5 2 'D'
|
||||
11 1 'E'
|
||||
2 4 'F'
|
||||
|
|
Loading…
Reference in a new issue