# Copyright 2006 by Eric House (xwords@eehouse.org).  All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

# Language identification for this word list.
LANGCODE:pt
LANGNAME:Portuguese
CHARSET: utf-8

# The LANGFILTER lines below are chained into a single shell pipeline that
# every candidate word passes through, one word per line.

# deal with DOS files
LANGFILTER: tr -d '\r'
# uppercase all.  The trailing `g` matters: without it sed converts only the
# first (possibly empty) run of lowercase letters, so a word such as "Abc"
# stays mixed-case and is then silently dropped by the character filter below.
# NOTE: \U is a GNU sed extension.
LANGFILTER: | sed -e 's/[[:lower:]]*/\U&/g'
# no words not containing a vowel
LANGFILTER: | grep '[AEIOU]'
# none with illegal chars: whole line must be 2-15 letters from A-Z minus
# K, W and Y, plus Ç
LANGFILTER: | grep -x '[A-JL-VXZÇ]\{2,15\}'

# Until I can figure out how to force sort to use a locale's collation
# rules we can't trust sort in the filtering rules above and so must
# leave the sorting work to dict2dawg.pl.
D2DARGS: -r -term 10

LANGINFO:

Portuguese uses the letters A-Z, excluding K, W and Y, and adds
LANGINFO: Ç. Words containing any other letters are dropped.

# Header word layout: the top bit flags the dictionary as "official", the
# following 7 bits hold the language enum (Portuguese is 0xD), and the low
# byte is padding.
XLOC_HEADER:0x8D00

# Tile table, one tile per line.
# NOTE(review): the three columns appear to be tile face, point value and
# number of tiles -- confirm against the tool that parses this table.
{"_"}   0   3
'A|a'   1   14
'B|b'   3   3
'C|c'   2   4
'Ç|ç'   3   2
'D|d'   2   5
'E|e'   1   11
'F|f'   4   2
'G|g'   4   2
'H|h'   4   2
'I|i'   1   10
'J|j'   5   2
'L|l'   2   5
'M|m'   1   6
'N|n'   3   4
'O|o'   1   10
'P|p'   2   4
'Q|q'   6   1
'R|r'   1   6
'S|s'   1   8
'T|t'   1   5
'U|u'   1   7
'V|v'   4   2
'X|x'   8   1
'Z|z'   8   1
# should ignore all after the above