xwords/xwords4/dawg/Spanish/info.txt

111 lines
3.5 KiB
Text
Raw Normal View History

# -*- mode: conf; coding: utf-8; -*-
# Copyright 2002-2006 by Eric House (xwords@eehouse.org). All rights
# reserved.
2003-12-14 19:51:44 +00:00
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# no way can unix sort handle the control chars I'm adding to text
# below
NEEDSSORT:true
CHARSET: utf-8
2003-12-14 19:51:44 +00:00
2004-07-09 04:02:48 +00:00
# Strip MS-DOS carriage returns (\r) so that lines end in a bare
# newline; the newlines themselves are kept.
LANGFILTER: tr -d '\r'
2003-12-14 19:51:44 +00:00
# Convert accented vowels to their unaccented forms: the first six input
# bytes map to a, e, i, o, u, u.  The seventh input byte (\226) is not a
# vowel; it is mapped to \321.
# NOTE(review): the input octal values look like Mac OS Roman codes for
# a-acute, e-acute, i-acute, o-acute, u-acute, u-diaeresis and n-tilde,
# and \321 like Latin-1 capital N-tilde -- confirm, especially since
# CHARSET above says utf-8.
LANGFILTER: | tr '\207\216\222\227\234\237\226' 'aeiouu\321'
2003-12-14 19:51:44 +00:00
# Uppercase everything, including ñ -> Ñ.  The sets are quoted, like the
# other tr calls in this filter, so the shell cannot glob-expand the
# brackets against file names in the current directory.
LANGFILTER: | tr '[a-zñ]' '[A-ZÑ]'
2003-12-14 19:51:44 +00:00
# Remove words with illegal letters: keep only lines made up entirely of
# A-J, L-V, X-Z and Ñ (so no K, no W, and nothing else).  The bracket
# expression used to begin with a stray "[", which made a literal "["
# count as a legal letter.
LANGFILTER: | grep '^[A-JL-VX-ZÑ]*$'
2003-12-14 19:51:44 +00:00
# Substitute the two-letter tiles: every CH, LL and RR is replaced by
# the placeholder digit 1, 2 or 3 respectively.  Printable digits are
# used here because the author could not figure out how to use octal
# values in sed; a later tr step turns the digits into control
# characters.
LANGFILTER: | sed 's/CH/1/g'
LANGFILTER: | sed 's/LL/2/g'
LANGFILTER: | sed 's/RR/3/g'
2003-12-14 19:51:44 +00:00
# Substitute in the octal control character values: the placeholder
# digits 1, 2 and 3 become the bytes \001, \002 and \003.
LANGFILTER: | tr '123' '\001\002\003'
2004-05-20 02:15:20 +00:00
# Now add nulls as terminators: each newline becomes a NUL byte, and -s
# squeezes runs of repeated terminators down to one.  Then sort the
# NUL-delimited words (-z) and drop duplicates (-u).
LANGFILTER: | tr -s '\n' '\000'
LANGFILTER: | sort -u -z
2003-12-14 19:51:44 +00:00
# Extra arguments for the dictionary-building tool.
# NOTE(review): "-term 0" presumably tells it that words are terminated
# by NUL (byte 0), matching the output of the LANGFILTER pipeline above;
# the meaning of "-r" is not visible from this file -- confirm against
# the tool's usage text.
D2DARGS: -r -term 0
2003-12-14 19:51:44 +00:00
LANGINFO: <p>Spanish words include all letters in the English alphabet
LANGINFO: except "K" and "W", and with "Ñ" added. Since there are no
2003-12-14 19:51:44 +00:00
LANGINFO: tiles for accented vowels, these are replaced by the
LANGINFO: unaccented forms.</p>
LANGINFO: <p>In addition, there are three special two-letter tiles
LANGINFO: "CH", "LL" and "RR". The rules say that the corresponding
LANGINFO: two single tiles may not be used where a two-letter tile is
LANGINFO: possible (e.g. if a word contains "CH" you must use the "CH"
LANGINFO: tile rather than a "C" tile followed by an "H" tile. Thus
2003-12-14 19:51:44 +00:00
LANGINFO: we remove all of these pairs from your wordlist and replace
LANGINFO: them with the appropriate two-letter "letter". </p>
LANGCODE:es_ES
# I think dealing with "specials" goes like this. In the {} pairs
# below, if the first string is followed by other strings (one or two)
# they are assumed to be filenames. The filenames will need to be
# found, and converted into binary files appropriate for the platform
# by rules given somewhere -- here? No, since they're the same for
# all platforms. Just put 'em in the byod.cgi file for now.
# It'll be assumed that the first name is for the "small" bitmap, and
# the second for the "large". It's ok for a file not to exist; it'll
# just be ignored. In the unlikely case that you wanted to specify
# the large but not the small this is what you'd need to do.
# High bit means "official". Next 7 bits are an enum where
# Spanish==6. Low byte is padding
XLOC_HEADER:0x8600
<BEGIN_TILES>
2 0 {"_"}
12 1 'A|a'
2 3 'B|b'
4 3 'C|c'
1 5 {"CH|ch|Ch|cH",true,true}
5 2 'D|d'
12 1 'E|e'
1 4 'F|f'
2 2 'G|g'
2 4 'H|h'
6 1 'I|i'
1 8 'J|j'
4 1 'L|l'
1 8 {"LL|ll|Ll|lL", true, true}
2 3 'M|m'
5 1 'N|n'
1 8 'Ñ|ñ'
9 1 'O|o'
2 3 'P|p'
1 5 'Q|q'
5 1 'R|r'
1 8 {"RR|rr|Rr|rR",true,true}
6 1 'S|s'
4 1 'T|t'
5 1 'U|u'
1 4 'V|v'
1 8 'X|x'
1 4 'Y|y'
1 10 'Z|z'
2003-12-14 19:51:44 +00:00
<END_TILES>
# should ignore all after the <END> above