mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-04 23:02:02 +01:00
Populate an actual wordlist for Hungarian
Add Makefile filters to create a wordlist of about 42K words derived from a GitHub project (thanks to pointers from an informant :-). Per him, and contrary to how Catalan does it, words that contain a double-letter tile also appear spelled with two single-letter tiles when the tile set allows it.
This commit is contained in:
parent
ebc6c4629d
commit
cc4776d29d
2 changed files with 30 additions and 3 deletions
|
@ -21,7 +21,7 @@ LANGCODE=hu_HU
|
|||
ENC = UTF-8

# ?= keeps any TARGET_TYPE already supplied on the command line or in the
# environment; WINCE is only the fallback.
TARGET_TYPE ?= WINCE

# Provenance note for the word list.  The earlier "tile information ... but
# no words" note was dropped: it was overwritten by this assignment anyway
# and no longer described the dictionary.
DICTNOTE = "Derived from szotar/alap/ in https://github.com/laszlonemeth/magyarispell.git"
|
||||
|
||||
LANG_SPECIAL_INFO = \
|
||||
"CS Cs cS cs" /dev/null /dev/null \
|
||||
|
@ -34,9 +34,25 @@ LANG_SPECIAL_INFO = \
|
|||
|
||||
include ../Makefile.langcommon
|
||||
|
||||
# Build the gzipped main word list from hungarian_wordlist.txt (the file
# written by mklist.sh).
#
# While filtering, double-letter tiles are represented by digits
# (CS=1, GY=2, LY=3, NY=4, SZ=5, TY=6, ZS=7); the last tr turns those
# digits into the bytes \001..\007.  Input words that already contain one
# of these digits are discarded first so they cannot be mistaken for tiles.
#
# Filtering wordlist: when a word can be spelled with either a
# double-letter tile or two single-letter tiles, it is in the list with
# both spellings.  That is what the longer sed expressions (CS, SZ, ZS)
# are doing: each emits two words.  GY, LY, NY and TY are always replaced
# by their tile.  The final grep drops every word that still contains a
# character outside the accepted set (a bare Y, for instance).
#
# NOTE(review): because \(.*\) is greedy, only the LAST CS/SZ/ZS in a word
# gets both spellings; earlier occurrences in the same word stay as two
# single letters.  Confirm whether that is intended.
#
# The wordlist is a real prerequisite so the dictionary is rebuilt when the
# list changes, and make stops with a clear error when the list is missing
# instead of silently producing an empty dictionary.  The tr sets are
# quoted so the shell can never expand them as glob patterns.
$(XWLANG)Main.dict.gz: hungarian_wordlist.txt
	tr -d '\r' < $< \
		| tr 'aábcdeéfghiíjklmnnyoóöőprtuúüűvzs' 'AÁBCDEÉFGHIÍJKLMNNYOÓÖŐPRTUÚÜŰVZS' \
		| grep -v '[1-7]' \
		| sed -e 's,^\(.*\)CS\(.*\)$$,\11\2\n\1CS\2,g' \
		| sed -e 's,GY,2,g' -e 's,LY,3,g' -e 's,NY,4,g' \
		| sed -e 's,^\(.*\)SZ\(.*\)$$,\15\2\n\1SZ\2,g' \
		| sed -e 's,TY,6,g' \
		| sed -e 's,^\(.*\)ZS\(.*\)$$,\17\2\n\1ZS\2,g' \
		| grep '^[1-7AÁBCDEÉFGHIÍJKLMNOÓÖŐPRSTUÚÜŰVZ]*$$' \
		| tr '1234567' '\001\002\003\004\005\006\007' \
		| gzip -c > $@
|
||||
|
||||
# Everything except creating the Main.dict file is inherited from the
|
||||
# "parent" Makefile.langcommon in the parent directory.
|
||||
|
|
11
xwords4/dawg/Hungarian/mklist.sh
Executable file
11
xwords4/dawg/Hungarian/mklist.sh
Executable file
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh

# Build hungarian_wordlist.txt from the magyarispell source word lists.
#
# Usage: mklist.sh [DIR]
#   DIR  szotar/alap directory of a magyarispell checkout; when omitted,
#        the original hard-coded path below is used.
#
# Output: hungarian_wordlist.txt in the current directory, one entry per
# line, sorted with duplicates removed.

set -e -u

# from: https://github.com/laszlonemeth/magyarispell.git
DIR=${1:-/home/eehouse/dev/git/magyarispell/szotar/alap}

# Check the inputs up front.  cat sits at the head of the pipeline below,
# so its failure would not stop the script (only the status of the last
# command counts) and an incomplete list would be written silently.
for f in fonev.1 melleknev.1 ige_alanyi.1 ige_targy.1 ragozatlan.2; do
    if [ ! -r "${DIR}/${f}" ]; then
        echo "$0: cannot read ${DIR}/${f}" >&2
        exit 1
    fi
done

# On each line, cut everything from the first '#', the first '[' or the
# first space to the end of the line; then drop the lines left empty.
cat "${DIR}/fonev.1" "${DIR}/melleknev.1" "${DIR}/ige_alanyi.1" \
    "${DIR}/ige_targy.1" "${DIR}/ragozatlan.2" |\
    sed -e 's/#.*$//' -e 's/\[.*$//' -e 's/ .*$//' |\
    grep -v '^$' |\
    sort -u > hungarian_wordlist.txt
|
Loading…
Reference in a new issue