mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-04 23:02:02 +01:00
tmp fix for Hungarian: remove duplicate words
The find-prefix feature in the current code crashes on Hungarian because it allows duplicates (words that are spelled with the same letters but different tile combinations). Modify the Makefile to exclude those (as it does for all other multi-letter-tile languages), and to pull the git source of the wordlist on demand.
This commit is contained in:
parent
7efd084d35
commit
fb2fcf15cc
3 changed files with 34 additions and 17 deletions
1
xwords4/dawg/Hungarian/.gitignore
vendored
Normal file
1
xwords4/dawg/Hungarian/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
magyarispell
|
|
@ -34,22 +34,49 @@ LANG_SPECIAL_INFO = \
|
|||
|
||||
include ../Makefile.langcommon
|
||||
|
||||
GIT_DIR = ./magyarispell
|
||||
SRC_COMMIT = 39ee7f3f8631b953d44ed6f12cfe8ae7193fbf13
|
||||
|
||||
SRC = \
|
||||
${GIT_DIR}/szotar/alap/fonev.1 \
|
||||
${GIT_DIR}/szotar/alap/melleknev.1 \
|
||||
${GIT_DIR}/szotar/alap/ige_alanyi.1 \
|
||||
${GIT_DIR}/szotar/alap/ige_targy.1 \
|
||||
${GIT_DIR}/szotar/alap/ragozatlan.2
|
||||
|
||||
# GIT_TREE names an action, not a file, so declare it phony. The special
# target is spelled ".PHONY" (leading dot); a plain "PHONY:" would only
# define an ordinary target called PHONY.
.PHONY: GIT_TREE
|
||||
|
||||
$(SRC) : GIT_TREE
|
||||
|
||||
# Make sure the wordlist sources are available locally: if $(GIT_DIR) does
# not exist, create it and clone the magyarispell repository into it; then
# (in every case) check out the pinned revision $(SRC_COMMIT) in that tree.
GIT_TREE:
|
||||
if [ ! -d $(GIT_DIR) ]; then \
|
||||
mkdir -p $(GIT_DIR); \
|
||||
git clone https://github.com/laszlonemeth/magyarispell.git $(GIT_DIR); \
|
||||
fi
|
||||
(cd $(GIT_DIR) && git checkout $(SRC_COMMIT))
|
||||
|
||||
# Build the raw wordlist from the files listed in $(SRC): concatenate them,
# strip everything from a '#', from a '[', or from the first space to the end
# of each line, drop lines that end up empty, and write the sorted,
# de-duplicated result to the target. ("$$" is make's escape for a literal
# "$" handed to the shell.)
hungarian_wordlist.txt: $(SRC)
|
||||
cat $^ | \
|
||||
sed -e 's/#.*$$//' -e 's/\[.*$$//' -e 's/ .*$$//' |\
|
||||
grep -v '^$$' |\
|
||||
sort -u > $@
|
||||
|
||||
# Filtering wordlist: When a word can be spelled with either a
|
||||
# double-letter tile or two single-letter tiles, it's in the list with
|
||||
# both spellings. That's what the longer sed expressions are doing
|
||||
# (emitting two words)
|
||||
$(XWLANG)Main.dict.gz:
|
||||
cat hungarian_wordlist.txt \
|
||||
$(XWLANG)Main.dict.gz: hungarian_wordlist.txt
|
||||
cat $< \
|
||||
| tr -d '\r' \
|
||||
| tr [aábcdeéfghiíjklmnnyoóöőprtuúüűvzs] [AÁBCDEÉFGHIÍJKLMNNYOÓÖŐPRTUÚÜŰVZS] \
|
||||
| grep -v '1\|2\|3\|4\|5\|6\|7' \
|
||||
| sed -e 's,^\(.*\)CS\(.*\)$$,\11\2\n\1CS\2,g' \
|
||||
| sed -e 's,CS,1,g' \
|
||||
| sed -e 's,GY,2,g' \
|
||||
| sed -e 's,LY,3,g' \
|
||||
| sed -e 's,NY,4,g' \
|
||||
| sed -e 's,^\(.*\)SZ\(.*\)$$,\15\2\n\1SZ\2,g' \
|
||||
| sed -e 's,SZ,5,g' \
|
||||
| sed -e 's,TY,6,g' \
|
||||
| sed -e 's,^\(.*\)ZS\(.*\)$$,\17\2\n\1ZS\2,g' \
|
||||
| sed -e 's,ZS,7,g' \
|
||||
| grep '^[1-7AÁBCDEÉFGHIÍJKLMNOÓÖŐPRSTUÚÜŰVZ]*$$' \
|
||||
| tr '1234567' '\001\002\003\004\005\006\007' \
|
||||
| gzip -c > $@
|
||||
|
@ -58,4 +85,4 @@ $(XWLANG)Main.dict.gz:
|
|||
# "parent" Makefile.langcommon in the parent directory.
|
||||
|
||||
clean: clean_common
|
||||
rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb
|
||||
rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb hungarian_wordlist.txt
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e -u
|
||||
|
||||
# from: https://github.com/laszlonemeth/magyarispell.git
|
||||
DIR=/home/eehouse/dev/git/magyarispell/szotar/alap
|
||||
|
||||
cat ${DIR}/fonev.1 ${DIR}/melleknev.1 ${DIR}/ige_alanyi.1 ${DIR}/ige_targy.1 ${DIR}/ragozatlan.2 |\
|
||||
sed -e 's/#.*$//' -e 's/\[.*$//' -e 's/ .*$//' |\
|
||||
grep -v '^$' |\
|
||||
sort -u > hungarian_wordlist.txt
|
Loading…
Reference in a new issue