mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-14 08:01:38 +01:00
use comm instead of huge grep loop to filter wordlist
This commit is contained in:
parent
0749266ec3
commit
b34f701b1e
1 changed file with 4 additions and 5 deletions
|
@@ -32,11 +32,10 @@ SOURCEDICT ?= $(ENGLISH)/CSW.dict.gz
|
||||||
# word from CSW that's also in top5000 makes the grade
|
# word from CSW that's also in top5000 makes the grade
|
||||||
|
|
||||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) $(ENGLISH)/Top_5000.dict.gz
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) $(ENGLISH)/Top_5000.dict.gz
|
||||||
zcat $(ENGLISH)/Top_5000.dict.gz | tr [a-z] [A-Z] > /tmp/Top_5000.dict
|
zcat $(ENGLISH)/Top_5000.dict.gz | tr [a-z] [A-Z] | sort > /tmp/Top_5000.dict
|
||||||
rm -f $@
|
zcat $< | tr -d '\r' | tr [a-z] [A-Z] | grep -e "^[A-Z]\{2,15\}$$" | sort > /tmp/$$(basename $<)
|
||||||
for WORD in $$(zcat $< | tr -d '\r' | tr [a-z] [A-Z] | grep -e "^[A-Z]\{2,15\}$$"); do \
|
rm -f $@
|
||||||
grep -q "^$$WORD$$" /tmp/Top_5000.dict && echo $$WORD; \
|
comm -12 /tmp/$$(basename $<) /tmp/Top_5000.dict | gzip -c > $@
|
||||||
done | gzip -c > $@
|
|
||||||
|
|
||||||
# Everything but creating of the Main.dict file is inherited from the
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
# "parent" Makefile.langcommon in the parent directory.
|
# "parent" Makefile.langcommon in the parent directory.
|
||||||
|
|
Loading…
Reference in a new issue