use comm instead of huge grep loop to filter wordlist

This commit is contained in:
Eric House 2014-06-23 07:29:05 -07:00
parent 0749266ec3
commit b34f701b1e

View file

@@ -32,11 +32,10 @@ SOURCEDICT ?= $(ENGLISH)/CSW.dict.gz
# word from CSW that's also in top5000 makes the grade
$(XWLANG)Main.dict.gz: $(SOURCEDICT) $(ENGLISH)/Top_5000.dict.gz
# NOTE(review): this hunk appears to contain both the removed and the added
# recipe lines of the commit, with the diff +/- markers lost -- confirm
# against the real Makefile before relying on it.
#
# Apparent pre-commit recipe: upper-case the Top_5000 list into
# /tmp/Top_5000.dict, delete any existing target, then loop over the words of
# the first prerequisite ($<): strip '\r', upper-case, keep only lines made of
# 2 to 15 letters A-Z, and echo each word that grep finds as a whole line in
# /tmp/Top_5000.dict.  The loop's output is gzipped into the target ($@).
zcat $(ENGLISH)/Top_5000.dict.gz | tr [a-z] [A-Z] > /tmp/Top_5000.dict
rm -f $@
for WORD in $$(zcat $< | tr -d '\r' | tr [a-z] [A-Z] | grep -e "^[A-Z]\{2,15\}$$"); do \
grep -q "^$$WORD$$" /tmp/Top_5000.dict && echo $$WORD; \
done | gzip -c > $@
# Apparent post-commit recipe: write both word lists sorted, then intersect
# them in one pass.  comm expects sorted input, hence the two sort stages.
# Upper-cased, sorted Top_5000 list:
zcat $(ENGLISH)/Top_5000.dict.gz | tr [a-z] [A-Z] | sort > /tmp/Top_5000.dict
# Same '\r' stripping, upper-casing and 2-15 letter filter as the loop above,
# sorted into /tmp/<file name of $<> (the shell's basename, not make's function).
zcat $< | tr -d '\r' | tr [a-z] [A-Z] | grep -e "^[A-Z]\{2,15\}$$" | sort > /tmp/$$(basename $<)
rm -f $@
# comm -12 suppresses the lines unique to either file, leaving only the words
# present in both lists; that intersection is gzipped into the target.
comm -12 /tmp/$$(basename $<) /tmp/Top_5000.dict | gzip -c > $@
# Everything but the creation of the Main.dict file is inherited from the
# "parent" Makefile.langcommon in the parent directory.