From 10792a168d8add1a70c141bf0d317a65a2e2f4d8 Mon Sep 17 00:00:00 2001 From: ehouse Date: Sun, 30 Apr 2006 02:35:26 +0000 Subject: [PATCH] sort to get rid of duplicates and so sort inside dict2dawg won't be needed --- dawg/Dutch/info.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dawg/Dutch/info.txt b/dawg/Dutch/info.txt index 1b212a4b2..27e7a2679 100644 --- a/dawg/Dutch/info.txt +++ b/dawg/Dutch/info.txt @@ -21,18 +21,19 @@ LANGCODE:nl_NL LANGFILTER_PRECLIP: tr -d '\r' | # uppercase all -LANGFILTER_POSTCLIP: | tr [a-zäöü] [A-ZÄÖÜ] +LANGFILTER_POSTCLIP: | tr [a-z] [A-Z] # no words not containing a vowel LANGFILTER_POSTCLIP: | grep '[AEIOU]' # none with illegal chars LANGFILTER_POSTCLIP: | grep '^[A-Z]\+$' +LANGFILTER_POSTCLIP: | sort -u LANGFILTER_POSTCLIP: | tr -s '\n' '\000' # Until I can figure out how to force sort to use a locale's collation # rules we can't trust sort in the filtering rules above and so must # leave the sorting work to dict2dawg.pl. -NEEDSSORT:true +NEEDSSORT:false LANGINFO:

Dutch has the same 26 letters as English, though of
LANGINFO: course the counts and values are different. Filtering rules