Fix to work with BYOD: pass -r rather than use grep to pull illegal words; fix language code; include charset.

2025-01-30 08:34:16 +01:00 · 2008-02-23 21:59:38 +00:00 · 2008-02-23 21:59:38 +00:00 · 0ab471e8c3
commit 0ab471e8c3
parent 073aa6716b
1 changed files with 9 additions and 4 deletions
--- a/dawg/Czech/info.txt
+++ b/dawg/Czech/info.txt
@@ -15,18 +15,23 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

-LANGCODE:cs_CS
+LANGCODE:cs_CZ
+CHARSET:windows-1250

 # deal with DOS files
 LANGFILTER: tr -d '\r'
+# tr seems to work on systems that don't know the Czech locale, but
+# grep does not.  So don't use grep (e.g. to eliminate words
+# containing letters not in our alphabet).  Instead, pass the -r flag
+# via D2DARGS so that such words are dropped.
 LANGFILTER: | tr [aábcèdïeéìfghiíjklmnòoóprøsšt<C5A1>uúùvxyýzž] [AÁBCÈDÏEÉÌFGHIÍJKLMNÒOÓPRØSŠT<C5A0>UÚÙVXYÝZŽ]
-LANGFILTER: | grep '^[AÁBCÈDÏEÉÌFGHIÍJKLMNÒOÓPRØSŠT<C5A0>UÚÙVXYÝZŽ]*$'
 LANGFILTER: | sort -u

 # presence of high-ascii means we must not pass -nosort
-D2DARGS: -term 10
+D2DARGS: -term 10 -r

-LANGINFO: <p>Czech blah blah blah.</p>
+LANGINFO: <p>Czech has been tested so far with windows-1250 text
+LANGINFO: producing dictionaries that run on PalmOS.</p>

 # High bit means "official".  Next 7 bits are an enum where
 # Czech==0x10.  Low byte is padding.