xwords/xwords4/dawg/Makefile.langcommon
Eric House b8f359c3e5 add filtering to wordlist browser
Add a basic regular expression engine to the dictiter, and to the UI add
the ability to filter for "starts with", "contains" and "ends with",
which translate into ANDed RE_*, _*RE_* and _*RE, respectively (with
_ standing for blank/wildcard). The engine's tightly integrated with the
next/prevWord() functions for greatest possible speed, but unless
there's no pattern does slow things down a bit (especially when "ENDS
WITH" is used.) The full engine is not exposed (users can't provide raw
REs), and while the parser will accept nesting (e.g. ([AB]_*[CD]){2,5}
to mean words from 2-5 tiles long starting with A or B and ending with C
or D) the engine can't handle it. Which is why filtering for word length
is handled separately from REs (but also tightly integrated.)

Users can enter strings that don't map to tiles. They now get an
error. It made sense for the error alert to have a "Show tiles"
button, so there's now a dialog listing all the tiles in a wordlist,
something the browser has needed all along.
2020-08-05 09:47:44 -07:00

328 lines
9.9 KiB
Makefile

# -*-mode: Makefile -*-
# Copyright 2000-2009 by Eric House (xwords@eehouse.org)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# This breaks generating byod files on otherwise-unconfigured
# machines. Move it perhaps?
# ifndef XWDICTPATH
# $(error XWDICTPATH is not set)
# endif
# Pick the filter the word list is piped through.  With an empty DIRTY_LIST
# the list passes through cat untouched; otherwise ../remove-dirty.py is run
# with DIRTY_LIST as its arguments, and both the language tag and the
# dictionary note are marked as bowdlerized.
ifeq ($(DIRTY_LIST),)
BOWDLERIZER = cat
else
BOWDLERIZER = ../remove-dirty.py $(DIRTY_LIST)
XWLANG := $(XWLANG)_BOWD
DICTNOTE := "$(DICTNOTE) (Bowdlerized)"
endif
# XWLANG prefixes the generated file names; end it with a separator.
XWLANG := $(XWLANG)_
# Extension of the combined dictionary file produced by the
# $(XWLANG)%.$(FRANK_EXT) rule.
FRANK_EXT = xwd
# This is now on for all dicts and languages. dict2dawg.pl can't
# produce the old form any longer.
NEWDAWG=foo
# this will make all dicts the new, larger type
#FORCE_4 = -force4
# Both are handed to $(PAR) when a .pdb file is created.
PALM_DICT_TYPE = DAWG
PAR = ../par.pl
# Last path component of the directory make is run from.
LANGUAGE = $(shell basename $$(pwd))
# Only pass an encoding option to the tools when ENC has been set.
ifdef ENC
ENCP = -enc $(ENC)
endif
# prefer the compiled version if available. But don't compile it
# automatically until it's a bit better tested.
# DICT2DAWG = $(if $(shell test -x ../dict2dawg && echo foo),\
# ../dict2dawg,../dict2dawg.pl)
#
# No. The perl version no longer works. Don't use without fixing.
# Compiled DAWG builder; see the ../dict2dawg rule for how it is built.
DICT2DAWG = ../dict2dawg
#all: target_all
# let languages set this first, but we always add blank to it.
BLANK_INFO = "_" /dev/null /dev/null
# Supply a default so don't have to type so much; feel free to change
TARGET_TYPE ?= FRANK
# Table option handed to $(DICT2DAWG): -mn when NEWDAWG is set, -m otherwise.
ifndef NEWDAWG
TABLE_ARG = -m
else
TABLE_ARG = -mn
endif
# Flag the Windows CE build.
ifeq ($(TARGET_TYPE),WINCE)
WINCE_ONLY = true
endif
# Targets that name actions rather than files.  all, empty and byodbins never
# create a file of that name, so they are declared phony too; otherwise a
# stray file called e.g. "all" would make the build silently do nothing.
.PHONY: all empty byodbins clean_common help allbins checkARCH
##############################################################################
# PalmOS rules
##############################################################################
ifeq ($(TARGET_TYPE),PALM)
# PDBTYPE is passed to $(PAR) right after $(PALM_DICT_TYPE) when a .pdb is
# created; its value depends on whether NEWDAWG is set.
ifndef NEWDAWG
PDBTYPE = Xwr3
else
PDBTYPE = XwrD
endif
# Default Palm goal: one database per word-length range.
all: $(addprefix $(XWLANG),$(addsuffix .pdb,2to8 2to9 2to15))
# A database built with the 0to0 range.
empty: $(XWLANG)0to0.pdb
# Those languages that have bitmap files for custom glyphs will need to
# define BMPBINFILES and perhaps provide a rule for building the files
# Timestamp that is refreshed whenever any of $(BMPBINFILES) changes.
binfiles.stamp: $(BMPBINFILES)
	touch $@
# Output of ../palm_mkspecials.pl, given the blank entry followed by the
# language's own special-tile info.
palmspecials.bin: ../palm_mkspecials.pl $(BMPFILES)
	../palm_mkspecials.pl $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
# can't just use values.bin because the specials bitmap info is
# platform-specific
palmvalues.bin: values.bin palmspecials.bin
	cat values.bin palmspecials.bin > $@
# values.bin: palmspecials.bin ../xloc binfiles.stamp
# cd ../ && $(MAKE) xloc
# binfileparms=""; \
# if [ "$(BMPBINFILES)" != "" ]; then \
# for f in $(BMPBINFILES)""; \
# do binfileparms="$$binfileparms -i $$f"; \
# done; \
# fi; \
# ../xloc -l $(LANGCODE) $$binfileparms -T $@
# cat palmspecials.bin >> $@
# header (first record) is node count (long) and 4 chars:
# unsigned char firstEdgeRecNum;
# unsigned char charTableRecNum;
# unsigned char valTableRecNum;
# unsigned char reserved[3]; // worst case this points to a new resource
# include "flags" as used on the other platforms
# Assemble the header record for one Palm dictionary.  The file starts with
# the contents of the wordcount file (the first prerequisite); the single
# bytes 3, 1 and 2 follow, then the output of pack("CCC",0).  With NEWDAWG
# defined the stem's flags file is appended last; otherwise the output of
# pack("CC",0) is appended instead.
palmheader%.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_flags.bin
	rm -f $@
	touch $@
	cat $< >> $@
	perl -e "print pack(\"C\",3)" >> $@ # first edge
	perl -e "print pack(\"C\",1)" >> $@ # char table rec number
	perl -e "print pack(\"C\",2)" >> $@ # valTable rec number
	perl -e "print pack(\"CCC\",0)" >> $@ # reserved 3 bytes
ifdef NEWDAWG
	cat $(XWLANG)$*_flags.bin >> $@
else
	perl -e "print pack(\"CC\",0)" >> $@ # c code added two more...
endif
# This works, but leaves out the header info that the current version
# has. I'm not sure anybody cares, though...
# Pack one Palm database.  $(PAR) is invoked with "c -a backup", the output
# file, the target's file name minus its extension, $(PALM_DICT_TYPE) and
# $(PDBTYPE), followed by the header, table and values files and every
# dawg$(XWLANG)<stem>_*.bin file the shell glob matches.
$(XWLANG)%.pdb: dawg$(XWLANG)%.stamp table.bin palmvalues.bin palmheader%.bin
	$(PAR) c -a backup $@ \
		$(basename $(@F)) $(PALM_DICT_TYPE) $(PDBTYPE) \
		palmheader$*.bin table.bin palmvalues.bin dawg$(XWLANG)$*_*.bin
# the files to export for byod (Palm build); there is no recipe, the target
# only pulls in its prerequisites
byodbins: table.bin values.bin palmvalues.bin info.txt
#endif # TARGET_TYPE==PALM
##############################################################################
# Franklin ebook rules
##############################################################################
else
ifeq ($(TARGET_TYPE),FRANK)
# If we're on a system that can build for Franklin, assume that's what
# we want to build (and the .xwd.saved [<-bug] file for other non-palm
# platforms is a by-product). But if the EBM tools aren't there, just
# build the .xwd file.
# Environment variables are visible as make variables, so test EBOOKMAN_SDK
# directly.  This avoids spawning a shell, does not depend on the shell's
# "echo -n" behaviour, and consults the same variable the include below uses.
ifeq ($(strip $(EBOOKMAN_SDK)),)
# No SDK: build only the .xwd files.
all: $(XWLANG)2to8.xwd $(XWLANG)2to9.xwd $(XWLANG)2to15.xwd
empty: $(XWLANG)0to0.xwd
else
# SDK present: build the .seb file, after verifying that ARCH is set.
all: checkARCH $(XWLANG)2to8.seb
empty: $(XWLANG)0to0.seb
include ${EBOOKMAN_SDK}/ebsdk.uses
endif
# Fail the build when ARCH is missing from the environment.  The test is done
# by the shell at run time: a $(error ...) inside the recipe would be expanded
# by make before the shell ever ran and so would abort unconditionally.
# Plain "[ ... ]" is used because recipes run under /bin/sh by default.
checkARCH:
	@if [ -z "$$ARCH" ]; then \
		echo "ARCH must be defined in ENV if TARGET_TYPE==FRANK" >&2; \
		exit 1; \
	fi
# Run the SDK's createseb tool on the dictionary file (the first
# prerequisite), then keep a copy of that file with ".saved" appended to its
# name.  ESDK_CREATESEB_EXE is not defined in this file.
# NOTE(review): presumably it comes from the included ebsdk.uses - confirm.
$(XWLANG)%.seb: $(XWLANG)%.$(FRANK_EXT) $(XWLANG)%.atts
	${ESDK_CREATESEB_EXE} $<
	cp $< $<.saved
# Write the attribute file, one attribute per line.  The first line truncates
# the file with ">" so a leftover copy is replaced, not appended to; the
# remaining lines append.
# NOTE(review): _NAME is always $(XWLANG)2to8 regardless of the stem being
# built - confirm that is intended for the 0to0 ("empty") target.
$(XWLANG)%.atts: #recreate it each time based on params
	echo '_PUB|global+read-only|"Eric_House"' > $@
	echo "_NAME|global+read-only|\"$(XWLANG)2to8\"" >> $@
	echo "_EXT|global+read-only|\"$(FRANK_EXT)\"" >> $@
	echo '_LCAT|nosign+global|"CONTENT"' >> $@
	echo '_PERM|global+read-only|"r"' >> $@
# the files to export for byod (Franklin build); there is no recipe, the
# target only pulls in its prerequisites
byodbins: table.bin charcount.bin values.bin frankspecials.bin info.txt
else
ifeq ($(TARGET_TYPE),WINCE)
### WINCE section here ###
# Build the three dictionaries, then run ../mkxwdcab.pl on the first
# prerequisite only ($< is $(XWLANG)2to8.xwd).
all: $(XWLANG)2to8.xwd $(XWLANG)2to9.xwd $(XWLANG)2to15.xwd
	../mkxwdcab.pl -f $<
# A dictionary built with the 0to0 range.
empty: $(XWLANG)0to0.xwd
# the files to export for byod; no recipe, prerequisites only
byodbins: table.bin charcount.bin values.bin frankspecials.bin info.txt
else
# Reached only when TARGET_TYPE is none of PALM, FRANK or WINCE: stop with an
# explicit message instead of relying on a deliberate syntax error.
$(error Unsupported TARGET_TYPE '$(TARGET_TYPE)': expected PALM, FRANK or WINCE)
endif #ifeq ($(TARGET_TYPE),FRANK)
endif
endif
# An explicitly empty TARGET_TYPE (e.g. "make TARGET_TYPE=") defeats the ?=
# default, so reject it at parse time.  A bare echo outside of any rule is
# not a usable way to report this.
ifeq ($(strip $(TARGET_TYPE)),)
$(error It's an error not to specify a TARGET_TYPE)
endif
##############################################################################
# shared rules
##############################################################################
# For each entry in the table whose face < 32, there needs to be a pair of
# pbitm files and a string giving the printing form
# Output of ../frank_mkspecials.py, given the blank entry followed by the
# language's own special-tile info.
frankspecials.bin: ../frank_mkspecials.py $(BMPFILES)
	../frank_mkspecials.py $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
# Concatenate the final dictionary file in this order: flags, new header,
# charcount, table, values, specials, $(XWLANG)StartLoc.bin, then every
# dawg$(XWLANG)<stem>_*.bin file listed by ls.  StartLoc.bin is not a listed
# prerequisite; it is named in the -sn option of the dawg stamp rule.  A copy
# of the result is also left in saveme.bin.
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin charcount.bin table.bin values.bin frankspecials.bin
	cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin charcount.bin table.bin values.bin \
		frankspecials.bin $(XWLANG)StartLoc.bin \
		$$(ls dawg$(XWLANG)$*_*.bin) > $@
	cp $@ saveme.bin
# For some reason I can't fathom dawg$(XWLANG)% gets nuked every time
# the top-level rule fires (all: for whatever TARGET_TYPE.) It
# happens after the rule finishes....
# 16 bits worth of flags for the start of the eventual file. At this
# point, the flags mean this:
# 1: old-style DAWG.
# 2: new-style DAWG, three bytes per node.
# 3: new-style DAWG, four bytes per node
# 4: has dict-header
# 5: has new (2013) synonyms feature, e.g. 'a' for 'A'
# Write the 16-bit flags word (perl pack "n") for one dictionary, chosen from
# the node size stored in $(XWLANG)<stem>_nodesize.bin.  That file is named
# in the -ns option of the dawg stamp rule, hence the stamp prerequisite.
# Comparisons use "=" because "==" is not valid in POSIX test/[ and fails
# under shells such as dash.
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
ifdef NEWDAWG
	if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
	then perl -e "print pack(\"n\",0x001C)" > $@; echo "flags=4"; \
	elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
	then perl -e "print pack(\"n\",0x001D)" > $@; echo "flags=5"; \
	else echo "Unexpected node size"; exit 1; \
	fi
else
	if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
	then perl -e "print pack(\"n\",0x0001)" > $@; echo "flags=1"; \
	else echo "ERROR: old format can't handle 4-byte"; exit 1; \
	fi
endif
# Run the DAWG builder for one length range.  The two numbers around "to" in
# the target name are extracted with sed and passed to $(DICT2DAWG) as -min
# and -max.  The gzipped word list is decompressed, filtered through
# $(BOWDLERIZER) and piped into $(DICT2DAWG), which is also given the output
# base name (-ob), the StartLoc file (-sn), the wordcount file (-wc) and the
# nodesize file (-ns).  The stamp is touched afterwards so that later rules
# can depend on this step.
dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.langcommon
	start=$$(echo $@ | sed -e 's/dawg$(XWLANG)\([0-9]*\)to[0-9]*.stamp/\1/'); \
	end=$$(echo $@ | sed -e 's/dawg$(XWLANG)[0-9]*to\([0-9]*\).stamp/\1/'); \
	echo $${start} and $${end}; \
	zcat $< | $(BOWDLERIZER) | $(DICT2DAWG) $(DICT2DAWGARGS) $(TABLE_ARG) table.bin \
		-ob dawg$(XWLANG)$* $(ENCP) \
		-sn $(XWLANG)StartLoc.bin -min $${start} -max $${end} \
		-wc $(XWLANG)$*_wordcount.bin $(FORCE_4) -ns $(XWLANG)$*_nodesize.bin
	touch $@
# The wordcount file is named in the -wc option of the stamp rule's
# $(DICT2DAWG) call; this rule only ties the file to that stamp and prints a
# trace message.
$(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
	@echo "got this rule"
# the files to export for byod
# Build the byod export files for both the PALM and FRANK target types by
# re-invoking make, then delete palmspecials.bin.
allbins:
	$(MAKE) TARGET_TYPE=PALM byodbins
	$(MAKE) TARGET_TYPE=FRANK byodbins
	rm palmspecials.bin
# Have ../xloc.py write its -tn output, with the optional encoding argument.
# Without NEWDAWG the recipe is the single command "error".
# NOTE(review): presumably that is meant to fail the build - confirm.
table.bin: ../xloc.py
ifdef NEWDAWG
	../xloc.py $(ENCP) -tn -out $@
else
	error
endif
# Have ../xloc.py write its -v output.
values.bin: ../xloc.py
	../xloc.py -v -out $@
# a binary file, two bytes, one giving the size of tiles data and the
# other the number of tiles in the dict. Tiles data is utf-8 and so
# number is not derivable from size.
# First byte: the size of table.bin as reported by ls -l, packed as one char.
# Then the output of "../xloc.py -s" is appended.  The scratch file is named
# after the target instead of a predictable /tmp path, and a failure of
# xloc.py or cat now fails the rule rather than being silently ignored; the
# scratch file is removed either way.
charcount.bin: table.bin ../xloc.py
	SIZ=$$(ls -l $< | awk '{print $$5}'); \
		perl -e "print pack(\"c\",$$SIZ)" > $@
	TMP=$@.tmp; \
		../xloc.py -s -out $$TMP && cat $$TMP >> $@; \
		STATUS=$$?; \
		rm -f $$TMP; \
		exit $$STATUS
# Write $(DICTNOTE) via "echo -n", then append a single zero byte.
# NOTE(review): "echo -n" suppressing the newline depends on the shell's
# echo - confirm for the shells this is built with.
$(XWLANG)%_note.bin:
	echo -n $(DICTNOTE) > $@
	perl -e "print pack(\"c\",0)" >> $@
# Write the md5sum digest (first field of the output, newline removed) of the
# concatenated table, values, specials, StartLoc and dawg files for the stem,
# then append a single zero byte.  None of the inputs is declared as a
# prerequisite here, so they must already exist when the recipe runs.
$(XWLANG)%_md5sum.bin:
	cat table.bin values.bin frankspecials.bin $(XWLANG)StartLoc.bin \
		dawg$(XWLANG)$*_*.bin | md5sum | awk '{print $$1}' | tr -d '\n' > $@
	perl -e "print pack(\"c\",0)" >> $@
# Write a 16-bit word (perl pack "n"): 1 when ALLOWS_DUPLICATES is non-empty,
# 0 otherwise.
$(XWLANG)%_headerFlags.bin:
	if [ -n "$(ALLOWS_DUPLICATES)" ]; then FLAGS=1; else FLAGS=0; fi; \
	perl -e "print pack(\"n\",$$FLAGS)" > $@
# Build the new-style header: a 16-bit word (perl pack "n") holding the
# summed sizes of all prerequisites as reported by ls -l, followed by the
# prerequisites themselves in the order listed.
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \
  $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin
	TOTAL=0; \
	for PART in $+; do \
		BYTES=$$(ls -l $$PART | awk '{print $$5}'); \
		TOTAL=$$(($$TOTAL + $$BYTES)); \
	done; \
	perl -e "print pack(\"n\",$$TOTAL)" > $@
	cat $+ >> $@
# clean this up....
# Compile the DAWG builder as an unoptimised debug build.  $(CXX) defaults to
# g++ in GNU make, so the default behaviour is unchanged while the compiler
# can now be overridden (e.g. "make CXX=clang++").
../dict2dawg: ../dict2dawg.cpp
	$(CXX) -DDEBUG -O0 -g -Wall -o $@ $<
# Remove the generated files: the unpacked word list, every *.bin file, the
# dawg stamps, the combined dictionaries and the Palm/Franklin outputs.
clean_common:
	rm -f $(XWLANG)Main.dict *.bin dawg*.stamp *.$(FRANK_EXT)
	rm -f *.pdb *.seb $(XWLANG)*.pdb $(XWLANG)*.seb
# Print the supported invocations.  WINCE is listed because this file has a
# TARGET_TYPE=WINCE section alongside FRANK and PALM.
help:
	@echo "make TARGET_TYPE=[FRANK|PALM|WINCE]"