xwords/xwords4/dawg/Makefile.langcommon
Eric House 8752432de3 add ability to filter out "dirty" words
If a Makefile defines a dirty word list then a new python script is
invoked to filter for and remove those words as the dict is being
built. So far I have for English only, which makes sense because only
English wordlists are built-in on Android and Google's rating system
cares only about what's built in.
2017-05-04 22:45:27 -07:00

315 lines
9.6 KiB
Makefile

# -*-mode: Makefile -*-
# Copyright 2000-2009 by Eric House (xwords@eehouse.org)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
ifneq ($(DIRTY_LIST),)
BOWDLERIZER = ../remove-dirty.py $(DIRTY_LIST)
XWLANG := $(XWLANG)_BOWD
DICTNOTE := "$(DICTNOTE) (Bowdlerized)"
else
BOWDLERIZER = cat
endif
XWLANG := $(XWLANG)_
FRANK_EXT = xwd
# This is now on for all dicts and languages. dict2dawg.pl can't
# produce the old form any longer.
NEWDAWG=foo
# this will make all dicts the new, larger type
#FORCE_4 = -force4
PALM_DICT_TYPE = DAWG
PAR = ../par.pl
LANGUAGE = $(shell basename $$(pwd))
ifdef ENC
ENCP = -enc $(ENC)
endif
# prefer the compiled version if available. But don't compile it
# automatically until it's a bit better tested.
# DICT2DAWG = $(if $(shell test -x ../dict2dawg && echo foo),\
# ../dict2dawg,../dict2dawg.pl)
#
# No. The perl version no longer works. Don't use without fixing.
DICT2DAWG = ../dict2dawg
#all: target_all
# let languages set this first, but we always add blank to it.
BLANK_INFO = "_" /dev/null /dev/null
# Supply a default so don't have to type so much; feel free to change
TARGET_TYPE ?= FRANK
ifdef NEWDAWG
TABLE_ARG = -mn
else
TABLE_ARG = -m
endif
ifeq ($(TARGET_TYPE),WINCE)
WINCE_ONLY = true
endif
.PHONY: clean_common help allbins checkARCH
##############################################################################
# PalmOS rules
##############################################################################
ifeq ($(TARGET_TYPE),PALM)
ifdef NEWDAWG
PDBTYPE = XwrD
else
PDBTYPE = Xwr3
endif
all: $(XWLANG)2to8.pdb $(XWLANG)2to9.pdb $(XWLANG)2to15.pdb
empty: $(XWLANG)0to0.pdb
# Those languages that have bitmap files for custom glyphs will need to
# define BMPBINFILES and perhaps provide a rule for building the files
binfiles.stamp: $(BMPBINFILES)
touch binfiles.stamp
palmspecials.bin: ../palm_mkspecials.pl $(BMPFILES)
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
# can't just use values.bin because the specials bitmap info is
# platform-specific
palmvalues.bin: values.bin palmspecials.bin
cat $^ > $@
# values.bin: palmspecials.bin ../xloc binfiles.stamp
# cd ../ && $(MAKE) xloc
# binfileparms=""; \
# if [ "$(BMPBINFILES)" != "" ]; then \
# for f in $(BMPBINFILES)""; \
# do binfileparms="$$binfileparms -i $$f"; \
# done; \
# fi; \
# ../xloc -l $(LANGCODE) $$binfileparms -T $@
# cat palmspecials.bin >> $@
# header (first record) is node count (long) and 4 chars:
# unsigned char firstEdgeRecNum;
# unsigned char charTableRecNum;
# unsigned char valTableRecNum;
# unsigned char reserved[3]; // worst case this points to a new resource
# include "flags" as used on the other platforms
palmheader%.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_flags.bin
rm -f $@
touch $@
cat $< >> $@
perl -e "print pack(\"C\",3)" >> $@ # first edge
perl -e "print pack(\"C\",1)" >> $@ # char table rec number
perl -e "print pack(\"C\",2)" >> $@ # valTable rec number
perl -e "print pack(\"CCC\",0)" >> $@ # reserved 3 bytes
ifdef NEWDAWG
cat $(XWLANG)$*_flags.bin >> $@
else
perl -e "print pack(\"CC\",0)" >> $@ # c code added two more...
endif
# This works, but leaves out the header info that the current version
# has. I'm not sure anybody cares, though...
$(XWLANG)%.pdb: dawg$(XWLANG)%.stamp table.bin palmvalues.bin palmheader%.bin
$(PAR) c -a backup $@ \
$(basename $(@F)) $(PALM_DICT_TYPE) $(PDBTYPE) \
palmheader$*.bin table.bin palmvalues.bin dawg$(XWLANG)$*_*.bin
# the files to export for byod
byodbins: table.bin values.bin palmvalues.bin info.txt
#endif # TARGET_TYPE==PALM
##############################################################################
# Franklin ebook rules
##############################################################################
else
ifeq ($(TARGET_TYPE),FRANK)
# If we're on a system that can build for Franklin, assume that's what
# we want to build (and the .xwd.saved [<-bug] file for other non-palm
# platforms is a by-product). But if the EBM tools aren't there, just
# build the .xwd file.
ifeq (x$(shell echo -n $$EBOOKMAN_SDK)x,xx)
all: $(XWLANG)2to8.xwd $(XWLANG)2to9.xwd $(XWLANG)2to15.xwd
empty: $(XWLANG)0to0.xwd
else
all: checkARCH $(XWLANG)2to8.seb
empty: $(XWLANG)0to0.seb
include ${EBOOKMAN_SDK}/ebsdk.uses
endif
checkARCH:
if [[ $$ARCH == "" ]]; then \
$(error "ARCH must be defined in ENV if TARGET_TYPE==FRANK"); \
fi
$(XWLANG)%.seb: $(XWLANG)%.$(FRANK_EXT) $(XWLANG)%.atts
${ESDK_CREATESEB_EXE} $<
cp $< $<.saved
$(XWLANG)%.atts: #recreate it each time based on params
echo '_PUB|global+read-only|"Eric_House"' >> $@
echo "_NAME|global+read-only|\"$(XWLANG)2to8\"" >> $@
echo "_EXT|global+read-only|\"$(FRANK_EXT)\"" >> $@
echo '_LCAT|nosign+global|"CONTENT"' >> $@
echo '_PERM|global+read-only|"r"' >> $@
# the files to export for byod
byodbins: table.bin values.bin frankspecials.bin info.txt
else
ifeq ($(TARGET_TYPE),WINCE)
### WINCE section here ###
all: $(XWLANG)2to8.xwd $(XWLANG)2to9.xwd $(XWLANG)2to15.xwd
../mkxwdcab.pl -f $<
empty: $(XWLANG)0to0.xwd
else
(Need to define TARGET_TYPE if get error pointing to this line)
endif #ifeq ($(TARGET_TYPE),FRANK)
endif
endif
ifeq (s$(TARGET_TYPE),s)
echo "It\'s an error not to specify a TARGET_TYPE"
endif
##############################################################################
# shared rules
##############################################################################
# For each entry in the table whose face < 32, there needs to be a pair of
# pbitm files and a string giving the printing form
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin frankspecials.bin
cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin \
frankspecials.bin $(XWLANG)StartLoc.bin \
$$(ls dawg$(XWLANG)$*_*.bin) > $@
cp $@ saveme.bin
# For some reason I can't fathom dawg$(XWLANG)% gets nuked every time
# the top-level rule fires (all: for whatever TARGET_TYPE.) It
# happens after the rule finishes....
# 16 bits worth of flags for the start of the eventual file. At this
# point, the flags mean this:
# 1: old-style DAWG.
# 2: new-style DAWG, three bytes per node.
# 3: new-style DAWG, four bytes per node
# 4: had dict-header
# 5: has new (2013) synonyms feature, e.g. 'a' for 'A'
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
ifdef NEWDAWG
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x001C)" > $@; echo "flags=4"; \
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x001D)" > $@; echo "flags=5"; \
elif true; \
then echo "Unexpected node size"; exit 1; \
fi
else
if [ 3 == $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x0001)" > $@; echo "flags=1"; \
else echo "ERROR: old format can't handle 4-byte"; exit 1; \
fi
endif
dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.langcommon
start=$$(echo $@ | sed -e 's/dawg$(XWLANG)\([0-9]*\)to[0-9]*.stamp/\1/'); \
end=$$(echo $@ | sed -e 's/dawg$(XWLANG)[0-9]*to\([0-9]*\).stamp/\1/'); \
echo $${start} and $${end}; \
zcat $< | $(BOWDLERIZER) | $(DICT2DAWG) $(DICT2DAWGARGS) $(TABLE_ARG) table.bin -b 28000 \
-ob dawg$(XWLANG)$* $(ENCP) \
-sn $(XWLANG)StartLoc.bin -min $${start} -max $${end} \
-wc $(XWLANG)$*_wordcount.bin $(FORCE_4) -ns $(XWLANG)$*_nodesize.bin
touch $@
$(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
@echo "got this rule"
# the files to export for byod
allbins:
$(MAKE) TARGET_TYPE=PALM byodbins
$(MAKE) TARGET_TYPE=FRANK byodbins
rm palmspecials.bin
table.bin: ../xloc.pl
ifdef NEWDAWG
perl -I../ ../xloc.pl $(ENCP) -tn -out $@
else
perl -I../ ../xloc.pl -t -out $@
endif
values.bin: ../xloc.pl
perl -I../ ../xloc.pl -v -out $@
# a binary file, two bytes, one giving the size of tiles data and the
# other the number of tiles in the dict. Tiles data is utf-8 and so
# number is not derivable from size.
$(XWLANG)_charcount.bin: table.bin ../xloc.pl
SIZ=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"c\",$$SIZ)" > $@
TMP=/tmp/tmp$$$$; \
perl -I../ ../xloc.pl -s -out $$TMP; \
cat $$TMP >> $@; \
rm -f $$TMP
$(XWLANG)%_note.bin:
echo -n $(DICTNOTE) > $@
perl -e "print pack(\"c\",0)" >> $@
$(XWLANG)%_md5sum.bin:
cat table.bin values.bin frankspecials.bin $(XWLANG)StartLoc.bin \
dawg$(XWLANG)$*_*.bin | md5sum | awk '{print $$1}' | tr -d '\n' > $@
perl -e "print pack(\"c\",0)" >> $@
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin $(XWLANG)%_md5sum.bin
SIZ=0; \
for FILE in $+; do \
SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \
done; \
perl -e "print pack(\"n\",$$SIZ)" > $@
cat $+ >> $@
# clean this up....
../dict2dawg: ../dict2dawg.cpp
g++ -DDEBUG -O0 -g -Wall -o $@ $<
clean_common:
rm -f $(XWLANG)Main.dict *.bin *.pdb *.seb dawg*.stamp *.$(FRANK_EXT) \
$(XWLANG)*.pdb $(XWLANG)*.seb
help:
@echo "make TARGET_TYPE=[FRANK|PALM]"