mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-20 22:26:54 +01:00
add ability to filter out "dirty" words
If a Makefile defines a dirty-word list, a new Python script is invoked to filter out those words as the dict is being built. So far I have a list for English only, which makes sense because only English wordlists are built in on Android and Google's rating system cares only about what's built in.
This commit is contained in:
parent
ed781b36b5
commit
8752432de3
6 changed files with 292 additions and 4 deletions
20
xwords4/dawg/English/Makefile.BasEnglishBowd
Normal file
20
xwords4/dawg/English/Makefile.BasEnglishBowd
Normal file
|
@ -0,0 +1,20 @@
|
|||
# -*- mode: makefile; compile-command: "make -f Makefile.BasEnglishBowd"; -*-
|
||||
# Copyright 2017 by Eric House (xwords@eehouse.org). All rights
|
||||
# reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
# Name the word list whose entries are to be stripped from the dictionary.
# It is assigned before the include so the included makefile can see it.
# NOTE(review): presumably Makefile.BasEnglish reaches the shared rules that
# test DIRTY_LIST at parse time -- confirm.
DIRTY_LIST=dirtywords.txt
|
||||
# Everything else comes from the regular BasEnglish build.
include Makefile.BasEnglish
|
19
xwords4/dawg/English/Makefile.CollegeEngBowd
Normal file
19
xwords4/dawg/English/Makefile.CollegeEngBowd
Normal file
|
@ -0,0 +1,19 @@
|
|||
# -*- mode: makefile; compile-command: "make -f Makefile.CollegeEngBowd"; -*-
|
||||
# Copyright 2017 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
# Name the word list whose entries are to be stripped from the dictionary.
# It is assigned before the include so the included makefile can see it.
# NOTE(review): presumably Makefile.CollegeEng reaches the shared rules that
# test DIRTY_LIST at parse time -- confirm.
DIRTY_LIST=dirtywords.txt
|
||||
# Everything else comes from the regular CollegeEng build.
include Makefile.CollegeEng
|
20
xwords4/dawg/English/Makefile.top5000Bowd
Normal file
20
xwords4/dawg/English/Makefile.top5000Bowd
Normal file
|
@ -0,0 +1,20 @@
|
|||
# -*- mode: makefile; compile-command: "make -f Makefile.top5000Bowd"; -*-
|
||||
# Copyright 2017 by Eric House (xwords@eehouse.org). All rights
|
||||
# reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
# Name the word list whose entries are to be stripped from the dictionary.
# It is assigned before the include so the included makefile can see it.
# NOTE(review): presumably Makefile.top5000 reaches the shared rules that
# test DIRTY_LIST at parse time -- confirm.
DIRTY_LIST=dirtywords.txt
|
||||
# Everything else comes from the regular top5000 build.
include Makefile.top5000
|
206
xwords4/dawg/English/dirtywords.txt
Normal file
206
xwords4/dawg/English/dirtywords.txt
Normal file
|
@ -0,0 +1,206 @@
|
|||
ABO
|
||||
ABOS
|
||||
ARSE
|
||||
ASSHOLE
|
||||
ASSHOLES
|
||||
BADASS
|
||||
BADASSED
|
||||
BADASSES
|
||||
BALLSIER
|
||||
BALLSIEST
|
||||
BALLSY
|
||||
BAZOOMS
|
||||
BLOWJOB
|
||||
BLOWJOBS
|
||||
BOCHE
|
||||
BOCHES
|
||||
BOOBIE
|
||||
BUBBIES
|
||||
BUBBY
|
||||
BUCKRA
|
||||
BUCKRAS
|
||||
BULLSHIT
|
||||
BULLSHITS
|
||||
BULLSHITTED
|
||||
BULLSHITTING
|
||||
CLIT
|
||||
CLITORAL
|
||||
CLITORIS
|
||||
COLOREDS
|
||||
COMSYMP
|
||||
COMSYMPS
|
||||
CRAPPER
|
||||
CRAPPERS
|
||||
CUNT
|
||||
CUNTS
|
||||
DAGO
|
||||
DAGOES
|
||||
DAGOS
|
||||
DARKEY
|
||||
DARKEYS
|
||||
DARKIE
|
||||
DARKIES
|
||||
DARKY
|
||||
DICKED
|
||||
DICKING
|
||||
DIKEY
|
||||
DYKEY
|
||||
FAGGOTRIES
|
||||
FAGGOTRY
|
||||
FAGGOTY
|
||||
FAGGY
|
||||
FART
|
||||
FARTED
|
||||
FARTING
|
||||
FARTS
|
||||
FATSO
|
||||
FATSOES
|
||||
FATSOS
|
||||
FRIG
|
||||
FRIGGED
|
||||
FRIGGING
|
||||
FRIGS
|
||||
FUCK
|
||||
FUCKED
|
||||
FUCKER
|
||||
FUCKERS
|
||||
FUCKING
|
||||
FUCKS
|
||||
FUCKUP
|
||||
FUCKUPS
|
||||
GANGBANG
|
||||
GANGBANGS
|
||||
GOY
|
||||
GOYIM
|
||||
GOYISH
|
||||
GOYS
|
||||
GRINGO
|
||||
GRINGOS
|
||||
HAOLE
|
||||
HAOLES
|
||||
HEBE
|
||||
HEBES
|
||||
HONKEY
|
||||
HONKEYS
|
||||
HONKIE
|
||||
HONKIES
|
||||
HONKY
|
||||
HUNKIES
|
||||
JESUIT
|
||||
JESUITIC
|
||||
JESUITRIES
|
||||
JESUITRY
|
||||
JESUITS
|
||||
JEW
|
||||
JEWED
|
||||
JEWING
|
||||
JEWS
|
||||
JIGABOO
|
||||
JIGABOOS
|
||||
JISM
|
||||
JISMS
|
||||
KIKE
|
||||
KIKES
|
||||
LEZ
|
||||
LEZES
|
||||
LEZZIE
|
||||
LEZZIES
|
||||
LEZZY
|
||||
LIBBER
|
||||
LIBBERS
|
||||
MERDE
|
||||
MERDES
|
||||
MICK
|
||||
MICKS
|
||||
NANCE
|
||||
NANCES
|
||||
NANCIES
|
||||
NANCY
|
||||
NIGGER
|
||||
NIGGERS
|
||||
NITCHIE
|
||||
NITCHIES
|
||||
NOOKIES
|
||||
NOOKY
|
||||
OFAY
|
||||
OFAYS
|
||||
PAPIST
|
||||
PAPISTIC
|
||||
PAPISTRIES
|
||||
PAPISTRY
|
||||
PAPISTS
|
||||
PEED
|
||||
PEEING
|
||||
PISS
|
||||
PISSED
|
||||
PISSER
|
||||
PISSERS
|
||||
PISSES
|
||||
PISSING
|
||||
POM
|
||||
POMMIE
|
||||
POMMIES
|
||||
POMMY
|
||||
POMS
|
||||
POOFS
|
||||
POOFTAH
|
||||
POOFTAHS
|
||||
POOFTER
|
||||
POOFTERS
|
||||
POOFY
|
||||
POOVE
|
||||
POOVES
|
||||
POPERIES
|
||||
POPERY
|
||||
POPISH
|
||||
POPISHLY
|
||||
REDNECK
|
||||
REDNECKS
|
||||
REDSKIN
|
||||
REDSKINS
|
||||
SHAT
|
||||
SHEENEY
|
||||
SHEENEYS
|
||||
SHEENIE
|
||||
SHEENIES
|
||||
SHEGETZ
|
||||
SHICKSA
|
||||
SHICKSAS
|
||||
SHIKSA
|
||||
SHIKSAS
|
||||
SHIKSE
|
||||
SHIKSES
|
||||
SHIT
|
||||
SHITHEAD
|
||||
SHITHEADS
|
||||
SHITS
|
||||
SHITTED
|
||||
SHITTIER
|
||||
SHITTIEST
|
||||
SHITTING
|
||||
SHITTY
|
||||
SHKOTZIM
|
||||
SKIMO
|
||||
SKIMOS
|
||||
SPIC
|
||||
SPICK
|
||||
SPICKS
|
||||
SPICS
|
||||
SPIK
|
||||
SPIKS
|
||||
TOMMED
|
||||
TOMMING
|
||||
TURD
|
||||
TURDS
|
||||
TWAT
|
||||
TWATS
|
||||
WETBACK
|
||||
WETBACKS
|
||||
WHITEYS
|
||||
WHITIES
|
||||
WOG
|
||||
WOGS
|
||||
WOP
|
||||
WOPS
|
||||
YID
|
||||
YIDS
|
|
@ -16,6 +16,14 @@
|
|||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
# Optional bowdlerizing.  BOWDLERIZER is the command placed between zcat and
# dict2dawg in the dawg stamp rule's pipeline.  With DIRTY_LIST set it is the
# remove-dirty.py filter given that list; otherwise it is plain cat, which
# passes the words through unchanged.  The conditional is evaluated while the
# makefile is parsed, so DIRTY_LIST has to be assigned before this point.
ifneq ($(DIRTY_LIST),)
|
||||
BOWDLERIZER = ../remove-dirty.py $(DIRTY_LIST)
|
||||
# Tag the language name; XWLANG is part of the generated file names
# (e.g. dawg$(XWLANG)%.stamp).
# NOTE(review): XWLANG also appears in the $(XWLANG)Main.dict.gz prerequisite,
# so the tagged name presumably has to exist as an input too -- confirm.
XWLANG := $(XWLANG)_BOWD
|
||||
# Mark the dictionary note as filtered.
# NOTE(review): the double quotes become part of the value -- confirm that
# DICTNOTE is not already quoted where it is defined.
DICTNOTE := "$(DICTNOTE) (Bowdlerized)"
|
||||
else
|
||||
BOWDLERIZER = cat
|
||||
endif
|
||||
|
||||
# Append the underscore that separates the language tag from the rest of each
# generated name (e.g. $(XWLANG)Main.dict.gz).  Runs after the optional _BOWD
# tagging above, so a bowdlerized build ends up with <lang>_BOWD_.
XWLANG := $(XWLANG)_
|
||||
|
||||
# NOTE(review): presumably a file extension for one of the output formats; its
# use is outside this excerpt -- confirm.
FRANK_EXT = xwd
|
||||
|
@ -242,7 +250,7 @@ dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.l
|
|||
start=$$(echo $@ | sed -e 's/dawg$(XWLANG)\([0-9]*\)to[0-9]*.stamp/\1/'); \
|
||||
end=$$(echo $@ | sed -e 's/dawg$(XWLANG)[0-9]*to\([0-9]*\).stamp/\1/'); \
|
||||
echo $${start} and $${end}; \
|
||||
zcat $< | $(DICT2DAWG) $(DICT2DAWGARGS) $(TABLE_ARG) table.bin -b 28000 \
|
||||
zcat $< | $(BOWDLERIZER) | $(DICT2DAWG) $(DICT2DAWGARGS) $(TABLE_ARG) table.bin -b 28000 \
|
||||
-ob dawg$(XWLANG)$* $(ENCP) \
|
||||
-sn $(XWLANG)StartLoc.bin -min $${start} -max $${end} \
|
||||
-wc $(XWLANG)$*_wordcount.bin $(FORCE_4) -ns $(XWLANG)$*_nodesize.bin
|
||||
|
@ -295,9 +303,6 @@ $(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin $(XWLANG)
|
|||
perl -e "print pack(\"n\",$$SIZ)" > $@
|
||||
cat $+ >> $@
|
||||
|
||||
%.dict: %.dict.gz
|
||||
zcat $< > $@
|
||||
|
||||
# clean this up....
|
||||
# Builds the dict2dawg tool from its single C++ source as an unoptimized
# debug binary (-DDEBUG -O0 -g) with warnings enabled (-Wall).
../dict2dawg: ../dict2dawg.cpp
|
||||
g++ -DDEBUG -O0 -g -Wall -o $@ $<
|
||||
|
|
18
xwords4/dawg/remove-dirty.py
Executable file
18
xwords4/dawg/remove-dirty.py
Executable file
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/python

# Invoked with the path to a bad-words list as its single parameter and a
# stream of words (one per line) on stdin.  Loads the bad words into a set
# and echoes every stdin word to stdout IFF it is not in the set; each
# dropped word is reported on stderr.

import sys


def load_dirty_words(path):
    """Return the set of words listed, one per line, in the file at path.

    Line terminators are stripped so that a final line without a trailing
    newline, or a list saved with CRLF endings, still matches the input.
    """
    with open(path) as listFile:
        return set(line.rstrip('\r\n') for line in listFile)


def filter_words(words, dirtyWords, out, err, progName):
    """Copy each line of words to out unless the word is in dirtyWords.

    words      -- iterable of lines, with or without line terminators
    dirtyWords -- set of bare words to drop
    out        -- stream receiving the surviving lines, written unchanged
    err        -- stream receiving one "dropping" message per removed word
    progName   -- prefix for the messages written to err
    """
    for word in words:
        # Compare without the terminator so the last, possibly
        # unterminated, line is treated like every other line.
        bare = word.rstrip('\r\n')
        if bare in dirtyWords:
            err.write(progName + ": dropping: " + bare + "\n")
        else:
            out.write(word)


def main(argv):
    """Filter stdin to stdout using the list named by argv[1].

    Returns the process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: " + argv[0] + " <dirty-word-list>\n")
        return 1
    filter_words(sys.stdin, load_dirty_words(argv[1]),
                 sys.stdout, sys.stderr, argv[0])
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
Loading…
Reference in a new issue