xwords/xwords4/android/scripts/check_strings.py
Eric House fea31e9e3b fix script checking localized strings
It wasn't even checking all languages. Now it does, lets you specify
which ones if a subset's desired, and differentiates between sets of
format specifiers mismatching in a way that'll cause a
crash (e.g. expecting int in one and string in another) and just having
some missing.
2016-12-30 09:57:33 -08:00

158 lines
5.1 KiB
Python
Executable file

#!/usr/bin/python
import re, sys, os, getopt
from lxml import etree
# Not referenced by any other code visible in this file.
g_formats = {}
# Verbosity level: checkFormats() reports strings that pass only when this is > 0.
g_verbose = 0
def usage(msg=''):
    """Print the command-line help, optionally preceded by an error, and exit.

    msg -- error text; when non-empty it is shown on an 'Error:' line before
           the help text.

    Never returns: always ends the process with sys.exit(1).
    """
    # Every print() call gets exactly one argument, so the line means the same
    # thing as a Python 2 print statement and as a Python 3 function call.
    print('')
    if msg:
        print('Error: %s' % (msg,))
    print('usage: %s [-c <??>]* [-l] [-h]' % (sys.argv[0],))
    print(" Compares all files res/values-??/strings.xml with res/values/strings.xml")
    print(" and makes sure they're legal: same format strings, etc.")
    print(" -c options, if present, limit the check to what's specified")
    print(" -l option lists available codes and exits")
    print(" -h option prints this message")
    sys.exit(1)
def associate( formats, name, fmt ):
    """Record that the string called name uses the format specifier fmt.

    formats maps each name to the set of specifiers seen for it.  The set is
    created the first time a name is seen, and formats is updated in place.
    """
    formats.setdefault(name, set()).add(fmt)
def checkFormats( formats ):
    """Warn about strings whose positional specifiers are not a clean 1..N run.

    formats -- dict mapping a string name to the set of specifiers found in it.

    For each name an expected set is rebuilt by walking positions 1, 2, ... and
    taking the first of '%N$s' / '%N$d' that is present, stopping at the first
    position that has neither.  If the real set differs from the rebuilt one
    (for example a gap in the numbering, or both an 's' and a 'd' specifier at
    the same position) a WARNING line is printed.  Names whose sets match are
    reported only when g_verbose is positive.
    """
    for name in formats:
        curSet = formats[name]
        testSet = set()
        # Single-digit positions run 1 through 9, so the upper bound must be
        # 10; stopping at 9 would flag a valid '%9$s' as a mismatch.
        for digit in range(1, 10):
            for spec in ('s', 'd'):
                fmt = '%' + str(digit) + '$' + spec
                if fmt in curSet:
                    testSet.add(fmt)
                    break
            else:
                # Neither type exists at this position: the run ends here.
                break

        # One argument per print() keeps this valid on Python 2 and 3 alike.
        if curSet == testSet:
            if 0 < g_verbose:
                print('%s is ok' % (name,))
        else:
            print('WARNING: sets different for %s %s %s'
                  % (name, curSet, testSet))
# Make sure that if there's a positional param of one type (%s or %d,
# typically) in one set that the same positional in the other is of
# the same type.
#
# g_specPat captures the single-digit position N of a '%N$s' or '%N$d'
# specifier.  It is a raw string because '\d' and '\$' are regex escapes, not
# Python string escapes; unrecognized string escapes are deprecated in
# Python 3.
g_specPat = re.compile(r'%(\d)\$[ds]')
def asDict(set):
    """Index a collection of format specifiers by their positional number.

    Returns a dict mapping the integer N to the specifier text for every
    element that g_specPat matches; elements that do not match are left out.
    When two elements share a position, the one iterated last is kept.

    The parameter keeps its original name (which hides the builtin inside this
    function) so that the signature is unchanged.
    """
    matched = ((re.match(g_specPat, spec), spec) for spec in set)
    return dict((int(found.group(1)), spec) for found, spec in matched if found)
def setIndicesAgree(set1, set2):
    """Return True unless some position has different specifiers in the two sets.

    Both sets are indexed by position with asDict().  Only positions present in
    both are compared; a position that appears in just one of them never counts
    as a disagreement.
    """
    left = asDict(set1)
    right = asDict(set2)
    return all(left[pos] == right[pos] for pos in left if pos in right)
def checkLangFormats( engData, langData, lang ):
    """Compare one language's format specifiers against the English ones.

    engData  -- dict of string name -> set of specifiers from the English file.
    langData -- the same mapping for the language being checked.
    lang     -- language code, used only in the messages.

    For every key in langData:
      * absent from engData: prints a WARNING;
      * some shared position has a different specifier (setIndicesAgree() is
        False): prints an ERROR and ends the process with sys.exit(1);
      * the sets otherwise differ: prints a WARNING.
    Keys present only in engData are not examined.
    """
    # One argument per print() keeps this valid on Python 2 and 3 alike.
    for key in langData:
        if key not in engData:
            print('WARNING: key %s in %s but not in English' % (key, lang))
        elif not setIndicesAgree(engData[key], langData[key]):
            print('ERROR: illegal set mismatch %s from %s %s vs %s'
                  % (key, lang, engData[key], langData[key]))
            sys.exit(1)
        elif engData[key] != langData[key]:
            print('WARNING: set mismatch %s from %s %s vs %s'
                  % (key, lang, engData[key], langData[key]))
def getForElem( data, pat, elem, name ):
    """Collect the format specifiers in elem's text under name in data.

    data -- dict of name -> set of specifiers; updated in place via associate().
    pat  -- pattern for a specifier.  It needs a capturing group around the
            whole specifier so that re.split() keeps the specifiers in its
            result.
    elem -- object whose .text attribute is scanned.
    name -- key the specifiers are filed under.

    Nothing is recorded when the text is missing, empty, or has no specifier.
    """
    text = elem.text
    if not text:
        # .text can be None (e.g. an element with no text content), and
        # re.split() raises TypeError on None.  There is nothing to scan.
        return

    parts = re.split(pat, text)
    if len(parts) < 2:
        # re.split() found no specifier at all.
        return

    for part in parts:
        if re.match(pat, part):
            associate(data, name, part)
def getFormats( doc, pat, lang ):
    """Build the name -> set-of-specifiers mapping for one strings.xml document.

    doc  -- parsed document; its 'string' and 'plurals' children are scanned.
    pat  -- specifier pattern, passed through to getForElem().
    lang -- language code, used only in the error message.

    Each <string> is keyed by its 'name' attribute.  Each <item> inside a
    <plurals> is keyed as '<plurals name>/<quantity>'.  A plurals item with no
    text is fatal: an ERROR line is printed and the process ends with
    sys.exit(1).
    """
    result = {}
    for string in doc.findall('string'):
        getForElem(result, pat, string, string.get('name'))

    for plurals in doc.findall('plurals'):
        name = plurals.get('name')
        # A separate loop variable: the plurals element is not rebound while
        # its items are being iterated.
        for item in plurals.findall('item'):
            quantity = item.get('quantity')
            if not item.text:
                # One argument to print() keeps this valid on Python 2 and 3.
                print('ERROR: plurals %s has empty quantity %s in file %s'
                      % (name, quantity, lang))
                sys.exit(1)
            getForElem(result, pat, item, name + '/' + quantity)
    return result
# Matches a directory path that ends in 'values-XX' and captures the
# two-character XX suffix.
g_dirpat = re.compile(r'.*values-(..)$')
def getCodes(wd):
    """Return the language codes that have a localized strings.xml.

    wd -- directory the resource tree is located relative to; the tree walked
          is <wd>/../XWords4/res_src.

    Every directory in that tree that contains a file named strings.xml and
    whose path matches g_dirpat contributes its captured code.  The result is
    in os.walk() order, and is empty when the tree does not exist.
    """
    # os.path.join() instead of string concatenation: with an empty wd the
    # result stays relative ('../XWords4/res_src') rather than turning into
    # '/../XWords4/res_src', which is anchored at the filesystem root.
    path = os.path.join(wd, '..', 'XWords4', 'res_src')
    result = []
    for subdir, dirs, files in os.walk(path):
        if 'strings.xml' in files:
            match = re.match(g_dirpat, subdir)
            if match:
                result.append(match.group(1))
    return result
def main():
    """Parse the command line, then check each language against English.

    Options:
      -c <code>  check only this language code (may be repeated); the code
                 must be one of those returned by getCodes()
      -l         list the available codes and exit with status 0
      -h         print usage

    With no -c option every available code is checked.  The English file is
    read from <wd>/../XWords4/res/values/strings.xml and each language from
    <wd>/../XWords4/res_src/values-<code>/strings.xml, where <wd> is the
    directory of the running script.
    """
    # dirname() is '' when the script is started by bare file name; fall back
    # to the current directory so the paths built below stay relative to it.
    wd = os.path.dirname(sys.argv[0]) or os.curdir
    allCodes = getCodes(wd)

    try:
        pairs, rest = getopt.getopt(sys.argv[1:], "c:hl")
    except getopt.GetoptError as err:
        # Unknown option or missing argument: show usage, not a traceback.
        usage(str(err))

    langCodes = []
    for option, value in pairs:
        if option == '-c':
            if value in allCodes:
                langCodes.append(value)
            else:
                usage( "unexpected code: " + value + " not one of "
                       + ', '.join(allCodes) )
        elif option == '-h':
            usage()
        elif option == '-l':
            # One argument to print() keeps this valid on Python 2 and 3.
            print('Available codes: %s' % (', '.join(allCodes),))
            sys.exit(0)
        else:
            usage()

    # use the entire set if not specified
    if not langCodes:
        langCodes = allCodes

    parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")

    # Load English
    path = os.path.join(wd, '..', 'XWords4', 'res', 'values', 'strings.xml')
    doc = etree.parse(path, parser)
    # Raw string: '\d' and '\$' are regex escapes, not string escapes.  The
    # capturing group makes re.split() keep the specifiers it splits on.
    pat = re.compile(r'(%\d\$[sd])', re.DOTALL | re.MULTILINE)
    engFormats = getFormats(doc, pat, 'en')
    checkFormats(engFormats)

    for code in langCodes:
        langPath = os.path.join(wd, '..', 'XWords4', 'res_src',
                                'values-%s' % code, 'strings.xml')
        doc = etree.parse(langPath, parser)
        forLang = getFormats(doc, pat, code)
        checkLangFormats(engFormats, forLang, code)
##############################################################################
# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()