progress: compares english and other languages use of format

specifiers, but I think is missing mismatches in plurals still.
This commit is contained in:
Eric House 2015-08-05 07:18:22 -07:00
parent 97ebdd0d63
commit 8b90f211ed

View file

@ -4,6 +4,7 @@ import re, sys, os, getopt
from lxml import etree
g_formats = {}
g_verbose = 0
def usage(msg=''):
print
@ -20,8 +21,6 @@ def associate( formats, name, fmt ):
else:
forName = set()
formats[name] = forName
if fmt in forName:
print "Warning: %s duplicated in %s" % (fmt, name)
forName.add(fmt)
# print 'added', fmt, 'to', name
@ -40,39 +39,64 @@ def checkFormats( formats ):
if not foundDigit:
break
if curSet == testSet:
print name, "is ok"
if 0 < g_verbose: print name, "is ok"
else:
print 'ERROR: sets different for', name, curSet, testSet
print 'WARNING: sets different for', name, curSet, testSet
def checkLangFormats( engData, langData, lang ):
for key in langData:
if not key in engData:
print 'WARNING: key', key, 'in', lang, 'but not in English'
elif not engData[key] == langData[key]:
print 'ERROR: set mismatch', key, 'from', lang, engData[key], 'vs', langData[key]
sys.exit(1)
def getForElem( data, pat, elem, name ):
splits = re.split( pat, elem.text )
nParts = len(splits)
if 1 < nParts:
for ii in range(nParts):
part = splits[ii]
if re.match( pat, part ):
associate( data, name, part )
def getFormats( doc, pat, lang ):
result = {}
for elem in doc.findall('string'):
getForElem( result, pat, elem, elem.get('name') )
for elem in doc.findall('plurals'):
name = elem.get('name')
for elem in elem.findall('item'):
quantity = elem.get('quantity')
if not elem.text or 0 == len(elem.text):
print 'plurals', name, 'has empty quantity', quantity, \
'in file', lang
sys.exit(1)
else:
getForElem( result, pat, elem, name + '/' + quantity )
return result
def main():
if 1 < len(sys.argv): usage()
parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")
wd = os.path.dirname(sys.argv[0])
path = wd + '/../XWords4/res/values/strings.xml'
# Load English
engFormats = {}
parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")
path = wd + '/../XWords4/res/values/strings.xml'
doc = etree.parse(path, parser)
pat = re.compile( '(%\d\$[sd])', re.DOTALL | re.MULTILINE )
for typ in ['string', 'item']:
for elem in doc.findall(typ):
splits = re.split( pat, elem.text )
nParts = len(splits)
if 1 < nParts:
for ii in range(nParts):
part = splits[ii]
if re.match( pat, part ):
associate( engFormats, elem.get('name'), part )
engFormats = getFormats( doc, pat, 'en' )
checkFormats( engFormats )
path = wd + '/../XWords4/res_src'
for subdir, dirs, files in os.walk(path):
for file in [file for file in files if file == "strings.xml" \
and not subdir.endswith('/values')]:
print file, subdir
doc = etree.parse( subdir + '/' + file, parser )
forLang = getFormats( doc, pat, subdir )
checkLangFormats( engFormats, forLang, subdir )
sys.exit(0)
##############################################################################
if __name__ == '__main__':