lots of changes/fixes in progress

This commit is contained in:
Eric House 2015-08-04 07:57:56 -07:00
parent ed7d44c8ad
commit 3803341c9b

View file

@ -4,13 +4,31 @@ import mk_xml, os, sys, getopt, re
from lxml import etree from lxml import etree
# Module-wide verbosity flag: main() sets it to True when -v is given, and the
# helper functions consult it before printing their progress/debug output.
g_verbose = False
def getDocNames( doc ):
    """Collect the names of the <string> and <plurals> children of doc's root.

    Only direct children of doc.getroot() are examined; children with any
    other tag (comments included) are ignored.

    Returns a dict with two entries, 'stringNames' and 'pluralsNames', each
    of which maps every name found for that tag to True.
    """
    namesByTag = { 'string' : {}, 'plurals' : {} }
    for child in doc.getroot():
        bucket = namesByTag.get( child.tag )
        if bucket is not None:
            bucket[child.get('name')] = True
    return { 'stringNames' : namesByTag['string'],
             'pluralsNames' : namesByTag['plurals'],
             }
def getEnglishNames():
    """Return getDocNames() applied to the English file res/values/strings.xml.

    The path is relative, so the result depends on the current directory.
    """
    return getDocNames( etree.parse("res/values/strings.xml") )
def longestCommon( name, pairs ):
    """Return a key of pairs that shares the longest leading prefix with name.

    Prefixes of name are tried from length 1 up to and including the whole
    of name; for each length the first key (in pairs' iteration order) whose
    own prefix of that length is equal becomes the current match.

    name  -- the string to look up
    pairs -- a dict (or other iterable of string keys) to search

    Returns the matching key, or None when not even the first character of
    name is shared with any key (or name is empty).
    """
    match = None
    # Go up to len(name) inclusive so that a key which starts with all of
    # name can win over a key that merely shares a shorter prefix.
    for ii in range(1, len(name) + 1):
        prefix = name[:ii]
        found = None
        for key in pairs:
            if prefix == key[:ii]:
                found = key
                break
        if found is None:
            # No key shares this prefix, so no longer prefix can match.
            break
        if g_verbose: print("%s %s %s %s" % (prefix, "matches", found, "so far"))
        match = found
    return match
@ -50,28 +68,40 @@ def checkAgainst( doc, pairs ):
# response = raw_input( "unknown name: %s; respond:" % (name) ) # response = raw_input( "unknown name: %s; respond:" % (name) )
# print "you wrote:", response # print "you wrote:", response
def findWithName( doc, name, tag ):
    """Return the first element found by doc.findall(tag) whose 'name'
    attribute equals name, or None when there is no such element.
    """
    candidates = ( node for node in doc.findall(tag)
                   if node.get('name') == name )
    return next( candidates, None )
def makePluralsFrom( src ):
    """Build a to-be-translated copy of the <plurals> element src.

    The new element has the same 'name' as src and one <item> per <item>
    child of src, carrying the same 'quantity' attribute and the original
    text prefixed with "XLATE ME: ".

    The nodes are built through the element API rather than by formatting
    and re-parsing an XML string: the text read from src is already
    unescaped, so an '&' or '<' in it would make re-parsing fail or be
    taken for markup.

    Returns the new <plurals> element; src is not modified.
    """
    newNode = etree.Element('plurals', name=src.get('name'))
    for item in src.findall('item'):
        obj = etree.SubElement(newNode, 'item', quantity=item.get('quantity'))
        # Assigning .text lets the serializer do the escaping on output.
        obj.text = 'XLATE ME: %s' % (item.text,)
    return newNode
def insertAfter( locRoot, englishElem, lastMatch, prevComments ):
    """Insert a placeholder translation of englishElem just after lastMatch.

    locRoot      -- element whose children are modified in place
    englishElem  -- the English <string> or <plurals> element to copy
    lastMatch    -- existing child of locRoot to insert after
    prevComments -- comment texts to insert, in order, ahead of the new node

    A <string> becomes a new <string> with the same name and the English
    text prefixed with "XLATE ME: "; a <plurals> is converted by
    makePluralsFrom().  Any other tag terminates the program with status 1.
    Raises ValueError if lastMatch is not a child of locRoot.
    """
    name = englishElem.get('name')
    text = englishElem.text
    tag = englishElem.tag
    if g_verbose:
        print("%s %s %s %s %s %s" % ("insertAfter(", locRoot, name,
                                     lastMatch.get('name'), prevComments, ")"))

    # Build the new node before touching locRoot so that an unsupported tag
    # exits without leaving stray comments behind.
    if 'string' == tag:
        # Use the element API instead of formatting and re-parsing XML: the
        # English text is already unescaped, so an '&' or '<' in it would
        # make a re-parse fail or be taken for markup.
        newNode = etree.Element('string', name=name)
        newNode.text = 'XLATE ME: %s' % (text,)
    elif 'plurals' == tag:
        newNode = makePluralsFrom(englishElem)
    else:
        sys.exit(1)

    index = list(locRoot).index(lastMatch)
    if g_verbose: print("%s %s" % ('index:', index))
    for comment in prevComments:
        index += 1
        locRoot.insert( index, etree.Comment(comment) )
    index += 1
    locRoot.insert( index, newNode )
@ -80,6 +110,14 @@ def longFormFor(fmt ):
elif fmt == '%d': return '%1$d' elif fmt == '%d': return '%1$d'
else: assert False else: assert False
def printStats( doc ):
    """Print how many <string> and <plurals> names English and doc each have.

    The English counts come from getEnglishNames(), the others from
    getDocNames( doc ); one line is printed per element kind.
    """
    english = getEnglishNames()
    local = getDocNames( doc )
    stringCounts = ( len(english['stringNames']), len(local['stringNames']) )
    pluralsCounts = ( len(english['pluralsNames']), len(local['pluralsNames']) )
    print("strings: English: %d; lang: %d" % stringCounts)
    print("plurals: English: %d; lang: %d" % pluralsCounts)
def replacePcts( doc ): def replacePcts( doc ):
pat = re.compile( '(%[sd])', re.DOTALL | re.MULTILINE ) pat = re.compile( '(%[sd])', re.DOTALL | re.MULTILINE )
for string in doc.findall('string'): for string in doc.findall('string'):
@ -95,29 +133,29 @@ def replacePcts( doc ):
# elem before it that is in doc and insert it after. Start over each # elem before it that is in doc and insert it after. Start over each
# time to avoid problems with iteration and order # time to avoid problems with iteration and order
def doAddMissing( doc ):
    """Add to doc every English <string>/<plurals> element it is missing.

    Walks the children of res/values/strings.xml in order.  Comments are
    collected until the next element; when that element has no counterpart
    (same tag and name) under doc's root, insertAfter() adds a placeholder
    for it, preceded by the collected comments, right after the most
    recently matched or inserted element.  Collected comments are discarded
    once the element following them has been handled.

    Any child that is neither a comment, a <string> nor a <plurals>
    terminates the program with status 1.
    """
    locRoot = doc.getroot()
    lastMatch = None
    prevComments = []
    resources = etree.parse("res/values/strings.xml").getroot()
    for elem in resources:
        tag = elem.tag
        if not isinstance( tag, basestring ):
            # comments have a non-string tag
            prevComments.append( elem.text )
            continue
        if tag not in ('string', 'plurals'):
            print("%s %s" % ("unexpected tag:", tag))
            sys.exit(1)

        name = elem.get('name')
        match = findWithName( locRoot, name, tag )
        if match is None:
            if g_verbose: print("%s %s" % ('NO match for', name))
            if lastMatch is None:
                # Nothing has matched yet, so there is no element to insert
                # after: anchor on a throw-away comment placed first, which
                # puts the new nodes at the very start of locRoot.
                anchor = etree.Comment('')
                locRoot.insert( 0, anchor )
                insertAfter( locRoot, elem, anchor, prevComments )
                locRoot.remove( anchor )
            else:
                insertAfter( locRoot, elem, lastMatch, prevComments )
            # Continue from the node just added; otherwise consecutive
            # missing elements would all go right after the same anchor and
            # end up in reverse order.
            match = findWithName( locRoot, name, tag )
        lastMatch = match
        prevComments = []
def compare( engPairs, docPath ): def compare( engPairs, docPath ):
locStrings = mk_xml.getStrings( docPath, True ) locStrings = mk_xml.getStrings( docPath, True )
@ -127,29 +165,67 @@ def compare( engPairs, docPath ):
otherOnly = [key for key in locStrings.keys() if not key in engPairs] otherOnly = [key for key in locStrings.keys() if not key in engPairs]
print "%d strings missing from English: %s" % (len(otherOnly), ", ".join(otherOnly)) print "%d strings missing from English: %s" % (len(otherOnly), ", ".join(otherOnly))
def removeNotInEnglish( doc ):
    """Remove from doc the <string>/<plurals> elements English does not have.

    Each direct child of doc's root that is a <string> or <plurals> whose
    name is absent from the matching set returned by getEnglishNames() is
    removed, together with the comment immediately preceding it, if any.
    A child with any other element tag terminates the program with status 1.
    """
    locRoot = doc.getroot()
    engNames = getEnglishNames()
    # tag -> ( names English defines for that tag, message printed on removal )
    known = { 'string' : ( engNames['stringNames'], "removing string" ),
              'plurals' : ( engNames['pluralsNames'], "removing plurals" ),
              }
    prevComment = None
    # Iterate over a snapshot: removing children from locRoot while
    # iterating over it directly would skip the sibling after each removal.
    for elem in list(locRoot):
        tag = elem.tag
        if not isinstance( tag, basestring ):
            # comments have a non-string tag
            prevComment = elem
            continue
        if tag not in known:
            print("%s %s" % ("unknown tag", tag))
            sys.exit(1)

        names, message = known[tag]
        name = elem.get('name')
        if name not in names:
            print("%s %s" % (message, name))
            locRoot.remove(elem)
            # Compare with None: an lxml element's truth value reflects
            # whether it has children, so a comment node always tests false.
            if prevComment is not None:
                locRoot.remove(prevComment)
        # A comment only belongs to the element directly after it.
        prevComment = None
def usage():
    """Print the command-line help to stdout and exit with status 1.

    Lists every option main() passes to getopt, including -v, which was
    accepted but previously undocumented.
    """
    print("%s %s" % ("usage:", sys.argv[0]))
    print(" -a # insert missing string elements for translation")
    print(" -c # compare each file with the English, listing string not in both")
    print(" -i # save any changes made (does not by default)")
    print(" -f # work on this strings.xml file (does all if none specified)")
    print(" -l # work on the strings.xml file for this language (e.g. ca, nl)")
    print(" -r # remove elements not present in English")
    print(" -s # print stats")
    print(" -v # print verbose progress output")
    print(" -% # replace %[sd] with the correct longer form")
    sys.exit(1)
def langFileFor(code):
    """Return the relative path of the strings.xml file for a language code
    (e.g. "ca" gives "res_src/values-ca/strings.xml").
    """
    path = "res_src/values-%s/strings.xml" % code
    return path
def main(): def main():
global g_verbose
stringsFiles = [] stringsFiles = []
addMissing = False addMissing = False
doSave = False doSave = False
doCompare = False doCompare = False
doReplace = False doReplace = False
doRemove = False
doStats = False
try: try:
pairs, rest = getopt.getopt(sys.argv[1:], "acf:s%") pairs, rest = getopt.getopt(sys.argv[1:], "acf:il:rsv%")
for option, value in pairs: for option, value in pairs:
if option == '-a': addMissing = True if option == '-a': addMissing = True
elif option == '-c': doCompare = True elif option == '-c': doCompare = True
elif option == '-%': doReplace = True elif option == '-i': doSave = True
elif option == '-f': stringsFiles.append(value) elif option == '-f': stringsFiles.append(value)
elif option == '-s': doSave = True elif option == '-l': stringsFiles.append(langFileFor(value))
elif option == '-v': g_verbose = True
elif option == '-r': doRemove = True
elif option == '-s': doStats = True
elif option == '-%': doReplace = True
else: usage() else: usage()
except: except:
usage() usage()
@ -164,11 +240,20 @@ def main():
parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8") parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")
for path in stringsFiles: for path in stringsFiles:
print "looking at", path
doc = etree.parse(path, parser) doc = etree.parse(path, parser)
# checkAgainst( doc, pairs ) # checkAgainst( doc, pairs )
if doReplace: replacePcts( doc ) if doReplace:
if addMissing: doAddMissing( doc ) replacePcts( doc )
if doCompare: compare( pairs, path ) if addMissing:
doAddMissing( doc )
if doCompare:
compare( pairs, path )
if doRemove:
removeNotInEnglish( doc )
# print stats after any other changes have been made
if doStats:
printStats( doc )
if doSave: if doSave:
out = open( path, "w" ) out = open( path, "w" )
out.write( etree.tostring( doc, pretty_print=True, encoding="utf-8", xml_declaration=True ) ) out.write( etree.tostring( doc, pretty_print=True, encoding="utf-8", xml_declaration=True ) )