#!/usr/bin/python3

# Go through all the res_src strings.xml files, and copy them over
# into the world where they'll get used in a build. This is meant to
# allow them to be built-in as an alternative to the
# locutils/downloadable system.

import re
import sys
import os
import getopt

from lxml import etree

# Prefix that marks a string as still awaiting translation.
s_prefix = 'XLATE ME: '

# languages in which it's ok to make a standalone quantity="one" into
# quantity="other"
g_oneToOthers = ['values-ja']

# Matches positional format specifiers such as %1$s or %2$d. Raw string so
# that \d and \$ are not treated as (invalid) Python string escapes.
g_formatsPat = re.compile(r'(%\d\$[sd])', re.DOTALL | re.MULTILINE)

sComment = """
    DO NOT EDIT THIS FILE!!!!
    It was generated (from %s, by %s).
    Any changes you make to it will be lost.
"""


def exitWithError(msg):
    """Print msg prefixed with 'ERROR:' and exit with status 1."""
    print('ERROR:', msg)
    sys.exit(1)


def usage():
    """Print a usage summary and exit with status 1."""
    # -k takes a (space-separated) argument; see the getopt spec in main().
    print("usage:", sys.argv[0], '[-k excludedDirs] [-f inFile]')
    sys.exit(1)


def sameOrSameWithPrefix(str1, str2):
    """Return True if str1 equals str2, directly or after stripping s_prefix.

    Only str1 is checked for the prefix. A None str1 (e.g. the text of an
    empty element) compares equal only to a None str2.
    """
    if str1 == str2:
        return True
    if str1 is None:
        return False
    return str1.startswith(s_prefix) and str1[len(s_prefix):] == str2


def sameAsEnglishPlural(engNames, strElem):
    """Return True if strElem's text duplicates an English "other"-only plural.

    engNames maps resource names to the summaries built in main(); the entry
    for strElem's name is expected to carry a 'strings' dict.
    """
    engStrings = engNames[strElem.get('name')]['strings']
    return (1 == len(engStrings)
            and 'other' in engStrings
            and sameOrSameWithPrefix(strElem.text, engStrings['other']))


# If has a one and no (or empty) other, convert the one to other
def tryConvertOne(plurals):
    """Turn a lone quantity="one" item into quantity="other", in place.

    Applies when the plurals element has a "one" item and its "other" item
    is either missing or has no text.
    """
    quantities = {item.get("quantity"): item for item in plurals}

    one = quantities.get('one')
    if one is None:
        return

    # Compare against None explicitly: the truth value of an lxml element
    # reflects whether it has children, not whether it exists.
    other = quantities.get('other')
    if other is not None and other.text:
        return

    print("converting", plurals.get('name'))
    # There may be no "other" item at all; only remove one that exists.
    if other is not None:
        plurals.remove(other)
    one.set('quantity', 'other')


def pluralsIsBogus(engNames, plurals, verbose):
    """Return True if plurals has an empty item or lacks an "other" item.

    engNames is unused but kept for interface compatibility. When verbose,
    the reason for rejecting the element is printed.
    """
    haveOther = False           # will crash without one
    bogus = False
    for item in plurals:
        if not item.text:
            bogus = True
            if verbose:
                print('dropping plurals {name} because of empty/missing "{quantity}"'
                      .format(name=plurals.get("name"),
                              quantity=item.get("quantity")))
            break
        if item.get("quantity") == "other":
            haveOther = True

    if verbose and not bogus and not haveOther:
        print("dropping plurals {name} because no \"other\" quantity"
              .format(name=plurals.get("name")))

    return bogus or not haveOther


def pluralsIsSame(engNames, plurals):
    """Return True if any item of plurals duplicates its English counterpart.

    Exits with an error if an item has no text.

    NOTE(review): the original comment here read "all children duplicates
    of English", but the code has always returned True as soon as a single
    item matches. That behaviour is preserved; confirm which is intended.
    """
    engStrings = engNames[plurals.get('name')]['strings']
    anySame = False
    for item in plurals:
        text = item.text
        if not text:
            exitWithError("bogus empty plurals item in " + plurals.get('name'))

        quantity = item.get('quantity')
        if quantity in engStrings \
           and sameOrSameWithPrefix(engStrings[quantity], text):
            anySame = True
    return anySame


# path will be something like res_src/values-pt/strings.xml. We want
# the next-to-last entry.
def valuesDir(path):
    """Return the name of the directory that directly contains path."""
    return os.path.basename(os.path.dirname(path))


def checkPlurals(engNames, elem, src, verbose):
    """Return True if the plurals element elem should be kept in the output.

    The element is rejected when English has no plurals of that name, when
    it is bogus (see pluralsIsBogus) or when it duplicates English (see
    pluralsIsSame). For source dirs listed in g_oneToOthers a lone "one"
    item is first converted to "other".
    """
    name = elem.get('name')
    ok = True
    if name not in engNames or 'plurals' != engNames[name]['type']:
        print('plurals', name, 'not in engNames or not a plurals there')
        ok = False

    if ok and valuesDir(src) in g_oneToOthers:
        tryConvertOne(elem)

    if ok and pluralsIsBogus(engNames, elem, verbose):
        ok = False
    if ok and pluralsIsSame(engNames, elem):
        ok = False
    if ok:
        # Defensive: pluralsIsBogus should already have caught empty items.
        for item in elem:
            if not item.text:
                ok = False
                exitWithError('bad empty item ' + name)
    return ok


def loadPlural(plural):
    """Return a dict mapping each child's quantity attribute to its text."""
    return {child.get('quantity'): child.text for child in plural}


def writeDoc(doc, src, dest):
    """Write doc to dest as UTF-8 XML, prefixed with a generated-file comment.

    src and the name of this script are named in the comment. The directory
    for dest is created if needed.
    """
    comment = etree.Comment(sComment % (src, os.path.basename(sys.argv[0])))
    doc.getroot().insert(0, comment)

    destDir = os.path.dirname(dest)
    if destDir:
        os.makedirs(destDir, exist_ok=True)

    content = etree.tostring(doc, pretty_print=True, encoding="utf-8",
                             xml_declaration=True).decode('utf-8')

    # The XML declaration says utf-8, so don't rely on the locale's default.
    with open(dest, "w", encoding="utf-8") as out:
        out.write(content)


def exitWithFormatError(engSet, otherSet, name, path):
    """Exit with an error describing a format-specifier mismatch."""
    exitWithError('formats set mismatch: ' + str(engSet)
                  + ' vs ' + str(otherSet) + '; ' + name
                  + ' in file ' + path)


def checkOrConvertString(engNames, elem, verbose):
    """Return True if the string element elem should be kept in the output.

    Exits with an error if elem has no text. Returns False for elements that
    are unknown to English, still carry s_prefix, or duplicate the English
    text. If English declares the name as plurals, elem is rewritten in
    place as a plurals element with a single quantity="other" item.
    """
    name = elem.get('name')
    text = elem.text
    ok = False
    if not text:
        exitWithError('elem ' + str(name) + " is empty")
    elif name not in engNames or text.startswith(s_prefix):
        ok = False
    elif 'string' != engNames[name]['type']:
        if 'plurals' == engNames[name]['type'] \
           and not sameAsEnglishPlural(engNames, elem):
            # English has a plural where this language has a plain string:
            # wrap the text in an "other" item.
            elem.tag = 'plurals'
            item = etree.Element("item")
            item.text = text
            elem.text = None
            item.set('quantity', 'other')
            elem.append(item)
            if verbose:
                print('translated string', name, 'to plural')
            ok = True
    elif sameOrSameWithPrefix(engNames[name]['string'], text):
        if verbose:
            print("Same as english: name: %s; text: %s" % (name, text))
        ok = False
    else:
        ok = True
    return ok


def checkAndCopy(parser, engNames, engFormats, src, dest, verbose):
    """Filter the strings.xml at src against English and write it to dest.

    Elements rejected by checkOrConvertString / checkPlurals are removed.
    Exits with an error if a remaining element uses a format specifier that
    the English element of the same name lacks.
    """
    doc = etree.parse(src, parser)

    # findall() returns a list, so removing while looping is safe.
    for elem in doc.findall('string'):
        if not checkOrConvertString(engNames, elem, verbose):
            elem.getparent().remove(elem)

    for elem in doc.findall('plurals'):
        if not checkPlurals(engNames, elem, src, verbose):
            elem.getparent().remove(elem)

    # Languages don't have to provide the same set of plurals items:
    # one, zero, few, other etc. (My English doesn't include "few" so
    # far.) What can't happen is for there to be a format string in the
    # totality of them that's not in the totality of English, or for
    # there to be internal inconsistencies among their format strings,
    # e.g. a %1$d in "one" and a %1$s in "other"
    formats = getFormats(doc, src)
    for name in formats:
        if not engFormats[name].hasAll(formats[name]):
            exitWithFormatError(engFormats[name], formats[name], name, dest)

    writeDoc(doc, src, dest)


class FormatSet():
    """The positional format specifiers used by a resource.

    fmts maps a conversion type ('s' or 'd') to the set of argument indices
    used with that type.
    """

    def __init__(self):
        self.fmts = {'s': set(), 'd': set()}

    def add(self, fmt):
        """Record one specifier of the form %N$s or %N$d."""
        assert(len(fmt) == 4)
        typ = fmt[3]
        indx = int(fmt[1])
        self.fmts[typ].add(indx)

    def append(self, ps):
        """Merge the specifiers of another FormatSet into this one."""
        for ch in ['s', 'd']:
            self.fmts[ch].update(ps.fmts[ch])

    # assert don't have any indices in BOTH sets
    def check(self):
        """Return False (and print an error) if an index is both 's' and 'd'."""
        isOK = True
        if self.fmts['d'].intersection(self.fmts['s']):
            isOK = False
            print('error!!!: same index in both: {}'.format(self))
        return isOK

    def hasAll(self, other):
        """Return True if every specifier in other is also in this set."""
        return all(other.fmts[ch] <= self.fmts[ch] for ch in ['s', 'd'])

    def __str__(self):
        return '{}'.format(self.fmts)


def setForElem(elem, name):
    """Return a FormatSet of the format specifiers found in elem's text.

    name is unused (it was only needed by debug output). An element with no
    text yields an empty set.
    """
    result = FormatSet()
    for fmt in g_formatsPat.findall(elem.text or ''):
        result.add(fmt)
    return result


def getFormats(doc, path):
    """Return a dict mapping 'string/NAME' or 'plurals/NAME' to a FormatSet.

    For plurals the set is the union over all items. Exits with an error if
    a plurals item is empty, or if its items use the same argument index
    with both 's' and 'd'. path is only used in error messages.
    """
    result = {}
    typ = 'string'
    for elem in doc.findall(typ):
        name = typ + '/' + elem.get('name')
        result[name] = setForElem(elem, name)

    typ = 'plurals'
    for elem in doc.findall(typ):
        name = typ + '/' + elem.get('name')
        pluralsSet = FormatSet()
        for item in elem.findall('item'):
            quantity = item.get('quantity')
            if not item.text:
                exitWithError('{} has empty quantity {} in file {}'.format(
                    name, quantity, path))
            else:
                pluralsSet.append(setForElem(item, name))
        if not pluralsSet.check():
            exitWithError('{} has overlapping sets: {}'.format(name, pluralsSet))
        result[name] = pluralsSet

    return result


def main():
    """Copy the translated strings.xml files into the app's res tree.

    Options: -k adds whitespace-separated path fragments to exclude from the
    res_src walk; -f names a source file to process (may be repeated) in
    place of walking res_src.
    """
    # add these via params later
    excepts = ['values-ca_PS', 'values-ba_CK']
    verboses = ['values-ja']

    srcFiles = []
    try:
        pairs, rest = getopt.getopt(sys.argv[1:], "k:f:")
    except getopt.GetoptError as err:
        print("Unexpected error:", err)
        usage()

    for option, value in pairs:
        if option == '-k':
            # split() rather than split(' '): an empty fragment would be a
            # substring of every path and so exclude every file.
            excepts += value.split()
        elif option == '-f':
            srcFiles.append(value)
        else:
            usage()

    # summarize the english file
    wd = os.path.dirname(sys.argv[0])
    # os.path.join copes with an empty wd (script run from its own dir).
    path = os.path.join(wd, '..', 'app', 'src', 'main', 'res', 'values',
                        'strings.xml')
    parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")
    engDoc = etree.parse(path, parser)
    engFormats = getFormats(engDoc, path)

    engNames = {}
    for typ in ['string', 'plurals']:
        for elem in engDoc.findall(typ):
            item = {'type': typ}
            if typ == 'string':
                item['string'] = elem.text
            else:
                item['strings'] = loadPlural(elem)
            engNames[elem.get('name')] = item

    # if -f option not used, iterate over src files to collect them all
    if not srcFiles:
        for subdir, dirs, files in os.walk('res_src'):
            for file in files:
                if file != "strings.xml":
                    continue
                path = "%s/%s" % (subdir, file)
                if not any(excpt in path for excpt in excepts):
                    srcFiles.append(path)

    # Finally, do the work
    for path in srcFiles:
        verbose = not verboses or any(verb in path for verb in verboses)
        dest = path.replace('res_src', 'app/src/main/res', 1)
        checkAndCopy(parser, engNames, engFormats, path, dest, verbose)


##############################################################################
if __name__ == '__main__':
    main()