#!/usr/bin/python3

# Go through all the res_src strings.xml files, and copy them over
# into the world where they'll get used in a build. This is meant to
# allow them to be built-in as an alternative to the
# locutils/downloadable system.
|
|
|
|
|
|
|
|
import re, sys, os, getopt
|
|
|
|
from lxml import etree
|
|
|
|
|
2015-03-28 05:37:29 +01:00
|
|
|
# Marker prepended to strings that still await translation.
s_prefix = 'XLATE ME: '

# languages in which it's ok to make a standalone quantity="one" into
# quantity="other"
g_oneToOthers = ['values-ja']

# Matches positional format specifiers such as %1$s or %2$d. Written as a
# raw string so that \d and \$ reach the regex engine untouched instead of
# being (invalid) Python string escapes.
g_formatsPat = re.compile( r'(%\d\$[sd])', re.DOTALL | re.MULTILINE )

# Template for the XML comment inserted at the top of every generated file;
# the two %s slots receive the source path and this script's name.
sComment = """
DO NOT EDIT THIS FILE!!!!
It was generated (from %s, by %s).
Any changes you make to it will be lost.
"""
|
|
|
|
|
2019-12-31 07:06:24 +01:00
|
|
|
def exitWithError(msg):
    """Print msg to stdout behind an 'ERROR:' tag, then exit with status 1."""
    print( 'ERROR:', msg )
    sys.exit(1)
|
|
|
|
|
2016-06-30 15:54:36 +02:00
|
|
|
def usage():
    """Print the command-line synopsis and exit with status 1."""
    synopsis = '[-k <list-o-dirs>] [-f inFile]'
    print( "usage:", sys.argv[0], synopsis )
    sys.exit(1)
|
|
|
|
|
2015-03-28 05:37:29 +01:00
|
|
|
def sameOrSameWithPrefix( str1, str2 ):
    """Return True if str1 equals str2, either as-is or once s_prefix
    has been stripped from the front of str1.

    Only str1 is checked for the prefix; str2 is always compared whole.
    """
    if str1 == str2:
        return True
    return str1.startswith(s_prefix) and str1[len(s_prefix):] == str2
|
|
|
|
|
|
|
|
def sameAsEnglishPlural(engNames, strElem):
    """Tell whether a <string> element merely repeats an English plural.

    True only when the English entry of the same name has exactly one
    quantity, that quantity is 'other', and the element's text matches it
    (optionally behind the untranslated-marker prefix).
    """
    engStrings = engNames[strElem.get('name')]['strings']
    if 1 != len(engStrings) or 'other' not in engStrings:
        return False
    return sameOrSameWithPrefix( strElem.text, engStrings['other'] )
|
|
|
|
|
2016-01-24 06:59:31 +01:00
|
|
|
# If has a one and no (or empty) other, convert the one to other
|
|
|
|
def tryConvertOne( plurals ):
    """Turn a lone quantity="one" item into quantity="other", in place.

    The conversion happens when the <plurals> element has a "one" item and
    its "other" item is either absent or has no text. An empty "other" item
    is removed first. Nothing changes if there is no "one" item or if
    "other" already carries text.

    plurals -- a <plurals> element whose children are <item> elements
    """
    quantities = { item.get("quantity"): item for item in plurals }

    one = quantities.get("one")
    if one is None:
        return

    # Compare against None explicitly: a childless element is falsy.
    other = quantities.get("other")
    if other is not None and other.text:
        return                  # a real "other" exists; leave things alone

    print( "converting", plurals.get('name'))
    # Only remove "other" when it is actually there; unconditionally
    # indexing quantities['other'] raised KeyError for a standalone "one".
    if other is not None:
        plurals.remove(other)
    one.set('quantity', 'other')
|
|
|
|
|
|
|
|
def pluralsIsBogus(engNames, plurals, verbose):
    """Return True if a <plurals> element is unusable and should be dropped.

    An element is bogus when any of its items has no text, or when none of
    its items has quantity="other". Scanning stops at the first empty item.

    engNames -- unused here; kept for signature compatibility
    plurals  -- the <plurals> element to inspect
    verbose  -- when true, print the reason the element is being dropped
    """
    haveOther = False           # will crash without one
    bogus = False
    # Iterate the element directly; getchildren() is deprecated.
    for item in plurals:
        quantity = item.get("quantity")
        if not item.text:
            bogus = True
            if verbose:
                print( 'dropping plurals {name} because of empty/missing "{quantity}"' \
                       .format(name=plurals.get("name"), quantity=quantity ))
            break
        if quantity == "other":
            haveOther = True

    if verbose and not bogus and not haveOther:
        print( "dropping plurals {name} because no \"other\" quantity" \
               .format(name=plurals.get("name")))

    return bogus or not haveOther
|
|
|
|
|
2015-08-27 15:41:47 +02:00
|
|
|
def pluralsIsSame(engNames, plurals):
    """Return True if any item of plurals duplicates its English text.

    Each item is compared with the English string of the same quantity
    (quantities English lacks are skipped). An item with no text is a
    fatal error.

    engNames -- summary of the English file, keyed by element name
    plurals  -- the translated <plurals> element
    """
    # NOTE(review): a single matching item is enough to report "same",
    # although the comment in the original code spoke of *all* children
    # being duplicates of English -- confirm which is intended.
    # NOTE(review): sameOrSameWithPrefix() strips the prefix from its first
    # argument only, and here that is the English string, not the
    # translation -- confirm the argument order.
    name = plurals.get('name')
    strings = engNames[name]['strings']
    same = False
    for item in plurals:        # getchildren() is deprecated
        text = item.text
        if not text:
            exitWithError( "bogus empty plurals item in " + name )

        quantity = item.get('quantity')
        if quantity in strings and sameOrSameWithPrefix( strings[quantity], text ):
            same = True
    return same
|
|
|
|
|
2016-01-24 06:59:31 +01:00
|
|
|
# path will be something like res_src/values-pt/strings.xml. We want
|
|
|
|
# the next-to-last entry.
|
|
|
|
def valuesDir( path ):
    """Return the next-to-last '/'-separated component of path, e.g.
    'values-pt' for 'res_src/values-pt/strings.xml'."""
    # Only the last two separators matter, so split from the right.
    return path.rsplit('/', 2)[-2]
|
|
|
|
|
|
|
|
def checkPlurals( engNames, elem, src, verbose ):
    """Decide whether a translated <plurals> element should be kept.

    Returns False (drop it) when English has no plurals of that name, when
    the element is bogus (see pluralsIsBogus), or when pluralsIsSame()
    reports it as an English duplicate. For directories listed in
    g_oneToOthers a standalone "one" item is first converted to "other".

    engNames -- summary of the English file, keyed by element name
    elem     -- the <plurals> element; may be modified in place
    src      -- path of the file elem came from
    verbose  -- passed through to pluralsIsBogus
    """
    name = elem.get('name')
    if name not in engNames or 'plurals' != engNames[name]['type']:
        print( 'plurals', name, 'not in engNames or not a plurals there')
        return False

    if valuesDir(src) in g_oneToOthers:
        tryConvertOne( elem )

    if pluralsIsBogus(engNames, elem, verbose):
        return False
    if pluralsIsSame(engNames, elem):
        return False

    # Final sanity check. "not item.text" also covers None, which
    # len(item.text) would have turned into a TypeError.
    for item in elem:           # getchildren() is deprecated
        if not item.text:
            exitWithError( 'bad empty item ' + name )
            return False
    return True
|
|
|
|
|
2015-03-28 05:37:29 +01:00
|
|
|
def loadPlural(plural):
    """Return a dict mapping each child's 'quantity' attribute to its text.

    plural -- a <plurals> element
    """
    # Iterate the element directly; getchildren() is deprecated.
    return { child.get('quantity'): child.text for child in plural }
|
|
|
|
|
2015-08-27 15:41:47 +02:00
|
|
|
def writeDoc(doc, src, dest):
    """Write doc to dest as UTF-8 XML, headed by a do-not-edit comment.

    doc  -- parsed tree; a comment is inserted as its root's first child
    src  -- path the document was read from (named in the comment)
    dest -- output path; missing parent directories are created
    """
    comment = etree.Comment(sComment % (src, os.path.basename(sys.argv[0])))
    doc.getroot().insert( 0, comment )

    # exist_ok tolerates a directory that is already there without also
    # hiding real failures the way a bare "except: pass" did.
    destDir = os.path.dirname( dest )
    if destDir:
        os.makedirs( destDir, exist_ok=True )

    content = etree.tostring( doc, pretty_print=True, encoding="utf-8",
                              xml_declaration=True ).decode( 'utf-8' )
    # The XML declaration promises utf-8, so don't let the locale's default
    # encoding decide what actually lands on disk.
    with open( dest, "w", encoding="utf-8" ) as out:
        out.write( content )
|
2015-08-27 15:41:47 +02:00
|
|
|
|
2019-12-31 07:06:24 +01:00
|
|
|
def exitWithFormatError(engSet, otherSet, name, path):
    """Abort, reporting that the format specifiers of name in path do not
    agree with the English ones.

    engSet and otherSet are rendered with str(); name and path must
    already be strings.
    """
    pieces = [ 'formats set mismatch: ', str(engSet), ' vs ', str(otherSet),
               '; ', name, ' in file ', path ]
    exitWithError( ''.join(pieces) )
|
|
|
|
|
2015-08-27 15:41:47 +02:00
|
|
|
def checkOrConvertString(engNames, elem, verbose):
    """Decide whether a translated <string> element should be kept.

    Returns False (drop it) when the name is unknown to English, when the
    text still starts with the untranslated marker, or when the text only
    repeats the English. If English defines the name as a plurals, the
    element is rewritten in place into a <plurals> holding a single
    quantity="other" item and True is returned. An element with no text is
    a fatal error.

    engNames -- summary of the English file, keyed by element name
    elem     -- the <string> element; may be modified in place
    verbose  -- when true, print what was dropped or converted
    """
    name = elem.get('name')
    text = elem.text
    if not text:
        exitWithError( 'elem ' + name + " is empty" )
        return False            # only reached if exitWithError() returns

    if name not in engNames or text.startswith(s_prefix):
        return False

    engItem = engNames[name]
    if 'string' == engItem['type']:
        if sameOrSameWithPrefix(engItem['string'], text ):
            if verbose: print( "Same as english: name: %s; text: %s" % (name, text))
            return False
        return True

    # English has something other than a string under this name.
    if 'plurals' != engItem['type'] or sameAsEnglishPlural( engNames, elem ):
        return False

    # Promote the string to a plurals with one quantity="other" item.
    elem.tag = 'plurals'
    item = etree.Element("item")
    item.text = text
    item.set('quantity', 'other')
    elem.text = None
    elem.append( item )
    if verbose: print( 'translated string', name, 'to plural')
    return True
|
2015-03-28 05:53:37 +01:00
|
|
|
|
2019-12-31 07:06:24 +01:00
|
|
|
def checkAndCopy( parser, engNames, engFormats, src, dest, verbose ):
    """Filter one translated strings.xml and write the result to dest.

    Elements rejected by checkOrConvertString() / checkPlurals() are removed
    from the tree. The format specifiers of what remains are then compared
    with the English ones; a mismatch is fatal.
    """
    doc = etree.parse(src, parser)

    # Strings first: some of them get converted to plurals, and those must
    # be seen by the plurals pass below.
    for strElem in doc.findall('string'):
        if checkOrConvertString(engNames, strElem, verbose):
            continue
        strElem.getparent().remove(strElem)

    for pluralsElem in doc.findall('plurals'):
        if checkPlurals(engNames, pluralsElem, src, verbose):
            continue
        pluralsElem.getparent().remove(pluralsElem)

    # Languages don't have to provide the same set of plurals items:
    # one, zero, few, other etc. (My English doesn't include "few" so
    # far.) What can't happen is for there to be a format string in the
    # totality of them that's not in the totality of English, or for
    # there to be internal inconsistencies among their format strings,
    # e.g. a %1$d in "one" and a %1$s in "other"
    for name, fmtSet in getFormats( doc, src ).items():
        engSet = engFormats[name]
        if not engSet.hasAll( fmtSet ):
            exitWithFormatError( engSet, fmtSet, name, dest )

    writeDoc(doc, src, dest)
|
2015-03-27 15:26:51 +01:00
|
|
|
|
2020-06-15 21:58:23 +02:00
|
|
|
class FormatSet():
    """The positional format specifiers used by one string or plurals.

    self.fmts maps each conversion type ('s' and 'd') to the set of
    argument indices used with that type.
    """

    def __init__(self):
        self.fmts = { typ: set() for typ in ('s', 'd') }

    def add(self, fmt):
        """Record one four-character specifier such as '%1$s'."""
        assert(len(fmt) == 4)
        indx, typ = int(fmt[1]), fmt[3]
        self.fmts[typ].add(indx)

    def append( self, ps ):
        """Merge every index of another FormatSet into this one."""
        for typ, indices in self.fmts.items():
            indices.update( ps.fmts[typ] )

    # assert don't have any indices in BOTH sets
    def check(self):
        """Return True unless some index is used as both 's' and 'd';
        the conflict is printed before False is returned."""
        if self.fmts['d'].isdisjoint(self.fmts['s']):
            return True
        print('error!!!: same index in both: {}'.format(self))
        return False

    def hasAll(self, other):
        """Return True if every specifier in other is also in self."""
        return all( other.fmts[typ] <= self.fmts[typ] for typ in ('s', 'd') )

    def __str__(self):
        return str(self.fmts)
|
|
|
|
|
2019-12-31 07:06:24 +01:00
|
|
|
def setForElem( elem, name ):
    """Return a FormatSet holding every format specifier in elem's text.

    elem -- element whose .text is scanned with g_formatsPat
    name -- unused here apart from debugging; kept for the callers
    """
    result = FormatSet()
    # An element without text has .text == None; treat that as "no
    # specifiers" rather than handing None to the regex engine.
    for fmt in g_formatsPat.findall( elem.text or '' ):
        result.add( fmt )
    return result
|
|
|
|
|
|
|
|
def getFormats( doc, path ):
    """Collect the format specifiers of every string and plurals in doc.

    Returns a dict keyed by 'string/<name>' or 'plurals/<name>' whose values
    are FormatSets. For a plurals the set is the union over all its items.
    An item with no text, or a plurals using one index as both %s and %d,
    is a fatal error.

    doc  -- parsed strings.xml tree
    path -- where doc came from; used only in error messages
    """
    result = {}

    for elem in doc.findall('string'):
        name = 'string/' + elem.get('name')
        result[name] = setForElem( elem, name )

    for elem in doc.findall('plurals'):
        name = 'plurals/' + elem.get('name')
        pluralsSet = FormatSet()
        for item in elem.findall('item'):
            if not item.text:
                # Report the file via 'path'; the name 'lang' used here
                # before was never defined and raised NameError.
                exitWithError( '{} has empty quantity {} in file {}' \
                               .format( name, item.get('quantity'), path ) )
            else:
                pluralsSet.append(setForElem( item, name ))
        if not pluralsSet.check():
            exitWithError( '{} has overlapping sets: {}'.format(name, pluralsSet))
        result[name] = pluralsSet

    return result
|
|
|
|
|
2015-03-27 15:26:51 +01:00
|
|
|
def main():
    """Copy the translated res_src strings.xml files into the app's res tree.

    Options:
      -k <dirs>  space-separated names; files whose path contains one of
                 them are skipped (added to the built-in exceptions)
      -f <file>  process this file; may be repeated. Without -f, every
                 strings.xml found under res_src is processed.
    """
    # add these via params later
    excepts = ['values-ca_PS', 'values-ba_CK']
    verboses = ['values-ja']
    srcFiles = []

    try:
        pairs, rest = getopt.getopt(sys.argv[1:], "k:f:")
    except getopt.GetoptError as err:
        # Catch only option-parsing failures; a bare except also caught
        # the SystemExit raised by usage() itself.
        print( "Unexpected error:", err)
        usage()

    for option, value in pairs:
        # This has to be one if/elif chain: as two separate ifs, every -k
        # fell into the else of the -f test and ended up in usage().
        if option == '-k': excepts += value.split(' ')
        elif option == '-f': srcFiles.append(value)
        else: usage()

    # summarize the english file
    # dirname() is '' when the script is run from its own directory; fall
    # back to '.' so the path doesn't become the root-relative '/../app'.
    wd = os.path.dirname(sys.argv[0]) or '.'
    path = wd + '/../app/src/main/res/values/strings.xml'

    parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")
    engDoc = etree.parse(path, parser)
    engFormats = getFormats( engDoc, path )

    engNames = {}
    for typ in ['string', 'plurals']:
        for elem in engDoc.findall(typ):
            item = { 'type' : typ }
            if typ == 'string':
                item['string'] = elem.text
            else:
                item['strings'] = loadPlural(elem)
            engNames[elem.get('name')] = item

    # if -f option not used, iterate over src files to collect them all
    if not srcFiles:
        for subdir, dirs, files in os.walk('res_src'):
            if "strings.xml" not in files:
                continue
            path = "%s/%s" % (subdir, "strings.xml")
            if not any(excpt in path for excpt in excepts):
                srcFiles.append(path)

    # Finally, do the work
    for path in srcFiles:
        verbose = not verboses or any(verb in path for verb in verboses)
        dest = path.replace( 'res_src', 'app/src/main/res', 1 )
        checkAndCopy( parser, engNames, engFormats, path, dest, verbose )
|
2015-03-27 15:26:51 +01:00
|
|
|
|
|
|
|
##############################################################################
|
|
|
|
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|