xwords/xwords4/android/scripts/copy-strings.py

334 lines
11 KiB
Python
Raw Normal View History

2020-06-15 09:40:29 -07:00
#!/usr/bin/python3
# Go through all the res_src strings.xml files, and copy them over
# into the world where they'll get used in a build. This is meant to
# allow them to be built-in as an alternative to the
# locutils/downloadable system.
import re, sys, os, getopt
from lxml import etree
# Marker that may prefix a string's text; sameOrSameWithPrefix() strips it
# before comparing, and checkOrConvertString() drops strings starting with it.
s_prefix = 'XLATE ME: '

# languages in which it's ok to make a standalone quantity="one" into
# quantity="other"
g_oneToOthers = ['values-ja']

# Matches one positional format specifier with a single-digit index and an
# 's' or 'd' conversion, e.g. %1$s or %2$d. Written as a raw string so that
# \d and \$ reach the regex engine instead of being parsed as (invalid)
# Python string escapes.
g_formatsPat = re.compile( r'(%\d\$[sd])', re.DOTALL | re.MULTILINE )

# Text of the XML comment writeDoc() inserts at the top of every generated
# file; the two %s fields receive the source path and this script's name.
sComment = """
DO NOT EDIT THIS FILE!!!!
It was generated (from %s, by %s).
Any changes you make to it will be lost.
"""
def exitWithError(msg):
    """Print 'ERROR:' followed by msg on stdout, then exit with status 1."""
    print( 'ERROR: {}'.format(msg) )
    raise SystemExit(1)
def usage():
    """Print the command-line synopsis on stdout, then exit with status 1."""
    synopsis = '[-k <list-o-dirs>] [-f inFile]'
    print( "usage:", sys.argv[0], synopsis )
    raise SystemExit(1)
def sameOrSameWithPrefix( str1, str2 ):
    """Return True if str1 equals str2, either as-is or after removing a
    leading s_prefix from str1 (only str1 is ever stripped)."""
    if str1 == str2:
        return True
    if not str1.startswith(s_prefix):
        return False
    return str1[len(s_prefix):] == str2
def sameAsEnglishPlural(engNames, strElem):
    """Return True if the English entry named like strElem has exactly one
    plural string, keyed 'other', and strElem's text matches it (per
    sameOrSameWithPrefix)."""
    engStrings = engNames[strElem.get('name')]['strings']
    if 1 != len(engStrings) or 'other' not in engStrings:
        return False
    return sameOrSameWithPrefix( strElem.text, engStrings['other'] )
# If has a one and no (or empty) other, convert the one to other
def tryConvertOne( plurals ):
    """Relabel the quantity="one" child of plurals as quantity="other".

    Does nothing unless there is a "one" child and the "other" child is
    either missing or has no text. An empty "other" child is removed first
    so the element does not end up with two "other" items. The element is
    modified in place; nothing is returned.
    """
    # Iterate the element directly: getchildren() is deprecated.
    quantities = {item.get("quantity"): item for item in plurals}

    # Compare against None explicitly; an element's truth value is not a
    # reliable presence test.
    one = quantities.get('one')
    if one is None:
        return

    other = quantities.get('other')
    if other is not None and other.text:
        return                  # a real "other" exists; leave things alone

    print( "converting", plurals.get('name'))
    # Only remove "other" when it is actually there; indexing the dict
    # unconditionally raised KeyError for a standalone "one".
    if other is not None:
        plurals.remove(other)
    one.set('quantity', 'other')
def pluralsIsBogus(engNames, plurals, verbose):
    """Return True if the plurals element should be dropped as malformed.

    It is bogus when any child has missing or empty text, or when no child
    has quantity="other". With verbose set, the reason is printed.
    engNames is accepted but not used here.
    """
    haveOther = False           # will crash without one
    # Iterate the element directly: getchildren() is deprecated.
    for item in plurals:
        if not item.text:
            if verbose:
                print( 'dropping plurals {name} because of empty/missing \"{quantity}\"' \
                       .format(name=plurals.get("name"), quantity=item.get("quantity") ))
            return True
        if item.get("quantity") == "other":
            haveOther = True

    if verbose and not haveOther:
        print( "dropping plurals {name} because no \"other\" quantity" \
               .format(name=plurals.get("name")))
    return not haveOther
def pluralsIsSame(engNames, plurals):
    """Return True if at least one child of plurals has the same text as the
    English string for the same quantity (per sameOrSameWithPrefix).

    Exits via exitWithError() if a child has missing or empty text.

    NOTE(review): the original comment here read "all children duplicates
    of English", but the code reports True as soon as ANY one child matches.
    That behaviour is preserved; confirm which was intended.
    """
    name = plurals.get('name')
    strings = engNames[name]['strings']
    anySame = False
    # Iterate the element directly: getchildren() is deprecated.
    for item in plurals:
        text = item.text
        if not text:
            exitWithError( "bogus empty plurals item in " + name )

        quantity = item.get('quantity')
        if quantity in strings and sameOrSameWithPrefix( strings[quantity], text ):
            anySame = True
    return anySame
# path will be something like res_src/values-pt/strings.xml. We want
# the next-to-last entry.
def valuesDir( path ):
    """Return the next-to-last '/'-separated component of path."""
    # Only the last two separators matter, so split from the right.
    return path.rsplit('/', 2)[-2]
def checkPlurals( engNames, elem, src, verbose ):
    """Decide whether the plurals element elem should be kept.

    Returns False (drop it) when its name is not an English plurals entry,
    when pluralsIsBogus() rejects it, or when pluralsIsSame() reports a
    match with English. If valuesDir(src) is listed in g_oneToOthers, a
    standalone "one" is first converted in place by tryConvertOne().
    Returns True otherwise. Exits via exitWithError() if a surviving child
    still has no text.
    """
    name = elem.get('name')
    if name not in engNames or 'plurals' != engNames[name]['type']:
        print( 'plurals', name, 'not in engNames or not a plurals there')
        return False

    if valuesDir(src) in g_oneToOthers:
        tryConvertOne( elem )

    if pluralsIsBogus(engNames, elem, verbose):
        return False
    if pluralsIsSame(engNames, elem):
        return False

    # Final sanity check. Iterate the element directly (getchildren() is
    # deprecated) and use truthiness so a missing (None) text is reported
    # rather than raising TypeError from len().
    for item in elem:
        if not item.text:
            exitWithError( 'bad empty item ' + name )
    return True
def loadPlural(plural):
    """Return a dict mapping each child's "quantity" attribute to its text."""
    # Iterate the element directly: getchildren() is deprecated.
    return {child.get('quantity'): child.text for child in plural}
def writeDoc(doc, src, dest):
    """Write the parsed document doc to the file dest.

    A "DO NOT EDIT" comment built from sComment (naming src and this
    script) is inserted as the first child of the root element. The
    directory part of dest is created if needed, and the document is
    serialized pretty-printed as UTF-8 with an XML declaration.
    """
    comment = etree.Comment(sComment % (src, os.path.basename(sys.argv[0])))
    doc.getroot().insert( 0, comment )

    # exist_ok tolerates an already-present directory without hiding real
    # failures such as a permissions error. Skip the call when dest has no
    # directory part, since makedirs('') would raise.
    destDir = os.path.dirname( dest )
    if destDir:
        os.makedirs( destDir, exist_ok=True )

    content = etree.tostring( doc, pretty_print=True, encoding="utf-8",
                              xml_declaration=True ).decode( 'utf-8' )
    # print('writing:', content)
    # The XML declaration says UTF-8, so write UTF-8 regardless of the
    # platform's default text encoding.
    with open( dest, "w", encoding="utf-8" ) as out:
        out.write( content )
def exitWithFormatError(engSet, otherSet, name, path):
    """Exit via exitWithError() with a message showing the two mismatched
    format sets, the entry name and the file path."""
    pieces = [
        'formats set mismatch: ', str(engSet),
        ' vs ', str(otherSet),
        '; ', name,
        ' in file ', path,
    ]
    exitWithError( ''.join(pieces) )
def checkOrConvertString(engNames, elem, verbose):
    """Decide whether the string element elem should be kept.

    Returns False (drop it) when its name is not in engNames, when its text
    starts with s_prefix, when the English entry is neither a string nor a
    plurals, or when the text matches English. If English has a plurals
    entry of that name and the text differs from it, elem is converted in
    place into a plurals element holding a single quantity="other" item,
    and True is returned. Returns True for an ordinary string that differs
    from English. Exits via exitWithError() if elem has no text.
    """
    name = elem.get('name')
    text = elem.text
    if not text:
        exitWithError( 'elem ' + name + " is empty" )
        return False            # not reached: exitWithError() exits

    if name not in engNames or text.startswith(s_prefix):
        return False

    engType = engNames[name]['type']
    if 'string' == engType:
        if sameOrSameWithPrefix(engNames[name]['string'], text ):
            if verbose: print( "Same as english: name: %s; text: %s" % (name, text))
            return False
        return True

    if 'plurals' != engType or sameAsEnglishPlural( engNames, elem ):
        return False

    # English uses a plurals here: wrap the text in one "other" item.
    elem.tag = 'plurals'
    item = etree.Element("item")
    item.text = text
    item.set('quantity', 'other')
    elem.text = None
    elem.append( item )
    if verbose: print( 'translated string', name, 'to plural')
    return True
def checkAndCopy( parser, engNames, engFormats, src, dest, verbose ):
    """Parse src, drop the string and plurals elements that fail their
    checks, verify format specifiers against English, and write to dest."""
    doc = etree.parse(src, parser)

    # Strings go first: checkOrConvertString() may retag a string as a
    # plurals, and the plurals pass below must see those. findall() is only
    # called when its pass starts, so that ordering holds.
    passes = (
        ('string', lambda el: checkOrConvertString(engNames, el, verbose)),
        ('plurals', lambda el: checkPlurals(engNames, el, src, verbose)),
    )
    for tag, keep in passes:
        for elem in doc.findall(tag):
            if not keep(elem):
                # print('checkAndCopy(): removing {}'.format(elem))
                elem.getparent().remove(elem)

    # Languages don't have to provide the same set of plurals items:
    # one, zero, few, other etc. (My English doesn't include "few" so
    # far.) What can't happen is for there to be a format string in the
    # totality of them that's not in the totality of English, or for
    # there to be internal inconsistencies among their format strings,
    # e.g. a %1$d in "one" and a %1$s in "other"
    for name, fmtSet in getFormats( doc, src ).items():
        engSet = engFormats[name]
        if not engSet.hasAll( fmtSet ):
            exitWithFormatError( engSet, fmtSet, name, dest )

    writeDoc(doc, src, dest)
class FormatSet():
    """The positional indices used by %N$s and %N$d format specifiers,
    kept in one set per conversion type ('s' and 'd')."""

    def __init__(self):
        # conversion character -> set of integer indices seen with it
        self.fmts = dict(s=set(), d=set())

    def add(self, fmt):
        """Record one 4-character specifier such as '%1$s'."""
        assert 4 == len(fmt)
        index = int(fmt[1])
        # print('FormatSet.add({},{})'.format(fmt, fmt[3]))
        self.fmts[fmt[3]].add(index)

    def append( self, ps ):
        """Merge the indices of another FormatSet, ps, into this one."""
        for kind in ('s', 'd'):
            self.fmts[kind] |= ps.fmts[kind]

    # assert don't have any indices in BOTH sets
    def check(self):
        """Return True unless some index appears under both 's' and 'd';
        in that case print an error and return False."""
        clash = self.fmts['d'] & self.fmts['s']
        if clash:
            print('error!!!: same index in both: {}'.format(self))
        return not clash

    def hasAll(self, other):
        """Return True if every index in other is also present here under
        the same conversion type."""
        return all(other.fmts[kind] <= self.fmts[kind] for kind in ('s', 'd'))

    def __str__(self):
        return str(self.fmts)
def setForElem( elem, name ):
    """Return a FormatSet of every specifier that g_formatsPat finds in
    elem.text. name is used only by the commented-out debug output."""
    result = FormatSet()
    # print( 'setForElem({}): text: {}'.format( name, elem.text ))
    # The pattern has one capture group, so findall() yields the specifier
    # strings themselves.
    for fmt in g_formatsPat.findall( elem.text ):
        result.add( fmt )
    # print( 'setForElem({}) => {}'.format( name, result))
    return result
def getFormats( doc, path ):
    """Collect the format specifiers used by every entry in doc.

    Returns a dict keyed 'string/<name>' or 'plurals/<name>' whose values
    are FormatSet objects. For a plurals the set is the union over all its
    items. path is used only in error messages.

    Exits via exitWithError() if a plurals item has no text, or if a
    plurals uses the same index with both 's' and 'd'.
    """
    result = {}

    for elem in doc.findall('string'):
        name = 'string/' + elem.get('name')
        result[name] = setForElem( elem, name )

    for elem in doc.findall('plurals'):
        name = 'plurals/' + elem.get('name')
        pluralsSet = FormatSet()
        for item in elem.findall('item'):
            if not item.text:
                # Report the file being scanned; this line used to reference
                # an undefined name and raised NameError instead.
                exitWithError( '{} has empty quantity {} in file {}'
                               .format( name, item.get('quantity'), path ) )
            else:
                pluralsSet.append(setForElem( item, name ))
        if not pluralsSet.check():
            exitWithError( '{} has overlapping sets: {}'.format(name, pluralsSet))
        result[name] = pluralsSet

    # print( 'getFormats({}) => {}'.format( path, result ) )
    return result
def main():
    """Script entry point.

    Options: -k <dirs> adds space-separated substrings to the list of paths
    to skip; -f <file> names a source file to process and may be repeated.
    Without -f, every strings.xml found under res_src is processed unless
    its path contains one of the skip substrings.

    The English strings.xml is summarized first, then each source file is
    checked against it and copied by checkAndCopy().
    """
    # add these via params later
    excepts = ['values-ca_PS', 'values-ba_CK']
    verboses = ['values-ja']
    srcFiles = []

    # Catch only option-parsing errors. A bare except here also caught the
    # SystemExit raised by usage() itself.
    try:
        pairs, rest = getopt.getopt(sys.argv[1:], "k:f:")
    except getopt.GetoptError as err:
        print( "Unexpected error:", err )
        usage()

    for option, value in pairs:
        # elif matters: with two separate ifs, -k fell into the else of the
        # -f test and always ended in usage().
        if option == '-k': excepts += value.split(' ')
        elif option == '-f': srcFiles.append(value)
        else: usage()

    # summarize the english file
    wd = os.path.dirname(sys.argv[0])
    # os.path.join copes with an empty wd (script started without a
    # directory part); plain concatenation then gave '/../app/...', i.e. a
    # path from the filesystem root.
    path = os.path.join( wd, '..', 'app', 'src', 'main', 'res', 'values', 'strings.xml' )
    parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")
    engDoc = etree.parse(path, parser)
    engFormats = getFormats( engDoc, path )

    # name -> {'type': 'string', 'string': text}
    #      or {'type': 'plurals', 'strings': {quantity: text}}
    engNames = {}
    for typ in ['string', 'plurals']:
        for elem in engDoc.findall(typ):
            item = { 'type' : typ }
            if typ == 'string':
                item['string'] = elem.text
            else:
                item['strings'] = loadPlural(elem)
            engNames[elem.get('name')] = item
    # print( engNames)

    # if -f option not used, iterate over src files to collect them all
    if not srcFiles:
        for subdir, dirs, files in os.walk('res_src'):
            if "strings.xml" not in files:
                continue
            path = "%s/%s" % (subdir, "strings.xml")
            if not any(excpt in path for excpt in excepts):
                srcFiles.append(path)

    # Finally, do the work
    for path in srcFiles:
        # Verbose for every file if verboses is empty, else only for paths
        # containing one of its entries.
        verbose = not verboses or any(verb in path for verb in verboses)
        dest = path.replace( 'res_src', 'app/src/main/res', 1 )
        checkAndCopy( parser, engNames, engFormats, path, dest, verbose )

##############################################################################
if __name__ == '__main__':
    main()