mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-22 07:28:16 +01:00
2f339450f3
Change copy-strings.py to insist that no language provide a format string not also found in English (since parameters to match all English format strings are sure to exist, but other languages are free NOT to match them). It's ok for other languages to provide "few", for example, when English doesn't.
333 lines
11 KiB
Python
Executable file
333 lines
11 KiB
Python
Executable file
#!/usr/bin/python3
|
|
|
|
# Go through all the res_src strings.xml files, and copy them over
|
|
# into the world where they'll get used in a build. This is meant to
|
|
# allow them to be built-in as an alternative to the
|
|
# locutils/downloadable system.
|
|
|
|
import re, sys, os, getopt
|
|
from lxml import etree
|
|
|
|
# Marker that prefixes strings which still need translating.
s_prefix = 'XLATE ME: '

# languages in which it's ok to make a standalone quantity="one" into
# quantity="other"
g_oneToOthers = ['values-ja']

# Positional format specifiers of the form %<digit>$s or %<digit>$d.
# Raw string: '\d' and '\$' are invalid escapes in a normal string literal.
g_formatsPat = re.compile( r'(%\d\$[sd])', re.DOTALL | re.MULTILINE )

# Template for the XML comment inserted at the top of every generated
# file; filled in with (source path, name of this script).
sComment = """
DO NOT EDIT THIS FILE!!!!
It was generated (from %s, by %s).
Any changes you make to it will be lost.
"""
|
|
|
|
def exitWithError(msg):
    """Print *msg* prefixed with 'ERROR:' and terminate with exit status 1."""
    print( 'ERROR:', msg )
    raise SystemExit(1)
|
|
|
|
def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    synopsis = '[-k <list-o-dirs>] [-f inFile]'
    print( "usage:", sys.argv[0], synopsis )
    raise SystemExit(1)
|
|
|
|
def sameOrSameWithPrefix( str1, str2 ):
    """Return True if str1 equals str2, either directly or once the
    s_prefix marker has been stripped from the front of str1."""
    if str1 == str2:
        return True
    # Only str1 is ever inspected for the prefix; str2 is compared as-is.
    return str1.startswith(s_prefix) and str1[len(s_prefix):] == str2
|
|
|
|
def sameAsEnglishPlural(engNames, strElem):
    """Return True if the English entry named like strElem has exactly
    one quantity, "other", whose text matches strElem's text (allowing
    for the s_prefix marker on strElem's text)."""
    engQuantities = engNames[strElem.get('name')]['strings']
    if len(engQuantities) != 1 or 'other' not in engQuantities:
        return False
    return sameOrSameWithPrefix( strElem.text, engQuantities['other'] )
|
|
|
|
# If has a one and no (or empty) other, convert the one to other
def tryConvertOne( plurals ):
    """Turn a lone quantity="one" item into quantity="other", in place.

    plurals is a <plurals> element.  Nothing happens unless it has a
    "one" item and either no "other" item or an "other" item whose text
    is empty or missing.  In that case any empty "other" is removed and
    the "one" item is relabelled "other".
    """
    # Iterate the element directly; getchildren() is deprecated.
    quantities = {item.get("quantity"): item for item in plurals}

    one = quantities.get("one")
    if one is None:
        return

    # Compare against None explicitly: a childless element is falsy.
    other = quantities.get("other")
    if other is not None and other.text:
        return                  # a usable "other" already exists

    print( "converting", plurals.get('name'))
    if other is not None:
        # Previously this removal ran unconditionally and raised
        # KeyError when there was no "other" item at all.
        plurals.remove(other)
    one.set('quantity', 'other')
|
|
|
|
def pluralsIsBogus(engNames, plurals, verbose):
    """Return True if the <plurals> element should be dropped.

    It is bogus when any item has empty or missing text (checked in
    document order; scanning stops at the first such item), or when no
    item has quantity="other".  engNames is accepted but not used.
    When verbose is true the reason for dropping is printed.
    """
    haveOther = False       # will crash without one
    # Iterate the element directly; getchildren() is deprecated.
    for item in plurals:
        quantity = item.get("quantity")
        if not item.text:
            if verbose:
                print( 'dropping plurals {name} because of empty/missing \"{quantity}\"' \
                       .format(name=plurals.get("name"), quantity=quantity ))
            return True
        if quantity == "other":
            haveOther = True

    if verbose and not haveOther:
        print( "dropping plurals {name} because no \"other\" quantity" \
               .format(name=plurals.get("name")))

    return not haveOther
|
|
|
|
def pluralsIsSame(engNames, plurals):
    """Return True if at least one item of the <plurals> element has the
    same text as the English item of the same quantity.

    Exits with an error if any item has empty or missing text.
    """
    engStrings = engNames[plurals.get('name')]['strings']
    anySame = False
    # Iterate the element directly; getchildren() is deprecated.
    for item in plurals:
        text = item.text
        if not text:
            exitWithError( "bogus empty plurals item in " + plurals.get('name'))

        quantity = item.get('quantity')
        # NOTE(review): the English text is passed first, so only the
        # English side is checked for the s_prefix marker -- confirm
        # that this argument order is intended.
        if quantity in engStrings \
           and sameOrSameWithPrefix( engStrings[quantity], text ):
            anySame = True
    return anySame
|
|
|
|
# path will be something like res_src/values-pt/strings.xml. We want
# the next-to-last entry.
def valuesDir( path ):
    """Return the next-to-last '/'-separated component of path."""
    # Only the last two separators matter, so split from the right.
    return path.rsplit('/', 2)[-2]
|
|
|
|
def checkPlurals( engNames, elem, src, verbose ):
    """Decide whether the <plurals> element elem should be kept.

    Returns False when its name is not an English plurals, when it is
    bogus (see pluralsIsBogus) or when it duplicates English (see
    pluralsIsSame).  For source files whose values directory is listed
    in g_oneToOthers a lone "one" item is first converted to "other".
    """
    name = elem.get('name')
    if name not in engNames or engNames[name]['type'] != 'plurals':
        print( 'plurals', name, 'not in engNames or not a plurals there')
        return False

    if valuesDir(src) in g_oneToOthers:
        tryConvertOne( elem )

    if pluralsIsBogus(engNames, elem, verbose) or pluralsIsSame(engNames, elem):
        return False

    # Final check: nothing that survived the tests above may be empty.
    ok = True
    for item in elem:
        if len(item.text) == 0:
            ok = False
            exitWithError( 'bad empty item ' + name )
    return ok
|
|
|
|
def loadPlural(plural):
    """Return a dict mapping each child's "quantity" attribute to its text."""
    # Iterate the element directly; getchildren() is deprecated.
    return {child.get('quantity'): child.text for child in plural}
|
|
|
|
def writeDoc(doc, src, dest):
    """Write doc to the file dest as pretty-printed UTF-8 XML.

    A "do not edit" comment naming src and this script is inserted as
    the first child of the root element before writing.  Missing
    parent directories of dest are created.
    """
    comment = etree.Comment(sComment % (src, os.path.basename(sys.argv[0])))
    doc.getroot().insert( 0, comment )

    # dirname is '' when dest has no directory part; makedirs('') fails.
    destDir = os.path.dirname( dest )
    if destDir:
        os.makedirs( destDir, exist_ok=True )

    content = etree.tostring( doc, pretty_print=True, encoding="utf-8", xml_declaration=True ) \
                   .decode('utf-8' )
    # The XML declaration says utf-8, so write utf-8 whatever the locale.
    with open( dest, "w", encoding="utf-8" ) as out:
        out.write( content )
|
|
|
|
def exitWithFormatError(engSet, otherSet, name, path):
    """Exit with an error describing a mismatch between the English
    format set engSet and another file's otherSet for entry name."""
    details = 'formats set mismatch: {} vs {}; {} in file {}'.format(
        str(engSet), str(otherSet), name, path )
    exitWithError( details )
|
|
|
|
def checkOrConvertString(engNames, elem, verbose):
    """Decide whether the <string> element elem should be kept.

    Exits with an error if elem has no text.  Returns False when the
    name is unknown in English, the text still carries the s_prefix
    marker, or the text duplicates English.  When English has the name
    as a plurals, elem is rewritten in place as a <plurals> with a
    single quantity="other" item and True is returned.
    """
    name = elem.get('name')
    text = elem.text
    if not text:
        exitWithError( 'elem' + name + " is empty" )

    if name not in engNames or text.startswith(s_prefix):
        return False

    engType = engNames[name]['type']
    if engType == 'string':
        if sameOrSameWithPrefix(engNames[name]['string'], text ):
            if verbose:
                print( "Same as english: name: %s; text: %s" % (name, text))
            return False
        return True

    if engType != 'plurals' or sameAsEnglishPlural( engNames, elem ):
        return False

    # English has a plurals here: wrap the text in an "other" item.
    elem.tag = 'plurals'
    item = etree.Element("item")
    item.text = text
    elem.text = None
    item.set('quantity', 'other')
    elem.append( item )
    if verbose:
        print( 'translated string', name, 'to plural')
    return True
|
|
|
|
def checkAndCopy( parser, engNames, engFormats, src, dest, verbose ):
    """Parse src, drop the entries that fail the checks, verify the
    format specifiers against English, and write the result to dest."""
    doc = etree.parse(src, parser)

    def prune( tag, keep ):
        # findall() returns a list, so removing while looping is safe.
        for elem in doc.findall(tag):
            if not keep(elem):
                elem.getparent().remove(elem)

    # Strings go first: some get converted to plurals, and those must
    # then be seen by the plurals pass.
    prune( 'string', lambda elem: checkOrConvertString(engNames, elem, verbose) )
    prune( 'plurals', lambda elem: checkPlurals(engNames, elem, src, verbose) )

    # Languages don't have to provide the same set of plurals items:
    # one, zero, few, other etc. (My English doesn't include "few" so
    # far.) What can't happen is for there to be a format string in the
    # totality of them that's not in the totality of English, or for
    # there to be internal inconsistencies among their format strings,
    # e.g. a %1$d in "one" and a %1$s in "other"
    for name, fmts in getFormats( doc, src ).items():
        engFmts = engFormats[name]
        if not engFmts.hasAll( fmts ):
            exitWithFormatError( engFmts, fmts, name, dest )

    writeDoc(doc, src, dest)
|
|
|
|
class FormatSet():
    """The positional format specifiers used by one string or plurals.

    fmts maps each conversion type ('s' or 'd') to the set of argument
    indices that appear with that type.
    """

    def __init__(self):
        self.fmts = {typ: set() for typ in ('s', 'd')}

    def add(self, fmt):
        """Record one four-character specifier such as '%1$s'."""
        assert len(fmt) == 4
        _, digit, _, typ = fmt
        indx = int(digit)
        self.fmts[typ].add(indx)

    def append( self, ps ):
        """Merge every index recorded in the FormatSet ps into this one."""
        for typ in ('s', 'd'):
            self.fmts[typ].update( ps.fmts[typ] )

    # assert don't have any indices in BOTH sets
    def check(self):
        """Return False, after printing an error, if any index is used
        with both types; True otherwise."""
        if self.fmts['d'] & self.fmts['s']:
            print('error!!!: same index in both: {}'.format(self))
            return False
        return True

    def hasAll(self, other):
        """Return True if every index in other is, for the same type,
        also present in this set."""
        return all( other.fmts[typ] <= self.fmts[typ] for typ in ('s', 'd') )

    def __str__(self):
        return str(self.fmts)
|
|
|
|
def setForElem( elem, name ):
    """Return a FormatSet holding every specifier matched by
    g_formatsPat in elem's text.

    An element with no text yields an empty set.  name is accepted for
    the benefit of callers and debugging; it is not used.
    """
    result = FormatSet()
    # elem.text is None for an empty element; treat that as "no formats".
    # With a single group, findall() returns the matched specifiers.
    for fmt in g_formatsPat.findall( elem.text or '' ):
        result.add( fmt )
    return result
|
|
|
|
def getFormats( doc, path ):
    """Collect the format specifiers of every string and plurals in doc.

    Returns a dict keyed by 'string/<name>' or 'plurals/<name>' whose
    values are FormatSets; for plurals the sets of all items are merged.
    Exits with an error if a plurals item is empty or if a plurals uses
    one index with both the s and d types.  path is the file doc was
    read from and is used only in error messages.
    """
    result = {}

    for elem in doc.findall('string'):
        name = 'string/' + elem.get('name')
        result[name] = setForElem( elem, name )

    for elem in doc.findall('plurals'):
        name = 'plurals/' + elem.get('name')
        pluralsSet = FormatSet()
        for item in elem.findall('item'):
            if not item.text:
                # This message used to reference an undefined name and
                # so raised NameError instead of reporting the file.
                exitWithError( '{} has empty quantity {} in file {}'
                               .format( name, item.get('quantity'), path ) )
            else:
                pluralsSet.append(setForElem( item, name ))
        if not pluralsSet.check():
            exitWithError( '{} has overlapping sets: {}'.format(name, pluralsSet))
        result[name] = pluralsSet

    return result
|
|
|
|
def main():
    """Copy the res_src strings.xml files into app/src/main/res.

    Options:
      -k <dirs>  space-separated names added to the list of path
                 fragments to skip
      -f <file>  process this source file (may be repeated); without
                 it every strings.xml under res_src is processed
    """
    # add these via params later
    excepts = ['values-ca_PS', 'values-ba_CK']
    verboses = ['values-ja']
    srcFiles = []

    # Catch only option-parsing failures: a bare except here would also
    # swallow the SystemExit raised by usage().
    try:
        pairs, rest = getopt.getopt(sys.argv[1:], "k:f:")
    except getopt.GetoptError as err:
        print( "Unexpected error:", err )
        usage()

    for option, value in pairs:
        # One chain: with two separate ifs every -k fell through to
        # usage() and exited.
        if option == '-k':
            excepts += value.split(' ')
        elif option == '-f':
            srcFiles.append(value)
        else:
            usage()

    # summarize the english file
    # join() copes with an empty dirname, where plain concatenation
    # produced the absolute path '/../app/...'.
    wd = os.path.dirname(sys.argv[0])
    path = os.path.join( wd, '..', 'app', 'src', 'main', 'res',
                         'values', 'strings.xml' )

    parser = etree.XMLParser(remove_blank_text=True, encoding="utf-8")
    engDoc = etree.parse(path, parser)
    engFormats = getFormats( engDoc, path )

    engNames = {}
    for typ in ['string', 'plurals']:
        for elem in engDoc.findall(typ):
            item = { 'type' : typ }
            if typ == 'string':
                item['string'] = elem.text
            else:
                item['strings'] = loadPlural(elem)
            engNames[elem.get('name')] = item

    # if -f option not used, iterate over src files to collect them all
    if not srcFiles:
        for subdir, dirs, files in os.walk('res_src'):
            if "strings.xml" not in files:
                continue
            # Keep '/' as the separator: valuesDir() splits on it.
            path = "%s/%s" % (subdir, "strings.xml")
            if not any( excpt in path for excpt in excepts ):
                srcFiles.append(path)

    # Finally, do the work
    for path in srcFiles:
        verbose = not verboses or any( verb in path for verb in verboses )
        dest = path.replace( 'res_src', 'app/src/main/res', 1 )
        checkAndCopy( parser, engNames, engFormats, path, dest, verbose )
|
|
|
|
##############################################################################
# Run main() only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
|