noDRM_DeDRM_tools/Topaz_Tools/lib/stylexml2css.py

224 lines
7.7 KiB
Python
Raw Normal View History

2010-01-17 13:10:35 +01:00
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import with_statement
import csv
import sys
import os
import getopt
from struct import pack
from struct import unpack
class DocParser(object):
    """Translate the style entries of a flattened stylesheet into CSS rules.

    The input is text with one entry per line, each entry being either a
    bare dotted name or ``name=value``.  Entries whose name ends in
    ``book.stylesheet.style`` mark the start of a style; the entries that
    follow (``style._tag``, ``style.class``, ``style.rule.attr``, ...) up to
    the next style start describe it.
    """

    # style tags that can be converted, mapped to the CSS selector prefix
    stags = {
        'paragraph' : 'p',
        'graphic'   : '.graphic'
    }

    # numeric attributes, mapped to the CSS property they are emitted as
    attr_val_map = {
        'hang'          : 'text-indent: ',
        'indent'        : 'text-indent: ',
        'line-space'    : 'line-height: ',
        'margin-bottom' : 'margin-bottom: ',
        'margin-left'   : 'margin-left: ',
        'margin-right'  : 'margin-right: ',
        'margin-top'    : 'margin-top: ',
        'space-after'   : 'padding-bottom: ',
    }

    # keyword attributes ("<attr>-<value>"), mapped to complete CSS declarations
    attr_str_map = {
        'align-center'   : 'text-align: center; margin-left: auto; margin-right: auto;',
        'align-left'     : 'text-align: left;',
        'align-right'    : 'text-align: right;',
        'align-justify'  : 'text-align: justify;',
        'display-inline' : 'display: inline;',
        'pos-left'       : 'text-align: left;',
        'pos-right'      : 'text-align: right;',
        'pos-center'     : 'text-align: center; margin-left: auto; margin-right: auto;',
    }

    # first three characters of a class name -> extra heading selector that
    # also receives the rule
    # NOTE(review): 'h3_' uses an underscore where 'h1-' and 'h2-' use a
    # hyphen; kept as found, confirm against real class names.
    _heading_tags = {
        'ch1' : 'h1',
        'ch2' : 'h2',
        'ch3' : 'h3',
        'h1-' : 'h4',
        'h2-' : 'h5',
        'h3_' : 'h6',
    }

    def __init__(self, flatxml, fontsize):
        """Store the flattened document as a list of lines.

        flatxml  -- the flattened stylesheet text, entries separated by '\\n'
        fontsize -- divisor used to turn attribute values into em units;
                    anything accepted by int()
        """
        self.flatdoc = flatxml.split('\n')
        self.fontsize = int(fontsize)

    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end):
        """Find the first entry in lines [pos, end) whose name ends with tagpath.

        An end of -1 means "to the end of the document"; any other end is
        clipped to the document length.

        Returns (index, value) for the first match, where value is the text
        after the first '=' ('' when the entry has no '='), or (-1, None)
        when nothing matches.
        """
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1:
            end = cnt
        else:
            end = min(cnt, end)
        for j in range(pos, end):
            # partition leaves argres as '' when the entry has no '='
            (name, _sep, argres) = docList[j].partition('=')
            if name.endswith(tagpath):
                return j, argres
        return -1, None

    # return list of start positions for the tagpath
    def posinDoc(self, tagpath):
        """Return the line indexes of every entry whose name ends with tagpath."""
        startpos = []
        pos = 0
        while True:
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res is None:
                break
            startpos.append(foundpos)
            pos = foundpos + 1
        return startpos

    def _collectRules(self, start, end):
        """Gather the convertible rules of the style spanning lines [start, end).

        Returns a dict mapping attribute key -> (css text, amount) where
        amount is '' for keyword declarations and a float number of ems for
        numeric ones.  Rules that cannot be converted (unknown attribute,
        missing or non-numeric value, a hang of 0) are skipped.
        """
        cssargs = {}
        while True:
            (attrpos, attr) = self.findinDoc('style.rule.attr', start, end)
            (valpos, val) = self.findinDoc('style.rule.value', start, end)
            if attr is None:
                break
            if attr in ('display', 'pos', 'align'):
                # handle text based attributes
                if val is not None:
                    key = attr + '-' + val
                    if key in self.attr_str_map:
                        cssargs[key] = (self.attr_str_map[key], '')
            elif attr in self.attr_val_map:
                # handle value based attributes
                try:
                    amount = int(val)
                except (TypeError, ValueError):
                    # no value entry, or not an integer: nothing to convert
                    amount = None
                if amount is not None and not (attr == 'hang' and amount == 0):
                    # float divisor so the result is fractional on Python 2 too
                    scale = float(self.fontsize)
                    if attr == 'line-space':
                        scale = scale * 1.41
                    cssargs[attr] = (self.attr_val_map[attr], amount / scale)
            # Always move past the attribute just handled.  When the value
            # lookup fails valpos is -1, and restarting from it would find
            # the same attribute again forever.
            start = max(attrpos, valpos) + 1
        return cssargs

    def process(self):
        """Convert every recognised style into CSS.

        Returns a tuple (csspage, classlst):
          csspage  -- the generated CSS rules, one per line
          classlst -- the '.cl_<name>' selector of each emitted style that
                      has a class, one per line

        A style is emitted only when at least one of its rules was converted
        and it carries no "after class" specifier.  That decision is made
        independently for each style.
        """
        classlst = ''
        csspage = ''

        # generate a list of each <style> starting point in the stylesheet
        styleList = self.posinDoc('book.stylesheet.style')
        stylecnt = len(styleList)
        # sentinel: the last style runs to the end of the document
        styleList.append(-1)

        # process each style converting what you can
        for j in range(stylecnt):
            start = styleList[j]
            end = styleList[j + 1]

            (pos, tag) = self.findinDoc('style._tag', start, end)
            if tag is None:
                (pos, tag) = self.findinDoc('style.type', start, end)

            # Is this something we know how to convert to css
            if tag not in self.stags:
                continue

            # get the style class
            (pos, sclass) = self.findinDoc('style.class', start, end)
            if sclass is not None:
                sclass = '.cl_' + sclass.lower()
            else:
                sclass = ''

            # check for any "after class" specifiers
            (pos, aftclass) = self.findinDoc('style._after_class', start, end)

            cssargs = self._collectRules(start, end)

            # after class styles are disabled until they can be handled
            # properly; styles with nothing converted would be empty rules
            if aftclass is not None or not cssargs:
                continue

            # make sure line-space does not go below 1em
            if 'line-space' in cssargs:
                val = cssargs['line-space'][1]
                if val < 1.0:
                    val = 1.0
                # delete then re-add so the entry keeps moving to the end of
                # the dict on interpreters that preserve insertion order
                del cssargs['line-space']
                cssargs['line-space'] = (self.attr_val_map['line-space'], val)

            # handle modifications for css style hanging indents: a negative
            # text-indent plus a left margin widened by the same amount
            if 'hang' in cssargs:
                hval = cssargs['hang'][1]
                del cssargs['hang']
                cssargs['hang'] = (self.attr_val_map['hang'], -hval)
                mval = 0
                mseg = 'margin-left: '
                if 'margin-left' in cssargs:
                    (mseg, mval) = cssargs['margin-left']
                cssargs['margin-left'] = (mseg, hval + mval)
                # hang and indent both map to text-indent; hang wins
                if 'indent' in cssargs:
                    del cssargs['indent']

            parts = []
            for key in cssargs:
                (mseg, mval) = cssargs[key]
                if mval == '':
                    parts.append(mseg + ' ')
                else:
                    parts.append(mseg + '%.1fem; ' % mval)
            cssline = sclass + ' { ' + ''.join(parts) + '}'

            if sclass != '':
                classlst += sclass + '\n'

                # handle special case of paragraph class used inside chapter
                # heading and non-chapter headings; sclass[4:7] is the first
                # three characters after the '.cl_' prefix
                heading = self._heading_tags.get(sclass[4:7])
                if heading is not None:
                    csspage += heading + cssline + '\n'

            csspage += self.stags[tag] + cssline + '\n'

        return csspage, classlst
2010-01-17 13:10:35 +01:00
2010-01-19 13:11:59 +01:00
def convert2CSS(flatxml, fontsize):
    """Run a DocParser over flatxml and return the result of its process().

    flatxml  -- the flattened stylesheet text handed to DocParser
    fontsize -- the font size handed to DocParser
    """
    # create a document parser and return whatever its conversion produces
    return DocParser(flatxml, fontsize).process()