topazscripts 1.7

This commit is contained in:
some_updates 2009-01-22 12:15:33 +00:00 committed by Apprentice Alf
parent 58e9c973ab
commit 66933f6972
6 changed files with 316 additions and 272 deletions

View file

@ -1,3 +1,12 @@
Changes in version 1.7
- gensvg.py has been improved so that the glyphs render exactly (ClarkNova)
- gensvg.py has fixed a render order "bug" that allowed some images to cover or hide text. (ClarkNova)
- change generated html to use external stylesheet via a link to "style.css"
- add missing <title> tag
- make xhtml compliant doctype and minor changes to write correct xhtml
- make divs that act as anchors be hidden visually and to take up 0 height and 0 width to prevent any impact on layout
- added support for new version of the <_span> tag called <span>
Changes in version 1.6
- support for books whose paragraphs have no styles
- support to run cmbtc_dump on Linux and Mac OSX provided you know your PID of your ipod or standalone Kindle

View file

@ -246,14 +246,20 @@ class PageParser(object):
'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
'word' : (1, 'snippets', 1, 0),
'word.type' : (1, 'scalar_text', 0, 0),
'word.class' : (1, 'scalar_text', 0, 0),
'word' : (1, 'snippets', 1, 0),
'word.type' : (1, 'scalar_text', 0, 0),
'word.class' : (1, 'scalar_text', 0, 0),
'word.firstGlyph' : (1, 'scalar_number', 0, 0),
'word.lastGlyph' : (1, 'scalar_number', 0, 0),
'_span' : (1, 'snippets', 1, 0),
'_span.firstWord' : (1, 'scalar_number', 0, 0),
'-span.lastWord' : (1, 'scalar_number', 0, 0),
'span' : (1, 'snippets', 1, 0),
'span.firstWord' : (1, 'scalar_number', 0, 0),
'span.lastWord' : (1, 'scalar_number', 0, 0),
'extratokens' : (1, 'snippets', 1, 0),
'extratokens.type' : (1, 'scalar_text', 0, 0),
'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),

View file

@ -119,6 +119,7 @@ class DocParser(object):
# this type of paragrph may be made up of multiple _spans, inline
# word monograms (images) and words with semantic meaning
# and now a new type "span" versus the old "_span"
# need to parse this type line by line
line = start + 1
@ -132,10 +133,10 @@ class DocParser(object):
(name, argres) = self.lineinDoc(line)
if name.endswith('_span.firstWord') :
if name.endswith('span.firstWord') :
first = int(argres)
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('_span.lastWord'):
if not name.endswith('span.lastWord'):
print 'Error: - incorrect _span ordering inside paragraph'
last = int(argres)
for wordnum in xrange(first, last):
@ -175,7 +176,7 @@ class DocParser(object):
if pclass :
classres = ' class="' + pclass + '"'
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading')
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
handle_links = len(self.link_id) > 0
@ -317,7 +318,7 @@ class DocParser(object):
# set anchor for link target on this page
if not anchorSet and not first_para_continued:
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '">&nbsp</div>\n'
htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="' + self.id + '" title="pagetype_' + pagetype + '"></div>\n'
anchorSet = True
if regtype == 'graphic' :
@ -343,7 +344,7 @@ class DocParser(object):
htmlpage += '</' + tag + '>'
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem') :
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
ptype = 'full'
# check to see if this is a continution from the previous page
if first_para_continued :
@ -371,6 +372,27 @@ class DocParser(object):
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'vertical') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'table') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
print "Warnings - Table Conversions are notoriously poor"
print "Strongly recommend taking a screen capture image of the "
print "table in %s.svg and using it to replace this attempt at a table" % self.id
elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
@ -378,10 +400,10 @@ class DocParser(object):
else :
print 'Warning: Unknown region type', regtype
print 'Warning: region type', regtype
(pos, temp) = self.findinDoc('paragraph',start,end)
if temp:
print 'Treating this like a "text" region'
if pos != -1:
print ' is a "text" region'
regtype = 'fixed'
ptype = 'full'
# check to see if this is a continution from the previous page
@ -400,7 +422,7 @@ class DocParser(object):
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
else :
print 'Treating this like a "graphic" region'
print ' is a "graphic" region'
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

View file

@ -77,7 +77,8 @@ def main(argv):
htmlFileName = "book.html"
htmlstr = '<html>\n'
htmlstr = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
htmlstr += '<html>\n'
filenames = os.listdir(pageDir)
filenames = sorted(filenames)
@ -85,6 +86,7 @@ def main(argv):
print 'Processing ... '
htmlstr += '<head>\n'
htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
# process metadata and retrieve fontSize info
print ' ', 'metadata0000.dat'
@ -93,6 +95,8 @@ def main(argv):
metastr = decode_meta.getMetaData(fname)
file(xname, 'wb').write(metastr)
meta_array = decode_meta.getMetaArray(fname)
htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
@ -120,11 +124,9 @@ def main(argv):
fname = os.path.join(bookDir,'other0000.dat')
xname = os.path.join(bookDir, 'style.css')
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
htmlstr += '<style>\n'
cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize, ph, pw)
file(xname, 'wb').write(cssstr)
htmlstr += cssstr
htmlstr += '</style>\n'
htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
htmlstr += '</head>\n<body>\n'
for filename in filenames:

View file

@ -10,301 +10,306 @@ import decode_meta
class GParser(object):
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
self.dpi = 1440
self.gh = self.getData('info.glyph.h')
self.gw = self.getData('info.glyph.w')
self.guse = self.getData('info.glyph.use')
self.count = len(self.guse)
self.gvtx = self.getData('info.glyph.vtx')
self.glen = self.getData('info.glyph.len')
self.gdpi = self.getData('info.glyph.dpi')
self.vx = self.getData('info.vtx.x')
self.vy = self.getData('info.vtx.y')
self.vlen = self.getData('info.len.n')
self.glen.append(len(self.vlen))
self.gvtx.append(len(self.vx))
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
self.dpi = 1440
self.gh = self.getData('info.glyph.h')
self.gw = self.getData('info.glyph.w')
self.guse = self.getData('info.glyph.use')
self.count = len(self.guse)
self.gvtx = self.getData('info.glyph.vtx')
self.glen = self.getData('info.glyph.len')
self.gdpi = self.getData('info.glyph.dpi')
self.vx = self.getData('info.vtx.x')
self.vy = self.getData('info.vtx.y')
self.vlen = self.getData('info.len.n')
self.glen.append(len(self.vlen))
self.gvtx.append(len(self.vx))
def getData(self, path):
result = None
cnt = len(self.flatdoc)
for j in xrange(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name == path):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getData(self, path):
result = None
cnt = len(self.flatdoc)
for j in xrange(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name == path):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getPath(self, gly):
path = ''
if (gly < 0) or (gly >= self.count):
return path
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]-1]
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]-1]
p = 0
for k in xrange(self.glen[gly], self.glen[gly+1]):
if (p == 0):
zx = tx[0:self.vlen[k]+1]
zy = ty[0:self.vlen[k]+1]
else:
zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
p += 1
j = 0
while ( j < len(zx) ):
if (j == 0):
# Start Position.
path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
elif (j <= len(zx)-3):
# Cubic Bezier Curve
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
j += 2
elif (j == len(zx)-2):
# Cubic Bezier Curve to Start Position
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
j += 1
elif (j == len(zx)-1):
# Quadratic Bezier Curve to Start Position
path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
def getPath(self, gly):
path = ''
if (gly < 0) or (gly >= self.count):
return path
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
p = 0
for k in xrange(self.glen[gly], self.glen[gly+1]):
if (p == 0):
zx = tx[0:self.vlen[k]+1]
zy = ty[0:self.vlen[k]+1]
else:
zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
p += 1
j = 0
while ( j < len(zx) ):
if (j == 0):
# Start Position.
path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
elif (j <= len(zx)-3):
# Cubic Bezier Curve
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
j += 2
elif (j == len(zx)-2):
# Cubic Bezier Curve to Start Position
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
j += 1
elif (j == len(zx)-1):
# Quadratic Bezier Curve to Start Position
path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
j += 1
path += 'z'
return path
j += 1
path += 'z'
return path
class PParser(object):
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
self.temp = []
foo = self.getData('page.h') or self.getData('book.h')
self.ph = foo[0]
foo = self.getData('page.w') or self.getData('book.w')
self.pw = foo[0]
self.gx = self.getData('info.glyph.x')
self.gy = self.getData('info.glyph.y')
self.gid = self.getData('info.glyph.glyphID')
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
self.temp = []
foo = self.getData('page.h') or self.getData('book.h')
self.ph = foo[0]
foo = self.getData('page.w') or self.getData('book.w')
self.pw = foo[0]
self.gx = self.getData('info.glyph.x')
self.gy = self.getData('info.glyph.y')
self.gid = self.getData('info.glyph.glyphID')
def getData(self, path):
result = None
cnt = len(self.flatdoc)
for j in xrange(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getData(self, path):
result = None
cnt = len(self.flatdoc)
for j in xrange(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getDataTemp(self, path):
result = None
cnt = len(self.temp)
for j in xrange(cnt):
item = self.temp[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
self.temp.pop(j)
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getDataTemp(self, path):
result = None
cnt = len(self.temp)
for j in xrange(cnt):
item = self.temp[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
self.temp.pop(j)
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getImages(self):
result = []
self.temp = self.flatdoc
while (self.getDataTemp('img') != None):
h = self.getDataTemp('img.h')[0]
w = self.getDataTemp('img.w')[0]
x = self.getDataTemp('img.x')[0]
y = self.getDataTemp('img.y')[0]
src = self.getDataTemp('img.src')[0]
result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
return result
def getImages(self):
result = []
self.temp = self.flatdoc
while (self.getDataTemp('img') != None):
h = self.getDataTemp('img.h')[0]
w = self.getDataTemp('img.w')[0]
x = self.getDataTemp('img.x')[0]
y = self.getDataTemp('img.y')[0]
src = self.getDataTemp('img.src')[0]
result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
return result
def getGlyphs(self,glyfname):
result = []
if (self.gid != None) and (len(self.gid) > 0):
glyphs = []
for j in set(self.gid):
glyphs.append(j)
glyphs.sort()
gfile = open(glyfname, 'r')
j = 0
while True :
inp = gfile.readline()
if (inp == ''):
break
id='id="gl%d"' % glyphs[j]
if (inp.find(id) > 0):
result.append(inp)
j += 1
if (j == len(glyphs)):
break
gfile.close()
return result
def getGlyphs(self,glyfname):
result = []
if (self.gid != None) and (len(self.gid) > 0):
glyphs = []
for j in set(self.gid):
glyphs.append(j)
glyphs.sort()
gfile = open(glyfname, 'r')
j = 0
while True :
inp = gfile.readline()
if (inp == ''):
break
id='id="gl%d"' % glyphs[j]
if (inp.find(id) > 0):
result.append(inp)
j += 1
if (j == len(glyphs)):
break
gfile.close()
return result
def usage():
print 'Usage: '
print ' '
print ' gensvg.py unencryptedBookDir'
print ' '
print 'Usage: '
print ' '
print ' gensvg.py unencryptedBookDir'
print ' '
def main(argv):
bookDir = ''
bookDir = ''
if len(argv) == 0:
argv = sys.argv
else :
argv = argv.split()
if len(argv) == 0:
argv = sys.argv
else :
argv = argv.split()
try:
opts, args = getopt.getopt(argv[1:], "h:")
try:
opts, args = getopt.getopt(argv[1:], "h:")
except getopt.GetoptError, err:
print str(err)
usage()
sys.exit(2)
except getopt.GetoptError, err:
print str(err)
usage()
sys.exit(2)
if len(opts) == 0 and len(args) == 0 :
usage()
sys.exit(2)
if len(opts) == 0 and len(args) == 0 :
usage()
sys.exit(2)
for o, a in opts:
if o =="-h":
usage()
sys.exit(0)
for o, a in opts:
if o =="-h":
usage()
sys.exit(0)
bookDir = args[0]
bookDir = args[0]
if not os.path.exists(bookDir) :
print "Can not find directory with unencrypted book"
sys.exit(-1)
if not os.path.exists(bookDir) :
print "Can not find directory with unencrypted book"
sys.exit(-1)
dictFile = os.path.join(bookDir,'dict0000.dat')
dictFile = os.path.join(bookDir,'dict0000.dat')
if not os.path.exists(dictFile) :
print "Can not find dict0000.dat file"
sys.exit(-1)
if not os.path.exists(dictFile) :
print "Can not find dict0000.dat file"
sys.exit(-1)
pageDir = os.path.join(bookDir,'page')
if not os.path.exists(pageDir) :
print "Can not find page directory in unencrypted book"
sys.exit(-1)
pageDir = os.path.join(bookDir,'page')
if not os.path.exists(pageDir) :
print "Can not find page directory in unencrypted book"
sys.exit(-1)
imgDir = os.path.join(bookDir,'img')
if not os.path.exists(imgDir) :
print "Can not find image directory in unencrypted book"
sys.exit(-1)
imgDir = os.path.join(bookDir,'img')
if not os.path.exists(imgDir) :
print "Can not find image directory in unencrypted book"
sys.exit(-1)
glyphsDir = os.path.join(bookDir,'glyphs')
if not os.path.exists(glyphsDir) :
print "Can not find glyphs directory in unencrypted book"
sys.exit(-1)
glyphsDir = os.path.join(bookDir,'glyphs')
if not os.path.exists(glyphsDir) :
print "Can not find glyphs directory in unencrypted book"
sys.exit(-1)
metaFile = os.path.join(bookDir,'metadata0000.dat')
if not os.path.exists(metaFile) :
print "Can not find metadata0000.dat in unencrypted book"
sys.exit(-1)
metaFile = os.path.join(bookDir,'metadata0000.dat')
if not os.path.exists(metaFile) :
print "Can not find metadata0000.dat in unencrypted book"
sys.exit(-1)
svgDir = os.path.join(bookDir,'svg')
if not os.path.exists(svgDir) :
os.makedirs(svgDir)
svgDir = os.path.join(bookDir,'svg')
if not os.path.exists(svgDir) :
os.makedirs(svgDir)
print 'Processing Meta Data ... '
print 'Processing Meta Data ... '
print ' ', 'metadata0000.dat'
fname = os.path.join(bookDir,'metadata0000.dat')
metadata = decode_meta.getMetaArray(fname)
print ' ', 'metadata0000.dat'
fname = os.path.join(bookDir,'metadata0000.dat')
metadata = decode_meta.getMetaArray(fname)
print 'Processing Glyphs ... '
print 'Processing Glyphs ... '
filenames = os.listdir(glyphsDir)
filenames = sorted(filenames)
filenames = os.listdir(glyphsDir)
filenames = sorted(filenames)
glyfname = os.path.join(svgDir,'glyphs.svg')
glyfile = open(glyfname, 'w')
glyfile.write('<?xml version="1.0" standalone="no"?>\n')
glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
glyfile.write('<title>Glyphs for %s</title>\n' % metadata['Title'])
glyfile.write('<defs>\n')
counter = 0
for filename in filenames:
print ' ', filename
fname = os.path.join(glyphsDir,filename)
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
gp = GParser(flat_xml)
for i in xrange(0, gp.count):
path = gp.getPath(i)
glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
counter += 1
glyfile.write('</defs>\n')
glyfile.write('</svg>\n')
glyfile.close()
glyfname = os.path.join(svgDir,'glyphs.svg')
glyfile = open(glyfname, 'w')
glyfile.write('<?xml version="1.0" standalone="no"?>\n')
glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
glyfile.write('<title>Glyphs for %s</title>\n' % metadata['Title'])
glyfile.write('<defs>\n')
counter = 0
for filename in filenames:
print ' ', filename
fname = os.path.join(glyphsDir,filename)
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
gp = GParser(flat_xml)
for i in xrange(0, gp.count):
path = gp.getPath(i)
glyfile.write('<path id="gl%d" d="%s" fill="black" />\n' % (counter * 256 + i, path))
counter += 1
glyfile.write('</defs>\n')
glyfile.write('</svg>\n')
glyfile.close()
print 'Processing Pages ... '
print 'Processing Pages ... '
scaledpi = 720
filenames = os.listdir(pageDir)
filenames = sorted(filenames)
counter = 0
for filename in filenames:
print ' ', filename
fname = os.path.join(pageDir,filename)
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
pp = PParser(flat_xml)
pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
pfile.write('<?xml version="1.0" standalone="no"?>\n')
pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
if (pp.gid != None):
pfile.write('<defs>\n')
gdefs = pp.getGlyphs(glyfname)
for j in xrange(0,len(gdefs)):
pfile.write(gdefs[j])
pfile.write('</defs>\n')
for j in xrange(0,len(pp.gid)):
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
img = pp.getImages()
if (img != None):
for j in xrange(0,len(img)):
pfile.write(img[j])
pfile.write('</svg>')
pfile.close()
counter += 1
# Books are at 1440 DPI. This is rendering at twice that size for
# readability when rendering to the screen.
scaledpi = 720
filenames = os.listdir(pageDir)
filenames = sorted(filenames)
counter = 0
for filename in filenames:
print ' ', filename
fname = os.path.join(pageDir,filename)
flat_xml = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
pp = PParser(flat_xml)
pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
pfile.write('<?xml version="1.0" standalone="no"?>\n')
pfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
pfile.write('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
pfile.write('<title>Page %d - %s by %s</title>\n' % (counter, metadata['Title'],metadata['Authors']))
if (pp.gid != None):
pfile.write('<defs>\n')
gdefs = pp.getGlyphs(glyfname)
for j in xrange(0,len(gdefs)):
pfile.write(gdefs[j])
pfile.write('</defs>\n')
img = pp.getImages()
if (img != None):
for j in xrange(0,len(img)):
pfile.write(img[j])
if (pp.gid != None):
for j in xrange(0,len(pp.gid)):
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
pfile.write('<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n');
pfile.write('</svg>')
pfile.close()
counter += 1
print 'Processing Complete'
print 'Processing Complete'
return 0
return 0
if __name__ == '__main__':
sys.exit(main(''))
sys.exit(main(''))

View file

@ -6,7 +6,7 @@ Contributors:
DiapDealer - for extensive testing and feedback, and standalone linux/macosx version of cmbtc_dump
stewball - for extensive testing and feedback
and others for posting, feedback and testing
and many others for posting, feedback and testing
This is experimental and it will probably not work for you but...