topazscripts 1.7

This commit is contained in:
some_updates 2009-01-22 12:15:33 +00:00 committed by Apprentice Alf
parent 58e9c973ab
commit 66933f6972
6 changed files with 316 additions and 272 deletions

View file

@ -1,3 +1,12 @@
Changes in version 1.7
- gensvg.py has been improved so that the glyphs render exactly (ClarkNova)
- gensvg.py has fixed a render order "bug" that allowed some images to cover or hide text. (ClarkNova)
- change generated html to use external stylesheet via a link to "style.css"
- add missing <title> tag
- make xhtml compliant doctype and minor changes to write correct xhtml
- make divs that act as anchors visually hidden, with 0 height and 0 width, so that they have no impact on layout
- added support for <span>, the new version of the <_span> tag
Changes in version 1.6
- support for books whose paragraphs have no styles
- support for running cmbtc_dump on Linux and Mac OSX, provided you know the PID of your iPod or standalone Kindle

View file

@ -249,11 +249,17 @@ class PageParser(object):
'word' : (1, 'snippets', 1, 0),
'word.type' : (1, 'scalar_text', 0, 0),
'word.class' : (1, 'scalar_text', 0, 0),
'word.firstGlyph' : (1, 'scalar_number', 0, 0),
'word.lastGlyph' : (1, 'scalar_number', 0, 0),
'_span' : (1, 'snippets', 1, 0),
'_span.firstWord' : (1, 'scalar_number', 0, 0),
'-span.lastWord' : (1, 'scalar_number', 0, 0),
'span' : (1, 'snippets', 1, 0),
'span.firstWord' : (1, 'scalar_number', 0, 0),
'span.lastWord' : (1, 'scalar_number', 0, 0),
'extratokens' : (1, 'snippets', 1, 0),
'extratokens.type' : (1, 'scalar_text', 0, 0),
'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),

View file

@ -119,6 +119,7 @@ class DocParser(object):
# this type of paragraph may be made up of multiple _spans, inline
# word monograms (images) and words with semantic meaning
# and now a new type "span" versus the old "_span"
# need to parse this type line by line
line = start + 1
@ -132,10 +133,10 @@ class DocParser(object):
(name, argres) = self.lineinDoc(line)
if name.endswith('_span.firstWord') :
if name.endswith('span.firstWord') :
first = int(argres)
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('_span.lastWord'):
if not name.endswith('span.lastWord'):
print 'Error: - incorrect _span ordering inside paragraph'
last = int(argres)
for wordnum in xrange(first, last):
@ -175,7 +176,7 @@ class DocParser(object):
if pclass :
classres = ' class="' + pclass + '"'
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading')
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
handle_links = len(self.link_id) > 0
@ -317,7 +318,7 @@ class DocParser(object):
# set anchor for link target on this page
if not anchorSet and not first_para_continued:
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '">&nbsp</div>\n'
htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="' + self.id + '" title="pagetype_' + pagetype + '"></div>\n'
anchorSet = True
if regtype == 'graphic' :
@ -343,7 +344,7 @@ class DocParser(object):
htmlpage += '</' + tag + '>'
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem') :
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
ptype = 'full'
# check to see if this is a continuation from the previous page
if first_para_continued :
@ -371,6 +372,27 @@ class DocParser(object):
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'vertical') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'table') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
print "Warnings - Table Conversions are notoriously poor"
print "Strongly recommend taking a screen capture image of the "
print "table in %s.svg and using it to replace this attempt at a table" % self.id
elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
@ -378,10 +400,10 @@ class DocParser(object):
else :
print 'Warning: Unknown region type', regtype
print 'Warning: region type', regtype
(pos, temp) = self.findinDoc('paragraph',start,end)
if temp:
print 'Treating this like a "text" region'
if pos != -1:
print ' is a "text" region'
regtype = 'fixed'
ptype = 'full'
# check to see if this is a continuation from the previous page
@ -400,7 +422,7 @@ class DocParser(object):
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
else :
print 'Treating this like a "graphic" region'
print ' is a "graphic" region'
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

View file

@ -77,7 +77,8 @@ def main(argv):
htmlFileName = "book.html"
htmlstr = '<html>\n'
htmlstr = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
htmlstr += '<html>\n'
filenames = os.listdir(pageDir)
filenames = sorted(filenames)
@ -85,6 +86,7 @@ def main(argv):
print 'Processing ... '
htmlstr += '<head>\n'
htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
# process metadata and retrieve fontSize info
print ' ', 'metadata0000.dat'
@ -93,6 +95,8 @@ def main(argv):
metastr = decode_meta.getMetaData(fname)
file(xname, 'wb').write(metastr)
meta_array = decode_meta.getMetaArray(fname)
htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
@ -120,11 +124,9 @@ def main(argv):
fname = os.path.join(bookDir,'other0000.dat')
xname = os.path.join(bookDir, 'style.css')
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
htmlstr += '<style>\n'
cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize, ph, pw)
file(xname, 'wb').write(cssstr)
htmlstr += cssstr
htmlstr += '</style>\n'
htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
htmlstr += '</head>\n<body>\n'
for filename in filenames:

View file

@ -49,8 +49,8 @@ class GParser(object):
path = ''
if (gly < 0) or (gly >= self.count):
return path
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]-1]
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]-1]
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
p = 0
for k in xrange(self.glen[gly], self.glen[gly+1]):
if (p == 0):
@ -272,6 +272,8 @@ def main(argv):
print 'Processing Pages ... '
# Books are at 1440 DPI. This is rendering at twice that size for
# readability when rendering to the screen.
scaledpi = 720
filenames = os.listdir(pageDir)
filenames = sorted(filenames)
@ -292,12 +294,15 @@ def main(argv):
for j in xrange(0,len(gdefs)):
pfile.write(gdefs[j])
pfile.write('</defs>\n')
for j in xrange(0,len(pp.gid)):
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
img = pp.getImages()
if (img != None):
for j in xrange(0,len(img)):
pfile.write(img[j])
if (pp.gid != None):
for j in xrange(0,len(pp.gid)):
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
pfile.write('<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n');
pfile.write('</svg>')
pfile.close()
counter += 1

View file

@ -6,7 +6,7 @@ Contributors:
DiapDealer - for extensive testing and feedback, and standalone linux/macosx version of cmbtc_dump
stewball - for extensive testing and feedback
and others for posting, feedback and testing
and many others for posting, feedback and testing
This is experimental and it will probably not work for you but...