mirror of
https://github.com/noDRM/DeDRM_tools
synced 2025-01-15 15:42:03 +01:00
topazscripts 1.7
This commit is contained in:
parent
58e9c973ab
commit
66933f6972
6 changed files with 316 additions and 272 deletions
|
@ -1,3 +1,12 @@
|
|||
Changes in version 1.7
|
||||
- gensvg.py has been improved so that the glyphs render exactly (ClarkNova)
|
||||
- gensvg.py has fixed a render order "bug" that allowed some images to cover or hide text. (ClarkNova)
|
||||
- change generated html to use external stylesheet via a link to "style.css"
|
||||
- add missing <title> tag
|
||||
- make xhtml compliant doctype and minor changes to write correct xhtml
|
||||
- make divs that act as anchors visually hidden and take up 0 height and 0 width so that they have no impact on layout
|
||||
- added support for new version of the <_span> tag called <span>
|
||||
|
||||
Changes in version 1.6
|
||||
- support for books whose paragraphs have no styles
|
||||
- support for running cmbtc_dump on Linux and Mac OSX, provided you know the PID of your iPod or standalone Kindle
|
||||
|
|
|
@ -249,11 +249,17 @@ class PageParser(object):
|
|||
'word' : (1, 'snippets', 1, 0),
|
||||
'word.type' : (1, 'scalar_text', 0, 0),
|
||||
'word.class' : (1, 'scalar_text', 0, 0),
|
||||
'word.firstGlyph' : (1, 'scalar_number', 0, 0),
|
||||
'word.lastGlyph' : (1, 'scalar_number', 0, 0),
|
||||
|
||||
'_span' : (1, 'snippets', 1, 0),
|
||||
'_span.firstWord' : (1, 'scalar_number', 0, 0),
|
||||
'-span.lastWord' : (1, 'scalar_number', 0, 0),
|
||||
|
||||
'span' : (1, 'snippets', 1, 0),
|
||||
'span.firstWord' : (1, 'scalar_number', 0, 0),
|
||||
'span.lastWord' : (1, 'scalar_number', 0, 0),
|
||||
|
||||
'extratokens' : (1, 'snippets', 1, 0),
|
||||
'extratokens.type' : (1, 'scalar_text', 0, 0),
|
||||
'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
|
||||
|
|
|
@ -119,6 +119,7 @@ class DocParser(object):
|
|||
|
||||
# this type of paragraph may be made up of multiple _spans, inline
|
||||
# word monograms (images) and words with semantic meaning
|
||||
# and now a new type "span" versus the old "_span"
|
||||
|
||||
# need to parse this type line by line
|
||||
line = start + 1
|
||||
|
@ -132,10 +133,10 @@ class DocParser(object):
|
|||
|
||||
(name, argres) = self.lineinDoc(line)
|
||||
|
||||
if name.endswith('_span.firstWord') :
|
||||
if name.endswith('span.firstWord') :
|
||||
first = int(argres)
|
||||
(name, argres) = self.lineinDoc(line+1)
|
||||
if not name.endswith('_span.lastWord'):
|
||||
if not name.endswith('span.lastWord'):
|
||||
print 'Error: - incorrect _span ordering inside paragraph'
|
||||
last = int(argres)
|
||||
for wordnum in xrange(first, last):
|
||||
|
@ -175,7 +176,7 @@ class DocParser(object):
|
|||
if pclass :
|
||||
classres = ' class="' + pclass + '"'
|
||||
|
||||
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading')
|
||||
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
|
||||
|
||||
handle_links = len(self.link_id) > 0
|
||||
|
||||
|
@ -317,7 +318,7 @@ class DocParser(object):
|
|||
|
||||
# set anchor for link target on this page
|
||||
if not anchorSet and not first_para_continued:
|
||||
htmlpage += '<div id="' + self.id + '" class="page_' + pagetype + '"> </div>\n'
|
||||
htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="' + self.id + '" title="pagetype_' + pagetype + '"></div>\n'
|
||||
anchorSet = True
|
||||
|
||||
if regtype == 'graphic' :
|
||||
|
@ -343,7 +344,7 @@ class DocParser(object):
|
|||
htmlpage += '</' + tag + '>'
|
||||
|
||||
|
||||
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem') :
|
||||
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
|
||||
ptype = 'full'
|
||||
# check to see if this is a continuation from the previous page
|
||||
if first_para_continued :
|
||||
|
@ -371,6 +372,27 @@ class DocParser(object):
|
|||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||
|
||||
|
||||
elif (regtype == 'vertical') :
|
||||
ptype = 'full'
|
||||
if first_para_continued :
|
||||
ptype = 'end'
|
||||
first_para_continued = False
|
||||
(pclass, pdesc) = self.getParaDescription(start,end)
|
||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||
|
||||
|
||||
elif (regtype == 'table') :
|
||||
ptype = 'full'
|
||||
if first_para_continued :
|
||||
ptype = 'end'
|
||||
first_para_continued = False
|
||||
(pclass, pdesc) = self.getParaDescription(start,end)
|
||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||
print "Warnings - Table Conversions are notoriously poor"
|
||||
print "Strongly recommend taking a screen capture image of the "
|
||||
print "table in %s.svg and using it to replace this attempt at a table" % self.id
|
||||
|
||||
|
||||
elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
|
||||
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
||||
if simgsrc:
|
||||
|
@ -378,10 +400,10 @@ class DocParser(object):
|
|||
|
||||
|
||||
else :
|
||||
print 'Warning: Unknown region type', regtype
|
||||
print 'Warning: region type', regtype
|
||||
(pos, temp) = self.findinDoc('paragraph',start,end)
|
||||
if temp:
|
||||
print 'Treating this like a "text" region'
|
||||
if pos != -1:
|
||||
print ' is a "text" region'
|
||||
regtype = 'fixed'
|
||||
ptype = 'full'
|
||||
# check to see if this is a continuation from the previous page
|
||||
|
@ -400,7 +422,7 @@ class DocParser(object):
|
|||
else :
|
||||
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
|
||||
else :
|
||||
print 'Treating this like a "graphic" region'
|
||||
print ' is a "graphic" region'
|
||||
(pos, simgsrc) = self.findinDoc('img.src',start,end)
|
||||
if simgsrc:
|
||||
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
|
||||
|
|
|
@ -77,7 +77,8 @@ def main(argv):
|
|||
|
||||
|
||||
htmlFileName = "book.html"
|
||||
htmlstr = '<html>\n'
|
||||
htmlstr = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
|
||||
htmlstr += '<html>\n'
|
||||
|
||||
filenames = os.listdir(pageDir)
|
||||
filenames = sorted(filenames)
|
||||
|
@ -85,6 +86,7 @@ def main(argv):
|
|||
print 'Processing ... '
|
||||
|
||||
htmlstr += '<head>\n'
|
||||
htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
|
||||
|
||||
# process metadata and retrieve fontSize info
|
||||
print ' ', 'metadata0000.dat'
|
||||
|
@ -93,6 +95,8 @@ def main(argv):
|
|||
metastr = decode_meta.getMetaData(fname)
|
||||
file(xname, 'wb').write(metastr)
|
||||
meta_array = decode_meta.getMetaArray(fname)
|
||||
|
||||
htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
|
||||
htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
|
||||
htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
|
||||
|
||||
|
@ -120,11 +124,9 @@ def main(argv):
|
|||
fname = os.path.join(bookDir,'other0000.dat')
|
||||
xname = os.path.join(bookDir, 'style.css')
|
||||
xmlstr = convert2xml.main('convert2xml.py --flat-xml ' + dictFile + ' ' + fname)
|
||||
htmlstr += '<style>\n'
|
||||
cssstr , classlst = stylexml2css.convert2CSS(xmlstr, fontsize, ph, pw)
|
||||
file(xname, 'wb').write(cssstr)
|
||||
htmlstr += cssstr
|
||||
htmlstr += '</style>\n'
|
||||
htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
|
||||
htmlstr += '</head>\n<body>\n'
|
||||
|
||||
for filename in filenames:
|
||||
|
|
|
@ -49,8 +49,8 @@ class GParser(object):
|
|||
path = ''
|
||||
if (gly < 0) or (gly >= self.count):
|
||||
return path
|
||||
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]-1]
|
||||
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]-1]
|
||||
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
|
||||
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
|
||||
p = 0
|
||||
for k in xrange(self.glen[gly], self.glen[gly+1]):
|
||||
if (p == 0):
|
||||
|
@ -272,6 +272,8 @@ def main(argv):
|
|||
|
||||
print 'Processing Pages ... '
|
||||
|
||||
# Books are at 1440 DPI. This is rendering at twice that size for
|
||||
# readability when rendering to the screen.
|
||||
scaledpi = 720
|
||||
filenames = os.listdir(pageDir)
|
||||
filenames = sorted(filenames)
|
||||
|
@ -292,12 +294,15 @@ def main(argv):
|
|||
for j in xrange(0,len(gdefs)):
|
||||
pfile.write(gdefs[j])
|
||||
pfile.write('</defs>\n')
|
||||
for j in xrange(0,len(pp.gid)):
|
||||
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
|
||||
img = pp.getImages()
|
||||
if (img != None):
|
||||
for j in xrange(0,len(img)):
|
||||
pfile.write(img[j])
|
||||
if (pp.gid != None):
|
||||
for j in xrange(0,len(pp.gid)):
|
||||
pfile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
|
||||
if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
|
||||
pfile.write('<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n');
|
||||
pfile.write('</svg>')
|
||||
pfile.close()
|
||||
counter += 1
|
||||
|
|
|
@ -6,7 +6,7 @@ Contributors:
|
|||
DiapDealer - for extensive testing and feedback, and standalone linux/macosx version of cmbtc_dump
|
||||
stewball - for extensive testing and feedback
|
||||
|
||||
and others for posting, feedback and testing
|
||||
and many others for posting, feedback and testing
|
||||
|
||||
|
||||
This is experimental and it will probably not work for you but...
|
||||
|
|
Loading…
Reference in a new issue