diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py
index 7081e78..30c1e13 100644
--- a/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py
+++ b/Calibre_Plugins/K4MobiDeDRM_plugin/__init__.py
@@ -19,7 +19,7 @@ class K4DeDRM(FileTypePlugin):
description = 'Removes DRM from Mobipocket, Kindle/Mobi, Kindle/Topaz and Kindle/Print Replica files. Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.'
supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on
author = 'DiapDealer, SomeUpdates' # The author of this plugin
- version = (0, 3, 7) # The version number of this plugin
+ version = (0, 3, 8) # The version number of this plugin
file_types = set(['prc','mobi','azw','azw1','azw4','tpz']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py
index 3c27ed0..0328206 100644
--- a/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py
+++ b/Calibre_Plugins/K4MobiDeDRM_plugin/convert2xml.py
@@ -20,6 +20,8 @@ import getopt
from struct import pack
from struct import unpack
+class TpzDRMError(Exception):
+    """Error raised by the converter instead of exiting the process.
+
+    For example, it is raised when a string table lookup is given a
+    value outside the string table limits.
+    """
# Get a 7 bit encoded number from string. The most
# significant byte comes first and has the high bit (8th) set
@@ -138,7 +140,8 @@ class Dictionary(object):
return self.stable[self.pos]
else:
print "Error - %d outside of string table limits" % val
- sys.exit(-1)
+ raise TpzDRMError('outside of string table limits')
+ # sys.exit(-1)
def getSize(self):
return self.size
@@ -258,6 +261,11 @@ class PageParser(object):
'paragraph.class' : (1, 'scalar_text', 0, 0),
'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
+ 'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridSize' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridBottomCenter' : (1, 'scalar_number', 0, 0),
+ 'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0),
+
'word_semantic' : (1, 'snippets', 1, 1),
'word_semantic.type' : (1, 'scalar_text', 0, 0),
@@ -272,11 +280,17 @@ class PageParser(object):
'_span' : (1, 'snippets', 1, 0),
'_span.firstWord' : (1, 'scalar_number', 0, 0),
- '-span.lastWord' : (1, 'scalar_number', 0, 0),
+ '_span.lastWord' : (1, 'scalar_number', 0, 0),
+ '_span.gridSize' : (1, 'scalar_number', 0, 0),
+ '_span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
+ '_span.gridTopCenter' : (1, 'scalar_number', 0, 0),
'span' : (1, 'snippets', 1, 0),
'span.firstWord' : (1, 'scalar_number', 0, 0),
'span.lastWord' : (1, 'scalar_number', 0, 0),
+ 'span.gridSize' : (1, 'scalar_number', 0, 0),
+ 'span.gridBottomCenter' : (1, 'scalar_number', 0, 0),
+ 'span.gridTopCenter' : (1, 'scalar_number', 0, 0),
'extratokens' : (1, 'snippets', 1, 0),
'extratokens.type' : (1, 'scalar_text', 0, 0),
diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py
index ae2c8dd..3b32fc0 100644
--- a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py
+++ b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2html.py
@@ -271,6 +271,9 @@ class DocParser(object):
pclass = self.getClass(pclass)
+ # if paragraph uses extratokens (extra glyphs) then make it fixed
+ (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end)
+
# build up a description of the paragraph in result and return it
# first check for the basic - all words paragraph
(pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
@@ -280,6 +283,7 @@ class DocParser(object):
last = int(slast)
makeImage = (regtype == 'vertical') or (regtype == 'table')
+ makeImage = makeImage or (extraglyphs != None)
if self.fixedimage:
makeImage = makeImage or (regtype == 'fixed')
@@ -353,6 +357,8 @@ class DocParser(object):
word_class = ''
+ word_semantic_type = ''
+
while (line < end) :
(name, argres) = self.lineinDoc(line)
@@ -512,6 +518,72 @@ class DocParser(object):
return parares
+ # Collect table-of-contents lines from one paragraph description.
+ # pdesc is a sequence of (wtype, num) pairs; only 'ocr' entries are used,
+ # and num indexes self.ocrtext (and self.link_id when link ids are present).
+ # For a word carrying a positive link id whose link type is not 'external',
+ # a line 'title|linkpage\n' is appended to the result, where linkpage is the
+ # zero-padded value of self.link_page[link-1] - 1 or, when no target page is
+ # recorded for that link, self.id[4:].
+ # Returns the accumulated string ('' when no entry was produced).
+ def buildTOCEntry(self, pdesc) :
+ parares = ''
+ sep =''
+ tocentry = ''
+ # link data is only consulted when self.link_id is non-empty
+ handle_links = len(self.link_id) > 0
+
+ # offset into parares where the text belonging to the next link starts
+ lstart = 0
+
+ cnt = len(pdesc)
+ for j in xrange( 0, cnt) :
+
+ (wtype, num) = pdesc[j]
+
+ if wtype == 'ocr' :
+ word = self.ocrtext[num]
+ sep = ' '
+
+ if handle_links:
+ link = self.link_id[num]
+ if (link > 0):
+ linktype = self.link_type[link-1]
+ title = self.link_title[link-1]
+ title = title.rstrip('. ')
+ # alternative title: the words accumulated since the previous link
+ alt_title = parares[lstart:]
+ alt_title = alt_title.strip()
+ # now strip off the actual printed page number
+ alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.')
+ alt_title = alt_title.rstrip('. ')
+ # skip over any external links - can't have them in a books toc
+ if linktype == 'external' :
+ title = ''
+ alt_title = ''
+ linkpage = ''
+ else :
+ if len(self.link_page) >= link :
+ ptarget = self.link_page[link-1] - 1
+ linkpage = '%04d' % ptarget
+ else :
+ # just link to the current page
+ linkpage = self.id[4:]
+ # prefer the accumulated text when it is at least as long as the link title
+ if len(alt_title) >= len(title):
+ title = alt_title
+ # an entry is only written when both a title and a target page exist
+ if title != '' and linkpage != '':
+ tocentry += title + '|' + linkpage + '\n'
+ lstart = len(parares)
+ if word == '_link_' : word = ''
+ elif (link < 0) :
+ if word == '_link_' : word = ''
+
+ # '_lb_' words contribute no text and no separator
+ if word == '_lb_':
+ word = ''
+ sep = ''
+
+ # drop the last character of the word and join it directly to the
+ # following word (presumably removing a trailing hyphen - confirm)
+ if num in self.dehyphen_rootid :
+ word = word[0:-1]
+ sep = ''
+
+ parares += word + sep
+
+ else :
+ # entries of any other type do not contribute to the toc text
+ continue
+
+ return tocentry
+
+
+
# walk the document tree collecting the information needed
# to build an html page using the ocrText
@@ -519,6 +591,7 @@ class DocParser(object):
def process(self):
htmlpage = ''
+ tocinfo = ''
# get the ocr text
(pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
@@ -644,9 +717,9 @@ class DocParser(object):
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ tocinfo += self.buildTOCEntry(pdesc)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
-
elif (regtype == 'vertical') or (regtype == 'table') :
ptype = 'full'
if inGroup:
@@ -704,12 +777,11 @@ class DocParser(object):
htmlpage = htmlpage[0:-4]
last_para_continued = False
- return htmlpage
-
+ return htmlpage, tocinfo
def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
# create a document parser
dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
- htmlpage = dp.process()
- return htmlpage
+ # process() now yields two values, so callers receive a
+ # (htmlpage, tocinfo) pair instead of the html string alone
+ htmlpage, tocinfo = dp.process()
+ return htmlpage, tocinfo
diff --git a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py
index 6f6795d..49cf6f5 100644
--- a/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py
+++ b/Calibre_Plugins/K4MobiDeDRM_plugin/flatxml2svg.py
@@ -10,17 +10,94 @@ from struct import unpack
class PParser(object):
- def __init__(self, gd, flatxml):
+ # gd is stored as self.gd, flatxml is the flat xml text (split into lines
+ # on '\n'), and meta_array must offer .get(key, default); it supplies the
+ # fallback page size when the document itself carries none
+ def __init__(self, gd, flatxml, meta_array):
self.gd = gd
self.flatdoc = flatxml.split('\n')
+ self.docSize = len(self.flatdoc)
self.temp = []
- foo = self.getData('page.h') or self.getData('book.h')
- self.ph = foo[0]
- foo = self.getData('page.w') or self.getData('book.w')
- self.pw = foo[0]
- self.gx = self.getData('info.glyph.x')
- self.gy = self.getData('info.glyph.y')
- self.gid = self.getData('info.glyph.glyphID')
+
+ # page height and width: the largest value found among all 'page.h' /
+ # 'page.w' lines, or among the 'book.h' / 'book.w' lines when no page
+ # entry exists (an empty position list is falsy)
+ self.ph = -1
+ self.pw = -1
+ startpos = self.posinDoc('page.h') or self.posinDoc('book.h')
+ for p in startpos:
+ (name, argres) = self.lineinDoc(p)
+ self.ph = max(self.ph, int(argres))
+ startpos = self.posinDoc('page.w') or self.posinDoc('book.w')
+ for p in startpos:
+ (name, argres) = self.lineinDoc(p)
+ self.pw = max(self.pw, int(argres))
+
+ # nothing usable in the document: fall back to the metadata values,
+ # with defaults of 11000 (height) and 8500 (width)
+ if self.ph <= 0:
+ self.ph = int(meta_array.get('pageHeight', '11000'))
+ if self.pw <= 0:
+ self.pw = int(meta_array.get('pageWidth', '8500'))
+
+ # glyph x positions: values of every 'info.glyph.x' line, concatenated
+ res = []
+ startpos = self.posinDoc('info.glyph.x')
+ for p in startpos:
+ argres = self.getDataatPos('info.glyph.x', p)
+ res.extend(argres)
+ self.gx = res
+
+ # glyph y positions, gathered the same way
+ res = []
+ startpos = self.posinDoc('info.glyph.y')
+ for p in startpos:
+ argres = self.getDataatPos('info.glyph.y', p)
+ res.extend(argres)
+ self.gy = res
+
+ # glyph ids, gathered the same way
+ res = []
+ startpos = self.posinDoc('info.glyph.glyphID')
+ for p in startpos:
+ argres = self.getDataatPos('info.glyph.glyphID', p)
+ res.extend(argres)
+ self.gid = res
+
+
+    # return the (name, argres) pair for the line at pos in the document
+    def lineinDoc(self, pos) :
+        """Split the flat-xml line at index pos into (name, argres).
+
+        A line of the form 'name=value' yields (name, value), split on
+        the first '=' only; a line without '=' yields (line, '').
+        If pos lies outside 0 .. docSize-1 the pair ('', '') is
+        returned, so callers can always unpack the result.
+        """
+        # defaults so that an out-of-range pos still returns a pair
+        name = ''
+        argres = ''
+        if (pos >= 0) and (pos < self.docSize) :
+            item = self.flatdoc[pos]
+            if item.find('=') >= 0:
+                (name, argres) = item.split('=',1)
+            else :
+                name = item
+        return name, argres
+
+    # find the first line in [pos, end) whose tag name ends with tagpath
+    def findinDoc(self, tagpath, pos, end) :
+        """Search forward for a tag and return (index, value).
+
+        Lines pos .. end-1 are examined; end == -1 means "up to the end
+        of the document" and larger values are clamped to docSize.  The
+        first line whose name (the text before the first '=') ends with
+        tagpath wins; its index is returned together with its value (the
+        text after that '=', or '' when the line has no '=').
+        Returns (-1, None) when no line matches.
+        """
+        if end == -1 :
+            limit = self.docSize
+        else :
+            limit = min(self.docSize, end)
+        for idx in xrange(pos, limit) :
+            # partition yields (line, '', '') when the line has no '='
+            (name, eq, value) = self.flatdoc[idx].partition('=')
+            if name.endswith(tagpath) :
+                return idx, value
+        return -1, None
+
+    # return list of start positions for the tagpath
+    def posinDoc(self, tagpath):
+        """Return the indices of every document line matching tagpath.
+
+        findinDoc is called repeatedly, each search resuming on the line
+        after the previous hit, until it reports no further match.  The
+        result is in ascending order and is [] when nothing matches.
+        """
+        positions = []
+        cursor = 0
+        while True :
+            (hit, value) = self.findinDoc(tagpath, cursor, -1)
+            if value is None :
+                break
+            positions.append(hit)
+            cursor = hit + 1
+        return positions
+
def getData(self, path):
result = None
cnt = len(self.flatdoc)
@@ -39,6 +116,23 @@ class PParser(object):
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
+
+    def getDataatPos(self, path, pos):
+        """Return the integer values stored on the line at index pos.
+
+        The line is expected to look like 'name=v1|v2|...'.  When the
+        name (the text before the first '=') ends with path, the values
+        are converted with int() and returned as a list; a matching
+        line without any '=' yields [].  Returns None when the name
+        does not end with path.
+
+        Raises ValueError if a value on a matching line is not an
+        integer.
+        """
+        item = self.flatdoc[pos]
+        # split on the first '=' only, as lineinDoc and findinDoc do
+        (name, eq, argt) = item.partition('=')
+        # check the name before converting, so a non-matching line with
+        # non-numeric values is reported as "no data" rather than an error
+        if not name.endswith(path) :
+            return None
+        if eq == '' :
+            return []
+        return [int(arg) for arg in argt.split('|')]
+
def getDataTemp(self, path):
result = None
cnt = len(self.temp)
@@ -58,6 +152,7 @@ class PParser(object):
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
+
def getImages(self):
result = []
self.temp = self.flatdoc
@@ -69,6 +164,7 @@ class PParser(object):
src = self.getDataTemp('img.src')[0]
result.append('\n' % (src, x, y, w, h))
return result
+
def getGlyphs(self):
result = []
if (self.gid != None) and (len(self.gid) > 0):
@@ -84,25 +180,25 @@ class PParser(object):
return result
-def convert2SVG(gdict, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi):
+def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
ml = ''
- pp = PParser(gdict, flat_xml)
+ pp = PParser(gdict, flat_xml, meta_array)
ml += '\n'
if (raw):
ml += '\n'
ml += '