mirror of
https://github.com/noDRM/DeDRM_tools
synced 2025-01-01 06:21:00 +01:00
afa4ac5716
THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0
540 lines
18 KiB
Python
540 lines
18 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# topazextract.py
|
|
# Mostly written by some_updates based on code from many others
|
|
|
|
# Changelog
|
|
# 4.9 - moved unicode_argv call inside main for Windows DeDRM compatibility
|
|
# 5.0 - Fixed potential unicode problem with command line interface
|
|
# 6.0 - Added Python 3 compatibility for calibre 5.0
|
|
|
|
from __future__ import print_function
|
|
__version__ = '6.0'
|
|
|
|
import sys
|
|
import os, csv, getopt
|
|
import zlib, zipfile, tempfile, shutil
|
|
import traceback
|
|
from struct import pack
|
|
from struct import unpack
|
|
from calibre_plugins.dedrm.alfcrypto import Topaz_Cipher
|
|
|
|
class SafeUnbuffered:
|
|
def __init__(self, stream):
|
|
self.stream = stream
|
|
self.encoding = stream.encoding
|
|
if self.encoding == None:
|
|
self.encoding = "utf-8"
|
|
def write(self, data):
|
|
if isinstance(data,bytes):
|
|
data = data.encode(self.encoding,"replace")
|
|
self.stream.write(data)
|
|
self.stream.flush()
|
|
def __getattr__(self, attr):
|
|
return getattr(self.stream, attr)
|
|
|
|
iswindows = sys.platform.startswith('win')
|
|
isosx = sys.platform.startswith('darwin')
|
|
|
|
def unicode_argv():
|
|
if iswindows:
|
|
# Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
|
|
# strings.
|
|
|
|
# Versions 2.x of Python don't support Unicode in sys.argv on
|
|
# Windows, with the underlying Windows API instead replacing multi-byte
|
|
# characters with '?'.
|
|
|
|
|
|
from ctypes import POINTER, byref, cdll, c_int, windll
|
|
from ctypes.wintypes import LPCWSTR, LPWSTR
|
|
|
|
GetCommandLineW = cdll.kernel32.GetCommandLineW
|
|
GetCommandLineW.argtypes = []
|
|
GetCommandLineW.restype = LPCWSTR
|
|
|
|
CommandLineToArgvW = windll.shell32.CommandLineToArgvW
|
|
CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
|
|
CommandLineToArgvW.restype = POINTER(LPWSTR)
|
|
|
|
cmd = GetCommandLineW()
|
|
argc = c_int(0)
|
|
argv = CommandLineToArgvW(cmd, byref(argc))
|
|
if argc.value > 0:
|
|
# Remove Python executable and commands if present
|
|
start = argc.value - len(sys.argv)
|
|
return [argv[i] for i in
|
|
range(start, argc.value)]
|
|
# if we don't have any arguments at all, just pass back script name
|
|
# this should never happen
|
|
return [u"mobidedrm.py"]
|
|
else:
|
|
argvencoding = sys.stdin.encoding
|
|
if argvencoding == None:
|
|
argvencoding = 'utf-8'
|
|
return argv
|
|
|
|
#global switch
|
|
debug = False
|
|
|
|
if 'calibre' in sys.modules:
|
|
inCalibre = True
|
|
from calibre_plugins.dedrm import kgenpids
|
|
else:
|
|
inCalibre = False
|
|
import kgenpids
|
|
|
|
|
|
class DrmException(Exception):
|
|
pass
|
|
|
|
|
|
# recursive zip creation support routine
|
|
def zipUpDir(myzip, tdir, localname):
|
|
currentdir = tdir
|
|
if localname != u"":
|
|
currentdir = os.path.join(currentdir,localname)
|
|
list = os.listdir(currentdir)
|
|
for file in list:
|
|
afilename = file
|
|
localfilePath = os.path.join(localname, afilename)
|
|
realfilePath = os.path.join(currentdir,file)
|
|
if os.path.isfile(realfilePath):
|
|
myzip.write(realfilePath, localfilePath)
|
|
elif os.path.isdir(realfilePath):
|
|
zipUpDir(myzip, tdir, localfilePath)
|
|
|
|
#
|
|
# Utility routines
|
|
#
|
|
|
|
# Get a 7 bit encoded number from file
|
|
def bookReadEncodedNumber(fo):
|
|
flag = False
|
|
data = ord(fo.read(1))
|
|
if data == 0xFF:
|
|
flag = True
|
|
data = ord(fo.read(1))
|
|
if data >= 0x80:
|
|
datax = (data & 0x7F)
|
|
while data >= 0x80 :
|
|
data = ord(fo.read(1))
|
|
datax = (datax <<7) + (data & 0x7F)
|
|
data = datax
|
|
if flag:
|
|
data = -data
|
|
return data
|
|
|
|
# Get a length prefixed string from file
|
|
def bookReadString(fo):
|
|
stringLength = bookReadEncodedNumber(fo)
|
|
return unpack(str(stringLength)+'s',fo.read(stringLength))[0]
|
|
|
|
#
|
|
# crypto routines
|
|
#
|
|
|
|
# Context initialisation for the Topaz Crypto
|
|
def topazCryptoInit(key):
|
|
return Topaz_Cipher().ctx_init(key)
|
|
|
|
# ctx1 = 0x0CAFFE19E
|
|
# for keyChar in key:
|
|
# keyByte = ord(keyChar)
|
|
# ctx2 = ctx1
|
|
# ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
|
|
# return [ctx1,ctx2]
|
|
|
|
# decrypt data with the context prepared by topazCryptoInit()
|
|
def topazCryptoDecrypt(data, ctx):
|
|
return Topaz_Cipher().decrypt(data, ctx)
|
|
# ctx1 = ctx[0]
|
|
# ctx2 = ctx[1]
|
|
# plainText = ""
|
|
# for dataChar in data:
|
|
# dataByte = ord(dataChar)
|
|
# m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
|
|
# ctx2 = ctx1
|
|
# ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
|
|
# plainText += chr(m)
|
|
# return plainText
|
|
|
|
# Decrypt data with the PID
|
|
def decryptRecord(data,PID):
|
|
ctx = topazCryptoInit(PID)
|
|
return topazCryptoDecrypt(data, ctx)
|
|
|
|
# Try to decrypt a dkey record (contains the bookPID)
|
|
def decryptDkeyRecord(data,PID):
|
|
record = decryptRecord(data,PID)
|
|
fields = unpack('3sB8sB8s3s',record)
|
|
if fields[0] != 'PID' or fields[5] != 'pid' :
|
|
raise DrmException(u"Didn't find PID magic numbers in record")
|
|
elif fields[1] != 8 or fields[3] != 8 :
|
|
raise DrmException(u"Record didn't contain correct length fields")
|
|
elif fields[2] != PID :
|
|
raise DrmException(u"Record didn't contain PID")
|
|
return fields[4]
|
|
|
|
# Decrypt all dkey records (contain the book PID)
|
|
def decryptDkeyRecords(data,PID):
|
|
nbKeyRecords = ord(data[0])
|
|
records = []
|
|
data = data[1:]
|
|
for i in range (0,nbKeyRecords):
|
|
length = ord(data[0])
|
|
try:
|
|
key = decryptDkeyRecord(data[1:length+1],PID)
|
|
records.append(key)
|
|
except DrmException:
|
|
pass
|
|
data = data[1+length:]
|
|
if len(records) == 0:
|
|
raise DrmException(u"BookKey Not Found")
|
|
return records
|
|
|
|
|
|
class TopazBook:
|
|
def __init__(self, filename):
|
|
self.fo = open(filename, 'rb')
|
|
self.outdir = tempfile.mkdtemp()
|
|
# self.outdir = 'rawdat'
|
|
self.bookPayloadOffset = 0
|
|
self.bookHeaderRecords = {}
|
|
self.bookMetadata = {}
|
|
self.bookKey = None
|
|
magic = unpack('4s',self.fo.read(4))[0]
|
|
if magic != 'TPZ0':
|
|
raise DrmException(u"Parse Error : Invalid Header, not a Topaz file")
|
|
self.parseTopazHeaders()
|
|
self.parseMetadata()
|
|
|
|
def parseTopazHeaders(self):
|
|
def bookReadHeaderRecordData():
|
|
# Read and return the data of one header record at the current book file position
|
|
# [[offset,decompressedLength,compressedLength],...]
|
|
nbValues = bookReadEncodedNumber(self.fo)
|
|
if debug: print("%d records in header " % nbValues, end=' ')
|
|
values = []
|
|
for i in range (0,nbValues):
|
|
values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
|
|
return values
|
|
def parseTopazHeaderRecord():
|
|
# Read and parse one header record at the current book file position and return the associated data
|
|
# [[offset,decompressedLength,compressedLength],...]
|
|
if ord(self.fo.read(1)) != 0x63:
|
|
raise DrmException(u"Parse Error : Invalid Header")
|
|
tag = bookReadString(self.fo)
|
|
record = bookReadHeaderRecordData()
|
|
return [tag,record]
|
|
nbRecords = bookReadEncodedNumber(self.fo)
|
|
if debug: print("Headers: %d" % nbRecords)
|
|
for i in range (0,nbRecords):
|
|
result = parseTopazHeaderRecord()
|
|
if debug: print(result[0], ": ", result[1])
|
|
self.bookHeaderRecords[result[0]] = result[1]
|
|
if ord(self.fo.read(1)) != 0x64 :
|
|
raise DrmException(u"Parse Error : Invalid Header")
|
|
self.bookPayloadOffset = self.fo.tell()
|
|
|
|
def parseMetadata(self):
|
|
# Parse the metadata record from the book payload and return a list of [key,values]
|
|
self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords['metadata'][0][0])
|
|
tag = bookReadString(self.fo)
|
|
if tag != 'metadata' :
|
|
raise DrmException(u"Parse Error : Record Names Don't Match")
|
|
flags = ord(self.fo.read(1))
|
|
nbRecords = ord(self.fo.read(1))
|
|
if debug: print("Metadata Records: %d" % nbRecords)
|
|
for i in range (0,nbRecords) :
|
|
keyval = bookReadString(self.fo)
|
|
content = bookReadString(self.fo)
|
|
if debug: print(keyval)
|
|
if debug: print(content)
|
|
self.bookMetadata[keyval] = content
|
|
return self.bookMetadata
|
|
|
|
def getPIDMetaInfo(self):
|
|
keysRecord = self.bookMetadata.get('keys','')
|
|
keysRecordRecord = ''
|
|
if keysRecord != '':
|
|
keylst = keysRecord.split(',')
|
|
for keyval in keylst:
|
|
keysRecordRecord += self.bookMetadata.get(keyval,'')
|
|
return keysRecord, keysRecordRecord
|
|
|
|
def getBookTitle(self):
|
|
title = ''
|
|
if 'Title' in self.bookMetadata:
|
|
title = self.bookMetadata['Title']
|
|
return title.decode('utf-8')
|
|
|
|
def setBookKey(self, key):
|
|
self.bookKey = key
|
|
|
|
def getBookPayloadRecord(self, name, index):
|
|
# Get a record in the book payload, given its name and index.
|
|
# decrypted and decompressed if necessary
|
|
encrypted = False
|
|
compressed = False
|
|
try:
|
|
recordOffset = self.bookHeaderRecords[name][index][0]
|
|
except:
|
|
raise DrmException("Parse Error : Invalid Record, record not found")
|
|
|
|
self.fo.seek(self.bookPayloadOffset + recordOffset)
|
|
|
|
tag = bookReadString(self.fo)
|
|
if tag != name :
|
|
raise DrmException("Parse Error : Invalid Record, record name doesn't match")
|
|
|
|
recordIndex = bookReadEncodedNumber(self.fo)
|
|
if recordIndex < 0 :
|
|
encrypted = True
|
|
recordIndex = -recordIndex -1
|
|
|
|
if recordIndex != index :
|
|
raise DrmException("Parse Error : Invalid Record, index doesn't match")
|
|
|
|
if (self.bookHeaderRecords[name][index][2] > 0):
|
|
compressed = True
|
|
record = self.fo.read(self.bookHeaderRecords[name][index][2])
|
|
else:
|
|
record = self.fo.read(self.bookHeaderRecords[name][index][1])
|
|
|
|
if encrypted:
|
|
if self.bookKey:
|
|
ctx = topazCryptoInit(self.bookKey)
|
|
record = topazCryptoDecrypt(record,ctx)
|
|
else :
|
|
raise DrmException("Error: Attempt to decrypt without bookKey")
|
|
|
|
if compressed:
|
|
record = zlib.decompress(record)
|
|
|
|
return record
|
|
|
|
def processBook(self, pidlst):
|
|
raw = 0
|
|
fixedimage=True
|
|
try:
|
|
keydata = self.getBookPayloadRecord('dkey', 0)
|
|
except DrmException as e:
|
|
print(u"no dkey record found, book may not be encrypted")
|
|
print(u"attempting to extrct files without a book key")
|
|
self.createBookDirectory()
|
|
self.extractFiles()
|
|
print(u"Successfully Extracted Topaz contents")
|
|
if inCalibre:
|
|
from calibre_plugins.dedrm import genbook
|
|
else:
|
|
import genbook
|
|
|
|
rv = genbook.generateBook(self.outdir, raw, fixedimage)
|
|
if rv == 0:
|
|
print(u"Book Successfully generated.")
|
|
return rv
|
|
|
|
# try each pid to decode the file
|
|
bookKey = None
|
|
for pid in pidlst:
|
|
# use 8 digit pids here
|
|
pid = pid[0:8]
|
|
print(u"Trying: {0}".format(pid))
|
|
bookKeys = []
|
|
data = keydata
|
|
try:
|
|
bookKeys+=decryptDkeyRecords(data,pid)
|
|
except DrmException as e:
|
|
pass
|
|
else:
|
|
bookKey = bookKeys[0]
|
|
print(u"Book Key Found! ({0})".format(bookKey.encode('hex')))
|
|
break
|
|
|
|
if not bookKey:
|
|
raise DrmException(u"No key found in {0:d} keys tried. Read the FAQs at Harper's repository: https://github.com/apprenticeharper/DeDRM_tools/blob/master/FAQs.md".format(len(pidlst)))
|
|
|
|
self.setBookKey(bookKey)
|
|
self.createBookDirectory()
|
|
self.extractFiles()
|
|
print(u"Successfully Extracted Topaz contents")
|
|
if inCalibre:
|
|
from calibre_plugins.dedrm import genbook
|
|
else:
|
|
import genbook
|
|
|
|
rv = genbook.generateBook(self.outdir, raw, fixedimage)
|
|
if rv == 0:
|
|
print(u"Book Successfully generated")
|
|
return rv
|
|
|
|
def createBookDirectory(self):
|
|
outdir = self.outdir
|
|
# create output directory structure
|
|
if not os.path.exists(outdir):
|
|
os.makedirs(outdir)
|
|
destdir = os.path.join(outdir,u"img")
|
|
if not os.path.exists(destdir):
|
|
os.makedirs(destdir)
|
|
destdir = os.path.join(outdir,u"color_img")
|
|
if not os.path.exists(destdir):
|
|
os.makedirs(destdir)
|
|
destdir = os.path.join(outdir,u"page")
|
|
if not os.path.exists(destdir):
|
|
os.makedirs(destdir)
|
|
destdir = os.path.join(outdir,u"glyphs")
|
|
if not os.path.exists(destdir):
|
|
os.makedirs(destdir)
|
|
|
|
def extractFiles(self):
|
|
outdir = self.outdir
|
|
for headerRecord in self.bookHeaderRecords:
|
|
name = headerRecord
|
|
if name != 'dkey':
|
|
ext = u".dat"
|
|
if name == 'img': ext = u".jpg"
|
|
if name == 'color' : ext = u".jpg"
|
|
print(u"Processing Section: {0}\n. . .".format(name), end=' ')
|
|
for index in range (0,len(self.bookHeaderRecords[name])) :
|
|
fname = u"{0}{1:04d}{2}".format(name,index,ext)
|
|
destdir = outdir
|
|
if name == 'img':
|
|
destdir = os.path.join(outdir,u"img")
|
|
if name == 'color':
|
|
destdir = os.path.join(outdir,u"color_img")
|
|
if name == 'page':
|
|
destdir = os.path.join(outdir,u"page")
|
|
if name == 'glyphs':
|
|
destdir = os.path.join(outdir,u"glyphs")
|
|
outputFile = os.path.join(destdir,fname)
|
|
print(u".", end=' ')
|
|
record = self.getBookPayloadRecord(name,index)
|
|
if record != '':
|
|
open(outputFile, 'wb').write(record)
|
|
print(u" ")
|
|
|
|
def getFile(self, zipname):
|
|
htmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
|
|
htmlzip.write(os.path.join(self.outdir,u"book.html"),u"book.html")
|
|
htmlzip.write(os.path.join(self.outdir,u"book.opf"),u"book.opf")
|
|
if os.path.isfile(os.path.join(self.outdir,u"cover.jpg")):
|
|
htmlzip.write(os.path.join(self.outdir,u"cover.jpg"),u"cover.jpg")
|
|
htmlzip.write(os.path.join(self.outdir,u"style.css"),u"style.css")
|
|
zipUpDir(htmlzip, self.outdir, u"img")
|
|
htmlzip.close()
|
|
|
|
def getBookType(self):
|
|
return u"Topaz"
|
|
|
|
def getBookExtension(self):
|
|
return u".htmlz"
|
|
|
|
def getSVGZip(self, zipname):
|
|
svgzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
|
|
svgzip.write(os.path.join(self.outdir,u"index_svg.xhtml"),u"index_svg.xhtml")
|
|
zipUpDir(svgzip, self.outdir, u"svg")
|
|
zipUpDir(svgzip, self.outdir, u"img")
|
|
svgzip.close()
|
|
|
|
def cleanup(self):
|
|
if os.path.isdir(self.outdir):
|
|
shutil.rmtree(self.outdir, True)
|
|
|
|
def usage(progname):
|
|
print(u"Removes DRM protection from Topaz ebooks and extracts the contents")
|
|
print(u"Usage:")
|
|
print(u" {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] <infile> <outdir>".format(progname))
|
|
|
|
# Main
|
|
def cli_main():
|
|
argv=unicode_argv()
|
|
progname = os.path.basename(argv[0])
|
|
print(u"TopazExtract v{0}.".format(__version__))
|
|
|
|
try:
|
|
opts, args = getopt.getopt(argv[1:], "k:p:s:x")
|
|
except getopt.GetoptError as err:
|
|
print(u"Error in options or arguments: {0}".format(err.args[0]))
|
|
usage(progname)
|
|
return 1
|
|
if len(args)<2:
|
|
usage(progname)
|
|
return 1
|
|
|
|
infile = args[0]
|
|
outdir = args[1]
|
|
if not os.path.isfile(infile):
|
|
print(u"Input File {0} Does Not Exist.".format(infile))
|
|
return 1
|
|
|
|
if not os.path.exists(outdir):
|
|
print(u"Output Directory {0} Does Not Exist.".format(outdir))
|
|
return 1
|
|
|
|
kDatabaseFiles = []
|
|
serials = []
|
|
pids = []
|
|
|
|
for o, a in opts:
|
|
if o == '-k':
|
|
if a == None :
|
|
raise DrmException("Invalid parameter for -k")
|
|
kDatabaseFiles.append(a)
|
|
if o == '-p':
|
|
if a == None :
|
|
raise DrmException("Invalid parameter for -p")
|
|
pids = a.split(',')
|
|
if o == '-s':
|
|
if a == None :
|
|
raise DrmException("Invalid parameter for -s")
|
|
serials = [serial.replace(" ","") for serial in a.split(',')]
|
|
|
|
bookname = os.path.splitext(os.path.basename(infile))[0]
|
|
|
|
tb = TopazBook(infile)
|
|
title = tb.getBookTitle()
|
|
print(u"Processing Book: {0}".format(title))
|
|
md1, md2 = tb.getPIDMetaInfo()
|
|
pids.extend(kgenpids.getPidList(md1, md2, serials, kDatabaseFiles))
|
|
|
|
try:
|
|
print(u"Decrypting Book")
|
|
tb.processBook(pids)
|
|
|
|
print(u" Creating HTML ZIP Archive")
|
|
zipname = os.path.join(outdir, bookname + u"_nodrm.htmlz")
|
|
tb.getFile(zipname)
|
|
|
|
print(u" Creating SVG ZIP Archive")
|
|
zipname = os.path.join(outdir, bookname + u"_SVG.zip")
|
|
tb.getSVGZip(zipname)
|
|
|
|
# removing internal temporary directory of pieces
|
|
tb.cleanup()
|
|
|
|
except DrmException as e:
|
|
print(u"Decryption failed\n{0}".format(traceback.format_exc()))
|
|
|
|
try:
|
|
tb.cleanup()
|
|
except:
|
|
pass
|
|
return 1
|
|
|
|
except Exception as e:
|
|
print(u"Decryption failed\n{0}".format(traceback.format_exc()))
|
|
try:
|
|
tb.cleanup()
|
|
except:
|
|
pass
|
|
return 1
|
|
|
|
return 0
|
|
|
|
|
|
if __name__ == '__main__':
|
|
sys.stdout=SafeUnbuffered(sys.stdout)
|
|
sys.stderr=SafeUnbuffered(sys.stderr)
|
|
sys.exit(cli_main())
|