2020-09-27 12:54:49 +02:00
#!/usr/bin/env python3
2013-10-02 20:59:40 +02:00
# -*- coding: utf-8 -*-
# topazextract.py
# Mostly written by some_updates based on code from many others
# Changelog
# 4.9 - moved unicode_argv call inside main for Windows DeDRM compatibility
# 5.0 - Fixed potential unicode problem with command line interface
2020-09-26 22:22:47 +02:00
# 6.0 - Added Python 3 compatibility for calibre 5.0
2013-10-02 20:59:40 +02:00
2019-06-24 18:49:38 +02:00
from __future__ import print_function
2020-09-26 22:22:47 +02:00
__version__ = ' 6.0 '
2013-10-02 20:59:40 +02:00
import sys
import os , csv , getopt
import zlib , zipfile , tempfile , shutil
import traceback
from struct import pack
from struct import unpack
2020-10-04 21:36:12 +02:00
try :
from calibre_plugins . dedrm . alfcrypto import Topaz_Cipher
except :
from alfcrypto import Topaz_Cipher
2013-10-02 20:59:40 +02:00
class SafeUnbuffered:
    """Wrap an output stream so every write is encoded safely and flushed.

    Text that the stream's encoding cannot represent is written with
    replacement characters instead of raising UnicodeEncodeError, and each
    write is flushed immediately.  Attributes other than ``write`` are
    forwarded to the wrapped stream.
    """

    def __init__(self, stream):
        """Remember *stream* and its encoding (UTF-8 when it reports none)."""
        self.stream = stream
        self.encoding = stream.encoding
        if self.encoding is None:
            self.encoding = "utf-8"

    def write(self, data):
        """Write *data* (text or bytes) to the wrapped stream and flush."""
        # Only text needs encoding; bytes objects have no .encode() and are
        # passed through untouched.
        if not isinstance(data, bytes):
            data = data.encode(self.encoding, "replace")

        # Python 3 text streams expose their binary layer as ``buffer``;
        # writing the encoded bytes there bypasses the strict text encoder.
        binary = getattr(self.stream, "buffer", None)
        if binary is not None:
            binary.write(data)
            binary.flush()
            return

        # No binary layer (Python 2 file object, or a text-only stream).
        try:
            self.stream.write(data)
        except TypeError:
            # Text-only stream: hand back text that is now guaranteed to be
            # representable in the stream's encoding.
            self.stream.write(data.decode(self.encoding, "replace"))
        self.stream.flush()

    def __getattr__(self, attr):
        """Forward every other attribute lookup to the wrapped stream."""
        return getattr(self.stream, attr)
# Platform switches, evaluated once at import time.
iswindows = sys.platform.startswith('win')
isosx = sys.platform.startswith('darwin')
def unicode_argv():
    """Return the command-line arguments as a list of text strings.

    On Windows the arguments are re-read through the wide-character shell
    API; elsewhere any non-text entry of ``sys.argv`` is decoded with the
    encoding of stdin (UTF-8 when that is unavailable).
    """
    if iswindows:
        # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
        # strings.

        # Versions 2.x of Python don't support Unicode in sys.argv on
        # Windows, with the underlying Windows API instead replacing multi-byte
        # characters with '?'.
        from ctypes import POINTER, byref, cdll, c_int, windll
        from ctypes.wintypes import LPCWSTR, LPWSTR

        GetCommandLineW = cdll.kernel32.GetCommandLineW
        GetCommandLineW.argtypes = []
        GetCommandLineW.restype = LPCWSTR

        CommandLineToArgvW = windll.shell32.CommandLineToArgvW
        CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
        CommandLineToArgvW.restype = POINTER(LPWSTR)

        cmd = GetCommandLineW()
        argc = c_int(0)
        argv = CommandLineToArgvW(cmd, byref(argc))
        if argc.value > 0:
            # Remove Python executable and commands if present
            start = argc.value - len(sys.argv)
            return [argv[i] for i in range(start, argc.value)]
        # if we don't have any arguments at all, just pass back script name
        # this should never happen
        return ["topazextract.py"]
    else:
        # sys.stdin can be None (e.g. GUI launch), so look the encoding up
        # defensively before falling back to UTF-8.
        argvencoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return [arg if isinstance(arg, str) else str(arg, argvencoding)
                for arg in sys.argv]
2013-10-02 20:59:40 +02:00
#global switch
# Set to True to trace header and metadata parsing on stdout.
debug = False

# Inside calibre the helper modules live in the plugin package; when run as
# a stand-alone script they are plain sibling modules.
if 'calibre' in sys.modules:
    inCalibre = True
    from calibre_plugins.dedrm import kgenpids
else:
    inCalibre = False
    import kgenpids
class DrmException(Exception):
    """Raised for Topaz parse errors and for failures to find or use a key."""
# recursive zip creation support routine
def zipUpDir(myzip, tdir, localname):
    """Recursively add the directory ``tdir/localname`` to an open zip file.

    myzip     -- a zipfile.ZipFile opened for writing
    tdir      -- root directory on disk
    localname -- path relative to *tdir*; also the path prefix used for the
                 archive member names ("" means *tdir* itself)

    Entries are visited in sorted order so the archive layout does not
    depend on the file system's listing order.  Entries that are neither
    regular files nor directories are skipped.
    """
    currentdir = tdir
    if localname != "":
        currentdir = os.path.join(currentdir, localname)
    for entry in sorted(os.listdir(currentdir)):
        localfilePath = os.path.join(localname, entry)
        realfilePath = os.path.join(currentdir, entry)
        if os.path.isfile(realfilePath):
            myzip.write(realfilePath, localfilePath)
        elif os.path.isdir(realfilePath):
            zipUpDir(myzip, tdir, localfilePath)
#
# Utility routines
#
# Get a 7 bit encoded number from file
def bookReadEncodedNumber(fo):
    """Read one 7-bit encoded, optionally negative, number from *fo*.

    A leading 0xFF byte marks the number as negative.  The magnitude is
    stored most-significant group first, seven bits per byte, with the high
    bit set on every byte except the last.

    Raises EOFError if the file ends in the middle of the number.
    """
    def readByte():
        raw = fo.read(1)
        if len(raw) != 1:
            # ord() on an empty read would only give an opaque TypeError.
            raise EOFError("Unexpected end of file while reading an encoded number")
        return ord(raw)

    negative = False
    data = readByte()
    if data == 0xFF:
        negative = True
        data = readByte()
    if data >= 0x80:
        datax = data & 0x7F
        while data >= 0x80:
            data = readByte()
            datax = (datax << 7) + (data & 0x7F)
        data = datax
    if negative:
        data = -data
    return data
# Get a length prefixed string from file
def bookReadString(fo):
    """Read a length-prefixed byte string from *fo* and return it.

    The length is a 7-bit encoded number (see bookReadEncodedNumber).
    struct.error is raised when fewer bytes than announced are available.
    """
    stringLength = bookReadEncodedNumber(fo)
    # The repeat count and the 's' code must be adjacent: struct rejects
    # whitespace between a count and its format character.
    return unpack(str(stringLength) + 's', fo.read(stringLength))[0]
#
# crypto routines
#
# Context initialisation for the Topaz Crypto
def topazCryptoInit(key):
    """Build and return the Topaz cipher context for *key*."""
    cipher = Topaz_Cipher()
    return cipher.ctx_init(key)
    # Pure-Python reference of what ctx_init computes:
    # ctx1 = 0x0CAFFE19E
    # for keyChar in key:
    #     keyByte = ord(keyChar)
    #     ctx2 = ctx1
    #     ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
    # return [ctx1,ctx2]
# decrypt data with the context prepared by topazCryptoInit()
def topazCryptoDecrypt(data, ctx):
    """Decrypt *data* with a context prepared by topazCryptoInit()."""
    cipher = Topaz_Cipher()
    return cipher.decrypt(data, ctx)
    # Pure-Python reference of what decrypt computes:
    # ctx1 = ctx[0]
    # ctx2 = ctx[1]
    # plainText = ""
    # for dataChar in data:
    #     dataByte = ord(dataChar)
    #     m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
    #     ctx2 = ctx1
    #     ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
    #     plainText += chr(m)
    # return plainText
# Decrypt data with the PID
def decryptRecord(data, PID):
    """Decrypt *data* using a cipher context derived from *PID*."""
    return topazCryptoDecrypt(data, topazCryptoInit(PID))
# Try to decrypt a dkey record (contains the bookPID)
def decryptDkeyRecord(data, PID):
    """Decrypt one dkey record with *PID* and return the book key it holds.

    The decrypted record is unpacked as
    ``'PID' <len=8> <8-byte PID> <len=8> <8-byte book key> 'pid'``.

    Raises DrmException when the magic markers, the length fields or the
    embedded PID do not match, i.e. when *PID* is not the right key.
    """
    record = decryptRecord(data, PID)
    fields = unpack('3sB8sB8s3s', record)
    # unpack() yields bytes, so compare against bytes: a text PID is encoded
    # for the comparison only (the cipher still receives PID unchanged).
    expectedPID = PID if isinstance(PID, bytes) else PID.encode('utf-8')
    if fields[0] != b'PID' or fields[5] != b'pid':
        raise DrmException("Didn't find PID magic numbers in record")
    elif fields[1] != 8 or fields[3] != 8:
        raise DrmException("Record didn't contain correct length fields")
    elif fields[2] != expectedPID:
        raise DrmException("Record didn't contain PID")
    return fields[4]
# Decrypt all dkey records (contain the book PID)
def decryptDkeyRecords(data, PID):
    """Try *PID* on every dkey record in *data*; return the keys recovered.

    *data* starts with a one-byte record count, followed by that many
    records, each prefixed with a one-byte length.  Records that do not
    decrypt with *PID* are skipped.

    Raises DrmException("BookKey Not Found") when no record yields a key.
    """
    # A bytearray indexes to integers on both Python 2 and Python 3, which
    # avoids calling ord() on what is already an int under Python 3.
    buf = bytearray(data)
    if not buf:
        raise DrmException("BookKey Not Found")
    nbKeyRecords = buf[0]
    records = []
    pos = 1
    for _ in range(nbKeyRecords):
        length = buf[pos]
        try:
            key = decryptDkeyRecord(bytes(buf[pos + 1:pos + 1 + length]), PID)
            records.append(key)
        except DrmException:
            # Wrong PID for this record: keep trying the remaining ones.
            pass
        pos += 1 + length
    if len(records) == 0:
        raise DrmException("BookKey Not Found")
    return records
class TopazBook:
    """Reader for one Topaz ebook: parses headers, decrypts, extracts files.

    Attributes set by the constructor:
      fo                -- the book file, opened in binary mode
      outdir            -- temporary working directory for extracted pieces
      bookPayloadOffset -- file offset at which the payload starts
      bookHeaderRecords -- {tag (bytes): [[offset, decompressedLength,
                           compressedLength], ...]}
      bookMetadata      -- {key (bytes): value (bytes)}
      bookKey           -- key used for encrypted records, or None

    Tags and metadata are kept as the raw byte strings read from the file.
    """

    def __init__(self, filename):
        """Open *filename* and parse its header and metadata records.

        Raises DrmException when the file is not a Topaz file or its header
        is malformed; the file handle and temp directory are released then.
        """
        self.fo = open(filename, 'rb')
        self.outdir = tempfile.mkdtemp()
        # self.outdir = 'rawdat'
        self.bookPayloadOffset = 0
        self.bookHeaderRecords = {}
        self.bookMetadata = {}
        self.bookKey = None
        try:
            # A short read simply fails the comparison, so truncated files
            # are reported as "not a Topaz file" as well.
            magic = self.fo.read(4)
            if magic != b'TPZ0':
                raise DrmException("Parse Error : Invalid Header, not a Topaz file")
            self.parseTopazHeaders()
            self.parseMetadata()
        except Exception:
            # Don't leak the open file or the temp directory on failure.
            self.cleanup()
            raise

    @staticmethod
    def _asBytes(value):
        """Return *value* as bytes, encoding text as UTF-8."""
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def parseTopazHeaders(self):
        """Read all header records and remember where the payload starts."""
        def bookReadHeaderRecordData():
            # Read and return the data of one header record at the current book file position
            # [[offset,decompressedLength,compressedLength],...]
            nbValues = bookReadEncodedNumber(self.fo)
            if debug: print("%d records in header " % nbValues, end=' ')
            return [[bookReadEncodedNumber(self.fo) for _ in range(3)]
                    for _ in range(nbValues)]

        def parseTopazHeaderRecord():
            # Read and parse one header record at the current book file position and return the associated data
            # [[offset,decompressedLength,compressedLength],...]
            # Comparing the raw byte also rejects an empty read at EOF.
            if self.fo.read(1) != b'\x63':
                raise DrmException("Parse Error : Invalid Header")
            tag = self._asBytes(bookReadString(self.fo))
            record = bookReadHeaderRecordData()
            return [tag, record]

        nbRecords = bookReadEncodedNumber(self.fo)
        if debug: print("Headers: %d" % nbRecords)
        for _ in range(nbRecords):
            result = parseTopazHeaderRecord()
            if debug: print(result[0], ": ", result[1])
            self.bookHeaderRecords[result[0]] = result[1]
        if self.fo.read(1) != b'\x64':
            raise DrmException("Parse Error : Invalid Header")
        self.bookPayloadOffset = self.fo.tell()

    def parseMetadata(self):
        """Parse the metadata record into self.bookMetadata and return it."""
        try:
            offset = self.bookHeaderRecords[b'metadata'][0][0]
        except (KeyError, IndexError):
            raise DrmException("Parse Error : Invalid Record, record not found")
        self.fo.seek(self.bookPayloadOffset + offset)
        tag = self._asBytes(bookReadString(self.fo))
        if tag != b'metadata':
            raise DrmException("Parse Error : Record Names Don't Match")
        self.fo.read(1)  # flags byte, not used
        nbRecords = ord(self.fo.read(1))
        if debug: print("Metadata Records: %d" % nbRecords)
        for _ in range(nbRecords):
            keyval = bookReadString(self.fo)
            content = bookReadString(self.fo)
            if debug: print(keyval)
            if debug: print(content)
            self.bookMetadata[keyval] = content
        return self.bookMetadata

    def getPIDMetaInfo(self):
        """Return (keysRecord, keysRecordRecord) used for PID generation.

        keysRecord is the raw 'keys' metadata value (a comma separated list
        of metadata keys); keysRecordRecord is the concatenation of the
        values stored under those keys.  Both are empty when there is no
        'keys' entry.
        """
        keysRecord = self.bookMetadata.get(b'keys', b'')
        keysRecordRecord = b''
        if keysRecord != b'':
            keysRecordRecord = b''.join(
                self.bookMetadata.get(keyval, b'')
                for keyval in keysRecord.split(b','))
        return keysRecord, keysRecordRecord

    def getBookTitle(self):
        """Return the 'Title' metadata value as text ('' when absent)."""
        title = self.bookMetadata.get(b'Title', b'')
        return title.decode('utf-8')

    def setBookKey(self, key):
        """Set the key used to decrypt encrypted payload records."""
        self.bookKey = key

    def getBookPayloadRecord(self, name, index):
        """Return payload record *index* of section *name*.

        The record is decrypted and decompressed if necessary.  *name* may
        be given as bytes or text.

        Raises DrmException when the record is missing, when its tag or
        index does not match the header, or when it is encrypted and no
        book key has been set.
        """
        name = self._asBytes(name)
        try:
            entry = self.bookHeaderRecords[name][index]
            recordOffset = entry[0]
        except (KeyError, IndexError, TypeError):
            raise DrmException("Parse Error : Invalid Record, record not found")

        self.fo.seek(self.bookPayloadOffset + recordOffset)

        tag = self._asBytes(bookReadString(self.fo))
        if tag != name:
            raise DrmException("Parse Error : Invalid Record, record name doesn't match")

        # A negative stored index marks an encrypted record; the real index
        # is recovered as -value - 1.
        recordIndex = bookReadEncodedNumber(self.fo)
        encrypted = recordIndex < 0
        if encrypted:
            recordIndex = -recordIndex - 1

        if recordIndex != index:
            raise DrmException("Parse Error : Invalid Record, index doesn't match")

        # A non-zero compressed length means the record is zlib-compressed
        # and that many bytes are stored; otherwise the plain length is.
        decompressedLength, compressedLength = entry[1], entry[2]
        compressed = compressedLength > 0
        record = self.fo.read(compressedLength if compressed else decompressedLength)

        if encrypted:
            if not self.bookKey:
                raise DrmException("Error: Attempt to decrypt without bookKey")
            ctx = topazCryptoInit(self.bookKey)
            record = topazCryptoDecrypt(record, ctx)

        if compressed:
            record = zlib.decompress(record)

        return record

    def _extractAndGenerate(self, raw, fixedimage, successMessage):
        """Extract all sections to outdir, run genbook, return its result."""
        self.createBookDirectory()
        self.extractFiles()
        print("Successfully Extracted Topaz contents")
        if inCalibre:
            from calibre_plugins.dedrm import genbook
        else:
            import genbook

        rv = genbook.generateBook(self.outdir, raw, fixedimage)
        if rv == 0:
            print(successMessage)
        return rv

    def processBook(self, pidlst):
        """Decrypt the book with the first working PID and generate output.

        Each PID in *pidlst* is truncated to 8 characters and tried against
        the dkey record.  A book without a dkey record is extracted as-is.
        Returns the result of genbook.generateBook().

        Raises DrmException when none of the PIDs unlocks the book.
        """
        raw = 0
        fixedimage = True
        try:
            keydata = self.getBookPayloadRecord(b'dkey', 0)
        except DrmException:
            print("no dkey record found, book may not be encrypted")
            print("attempting to extrct files without a book key")
            return self._extractAndGenerate(raw, fixedimage, "Book Successfully generated.")

        # try each pid to decode the file
        bookKey = None
        for pid in pidlst:
            # use 8 digit pids here
            pid = pid[0:8]
            print("Trying: {0}".format(pid))
            try:
                bookKeys = decryptDkeyRecords(keydata, pid)
            except DrmException:
                continue
            bookKey = bookKeys[0]
            # bytes have no 'hex' codec on Python 3; hexlify works everywhere.
            import binascii
            print("Book Key Found! ({0})".format(binascii.hexlify(bookKey).decode('ascii')))
            break

        if not bookKey:
            raise DrmException("No key found in {0:d} keys tried. Read the FAQs at Harper's repository: https://github.com/apprenticeharper/DeDRM_tools/blob/master/FAQs.md".format(len(pidlst)))

        self.setBookKey(bookKey)
        return self._extractAndGenerate(raw, fixedimage, "Book Successfully generated")

    def createBookDirectory(self):
        """Create outdir and its img, color_img, page and glyphs subdirs."""
        outdir = self.outdir
        # create output directory structure
        targets = [outdir]
        targets.extend(os.path.join(outdir, sub)
                       for sub in ("img", "color_img", "page", "glyphs"))
        for destdir in targets:
            if not os.path.exists(destdir):
                os.makedirs(destdir)

    def extractFiles(self):
        """Write every payload record except 'dkey' into the work directory.

        Files are named ``<section><index:04d><ext>``; img, color, page and
        glyphs records go to their own subdirectory.  Empty records are not
        written.
        """
        outdir = self.outdir
        subdirs = {b'img': "img", b'color': "color_img",
                   b'page': "page", b'glyphs': "glyphs"}
        for name in self.bookHeaderRecords:
            key = self._asBytes(name)
            if key == b'dkey':
                continue
            ext = ".jpg" if key in (b'img', b'color') else ".dat"
            # File names and progress output need text, not a bytes repr.
            textname = key.decode('utf-8', 'replace')
            if key in subdirs:
                destdir = os.path.join(outdir, subdirs[key])
            else:
                destdir = outdir
            print("Processing Section: {0}\n. . .".format(textname), end=' ')
            for index in range(len(self.bookHeaderRecords[name])):
                fname = "{0}{1:04d}{2}".format(textname, index, ext)
                outputFile = os.path.join(destdir, fname)
                print(".", end=' ')
                record = self.getBookPayloadRecord(name, index)
                if record:
                    with open(outputFile, 'wb') as out:
                        out.write(record)
            print(" ")

    def getFile(self, zipname):
        """Write the generated HTML book as an archive at *zipname*."""
        htmlzip = zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED, False)
        try:
            htmlzip.write(os.path.join(self.outdir, "book.html"), "book.html")
            htmlzip.write(os.path.join(self.outdir, "book.opf"), "book.opf")
            if os.path.isfile(os.path.join(self.outdir, "cover.jpg")):
                htmlzip.write(os.path.join(self.outdir, "cover.jpg"), "cover.jpg")
            htmlzip.write(os.path.join(self.outdir, "style.css"), "style.css")
            zipUpDir(htmlzip, self.outdir, "img")
        finally:
            htmlzip.close()

    def getBookType(self):
        """Return the name of the book format."""
        return "Topaz"

    def getBookExtension(self):
        """Return the file extension of the archive written by getFile()."""
        return ".htmlz"

    def getSVGZip(self, zipname):
        """Write the generated SVG pages and images as a zip at *zipname*."""
        svgzip = zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED, False)
        try:
            svgzip.write(os.path.join(self.outdir, "index_svg.xhtml"), "index_svg.xhtml")
            zipUpDir(svgzip, self.outdir, "svg")
            zipUpDir(svgzip, self.outdir, "img")
        finally:
            svgzip.close()

    def cleanup(self):
        """Close the book file and remove the temporary work directory."""
        fo = getattr(self, 'fo', None)
        if fo is not None and not fo.closed:
            fo.close()
        outdir = getattr(self, 'outdir', None)
        if outdir and os.path.isdir(outdir):
            shutil.rmtree(outdir, True)
def usage(progname):
    """Print the command-line help, using *progname* as the program name."""
    print("Removes DRM protection from Topaz ebooks and extracts the contents")
    print("Usage:")
    print("    {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] <infile> <outdir>".format(progname))
2013-10-02 20:59:40 +02:00
# Main
def cli_main():
    """Command-line entry point; returns the process exit status.

    Usage: [-k <kindle.k4i>] [-p <PIDs>] [-s <serials>] <infile> <outdir>

    Writes ``<bookname>_nodrm.htmlz`` and ``<bookname>_SVG.zip`` into
    <outdir>.  Returns 0 on success and 1 on bad arguments or on any
    failure while reading or decrypting the book.
    """
    argv = unicode_argv()
    progname = os.path.basename(argv[0])
    print("TopazExtract v{0}.".format(__version__))

    try:
        opts, args = getopt.getopt(argv[1:], "k:p:s:x")
    except getopt.GetoptError as err:
        print("Error in options or arguments: {0}".format(err.args[0]))
        usage(progname)
        return 1
    if len(args) < 2:
        usage(progname)
        return 1

    infile = args[0]
    outdir = args[1]
    if not os.path.isfile(infile):
        print("Input File {0} Does Not Exist.".format(infile))
        return 1

    if not os.path.exists(outdir):
        print("Output Directory {0} Does Not Exist.".format(outdir))
        return 1

    kDatabaseFiles = []
    serials = []
    pids = []

    for o, a in opts:
        if o == '-k':
            if a is None:
                raise DrmException("Invalid parameter for -k")
            kDatabaseFiles.append(a)
        if o == '-p':
            if a is None:
                raise DrmException("Invalid parameter for -p")
            pids = a.split(',')
        if o == '-s':
            if a is None:
                raise DrmException("Invalid parameter for -s")
            serials = [serial.replace(" ", "") for serial in a.split(',')]

    bookname = os.path.splitext(os.path.basename(infile))[0]

    tb = None
    try:
        # Opening the book is inside the try block so that a malformed file
        # is reported like any other failure and the temp directory is
        # still removed.
        tb = TopazBook(infile)
        title = tb.getBookTitle()
        print("Processing Book: {0}".format(title))
        md1, md2 = tb.getPIDMetaInfo()
        pids.extend(kgenpids.getPidList(md1, md2, serials, kDatabaseFiles))

        print("Decrypting Book")
        tb.processBook(pids)

        print("Creating HTML ZIP Archive")
        zipname = os.path.join(outdir, bookname + "_nodrm.htmlz")
        tb.getFile(zipname)

        print("Creating SVG ZIP Archive")
        zipname = os.path.join(outdir, bookname + "_SVG.zip")
        tb.getSVGZip(zipname)
    except Exception:
        # DrmException and unexpected errors are reported the same way.
        print("Decryption failed\n{0}".format(traceback.format_exc()))
        return 1
    finally:
        # removing internal temporary directory of pieces (best effort)
        if tb is not None:
            try:
                tb.cleanup()
            except Exception:
                pass

    return 0
if __name__ == '__main__':
    # Make console output unbuffered and safe against unencodable characters.
    sys.stdout = SafeUnbuffered(sys.stdout)
    sys.stderr = SafeUnbuffered(sys.stderr)
    sys.exit(cli_main())