mirror of
https://github.com/apprenticeharper/DeDRM_tools
synced 2024-12-28 09:58:56 +01:00
afa4ac5716
THIS IS ON THE MASTER BRANCH. The Master branch will be Python 3.0 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3.0 compatibility. I can see a lot of work has been done. There's more to do. I've bumped the version number of everything I came across to the next major number for Python 3.0 compatibility indication. Thanks everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0
361 lines
13 KiB
Python
361 lines
13 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
|
||
from __future__ import with_statement
|
||
|
||
# k4mobidedrm.py
|
||
# Copyright © 2008-2020 by Apprentice Harper et al.
|
||
|
||
__license__ = 'GPL v3'
|
||
__version__ = '6.0'
|
||
|
||
# Engine to remove drm from Kindle and Mobipocket ebooks
|
||
# for personal use for archiving and converting your ebooks
|
||
|
||
# PLEASE DO NOT PIRATE EBOOKS!
|
||
|
||
# We want all authors and publishers, and ebook stores to live
|
||
# long and prosperous lives but at the same time we just want to
|
||
# be able to read OUR books on whatever device we want and to keep
|
||
# readable for a long, long time
|
||
|
||
# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
|
||
# unswindle, DarkReverser, ApprenticeAlf, and many many others
|
||
|
||
# Special thanks to The Dark Reverser for MobiDeDrm and CMBDTC for cmbdtc_dump
|
||
# from which this script borrows most unashamedly.
|
||
|
||
# Changelog
|
||
# 1.0 - Name change to k4mobidedrm. Adds Mac support, Adds plugin code
|
||
# 1.1 - Adds support for additional kindle.info files
|
||
# 1.2 - Better error handling for older Mobipocket
|
||
# 1.3 - Don't try to decrypt Topaz books
|
||
# 1.7 - Add support for Topaz books and Kindle serial numbers. Split code.
|
||
# 1.9 - Tidy up after Topaz, minor exception changes
|
||
# 2.1 - Topaz fix and filename sanitizing
|
||
# 2.2 - Topaz Fix and minor Mac code fix
|
||
# 2.3 - More Topaz fixes
|
||
# 2.4 - K4PC/Mac key generation fix
|
||
# 2.6 - Better handling of non-K4PC/Mac ebooks
|
||
# 2.7 - Better trailing bytes handling in mobidedrm
|
||
# 2.8 - Moved parsing of kindle.info files to mac & pc util files.
|
||
# 3.1 - Updated for new calibre interface. Now __init__ in plugin.
|
||
# 3.5 - Now support Kindle for PC/Mac 1.6
|
||
# 3.6 - Even better trailing bytes handling in mobidedrm
|
||
# 3.7 - Add support for Amazon Print Replica ebooks.
|
||
# 3.8 - Improved Topaz support
|
||
# 4.1 - Improved Topaz support and faster decryption with alfcrypto
|
||
# 4.2 - Added support for Amazon's KF8 format ebooks
|
||
# 4.4 - Linux calls to Wine added, and improved configuration dialog
|
||
# 4.5 - Linux works again without Wine. Some Mac key file search changes
|
||
# 4.6 - First attempt to handle unicode properly
|
||
# 4.7 - Added timing reports, and changed search for Mac key files
|
||
# 4.8 - Much better unicode handling, matching the updated inept and ignoble scripts
|
||
# - Moved back into plugin, __init__ in plugin now only contains plugin code.
|
||
# 4.9 - Missed some invalid characters in cleanup_name
|
||
# 5.0 - Extraction of info from Kindle for PC/Mac moved into kindlekey.py
|
||
# - tweaked GetDecryptedBook interface to leave passed parameters unchanged
|
||
# 5.1 - moved unicode_argv call inside main for Windows DeDRM compatibility
|
||
# 5.2 - Fixed error in command line processing of unicode arguments
|
||
# 5.3 - Changed Android support to allow passing of backup .ab files
|
||
# 5.4 - Recognise KFX files masquerading as azw, even if we can't decrypt them yet.
|
||
# 5.5 - Added GPL v3 licence explicitly.
|
||
# 5.6 - Invoke KFXZipBook to handle zipped KFX files
|
||
# 5.7 - Revamp cleanup_name
|
||
# 6.0 - Added Python 3 compatibility for calibre 5.0
|
||
|
||
|
||
import sys, os, re
|
||
import csv
|
||
import getopt
|
||
import re
|
||
import traceback
|
||
import time
|
||
import html.entities
|
||
import json
|
||
|
||
class DrmException(Exception):
|
||
pass
|
||
|
||
if 'calibre' in sys.modules:
|
||
inCalibre = True
|
||
else:
|
||
inCalibre = False
|
||
|
||
if inCalibre:
|
||
from calibre_plugins.dedrm import mobidedrm
|
||
from calibre_plugins.dedrm import topazextract
|
||
from calibre_plugins.dedrm import kgenpids
|
||
from calibre_plugins.dedrm import androidkindlekey
|
||
from calibre_plugins.dedrm import kfxdedrm
|
||
else:
|
||
import mobidedrm
|
||
import topazextract
|
||
import kgenpids
|
||
import androidkindlekey
|
||
import kfxdedrm
|
||
|
||
# Wrap a stream so that output gets flushed immediately
|
||
# and also make sure that any unicode strings get
|
||
# encoded using "replace" before writing them.
|
||
class SafeUnbuffered:
|
||
def __init__(self, stream):
|
||
self.stream = stream
|
||
self.encoding = stream.encoding
|
||
if self.encoding == None:
|
||
self.encoding = "utf-8"
|
||
def write(self, data):
|
||
if isinstance(data,bytes):
|
||
data = data.encode(self.encoding,"replace")
|
||
self.stream.write(data)
|
||
self.stream.flush()
|
||
def __getattr__(self, attr):
|
||
return getattr(self.stream, attr)
|
||
|
||
iswindows = sys.platform.startswith('win')
|
||
isosx = sys.platform.startswith('darwin')
|
||
|
||
def unicode_argv():
|
||
if iswindows:
|
||
# Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
|
||
# strings.
|
||
|
||
# Versions 2.x of Python don't support Unicode in sys.argv on
|
||
# Windows, with the underlying Windows API instead replacing multi-byte
|
||
# characters with '?'.
|
||
|
||
|
||
from ctypes import POINTER, byref, cdll, c_int, windll
|
||
from ctypes.wintypes import LPCWSTR, LPWSTR
|
||
|
||
GetCommandLineW = cdll.kernel32.GetCommandLineW
|
||
GetCommandLineW.argtypes = []
|
||
GetCommandLineW.restype = LPCWSTR
|
||
|
||
CommandLineToArgvW = windll.shell32.CommandLineToArgvW
|
||
CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
|
||
CommandLineToArgvW.restype = POINTER(LPWSTR)
|
||
|
||
cmd = GetCommandLineW()
|
||
argc = c_int(0)
|
||
argv = CommandLineToArgvW(cmd, byref(argc))
|
||
if argc.value > 0:
|
||
# Remove Python executable and commands if present
|
||
start = argc.value - len(sys.argv)
|
||
return [argv[i] for i in
|
||
range(start, argc.value)]
|
||
# if we don't have any arguments at all, just pass back script name
|
||
# this should never happen
|
||
return [u"mobidedrm.py"]
|
||
else:
|
||
argvencoding = sys.stdin.encoding
|
||
if argvencoding == None:
|
||
argvencoding = "utf-8"
|
||
return argv
|
||
|
||
# cleanup unicode filenames
|
||
# borrowed from calibre from calibre/src/calibre/__init__.py
|
||
# added in removal of control (<32) chars
|
||
# and removal of . at start and end
|
||
# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
|
||
# and some improvements suggested by jhaisley
|
||
def cleanup_name(name):
|
||
# substitute filename unfriendly characters
|
||
name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'").replace(u"*",u"_").replace(u"?",u"")
|
||
# white space to single space, delete leading and trailing while space
|
||
name = re.sub(r"\s", u" ", name).strip()
|
||
# delete control characters
|
||
name = u"".join(char for char in name if ord(char)>=32)
|
||
# delete non-ascii characters
|
||
name = u"".join(char for char in name if ord(char)<=126)
|
||
# remove leading dots
|
||
while len(name)>0 and name[0] == u".":
|
||
name = name[1:]
|
||
# remove trailing dots (Windows doesn't like them)
|
||
while name.endswith(u'.'):
|
||
name = name[:-1]
|
||
if len(name)==0:
|
||
name=u"DecryptedBook"
|
||
return name
|
||
|
||
# must be passed unicode
|
||
def unescape(text):
|
||
def fixup(m):
|
||
text = m.group(0)
|
||
if text[:2] == u"&#":
|
||
# character reference
|
||
try:
|
||
if text[:3] == u"&#x":
|
||
return chr(int(text[3:-1], 16))
|
||
else:
|
||
return chr(int(text[2:-1]))
|
||
except ValueError:
|
||
pass
|
||
else:
|
||
# named entity
|
||
try:
|
||
text = chr(htmlentitydefs.name2codepoint[text[1:-1]])
|
||
except KeyError:
|
||
pass
|
||
return text # leave as is
|
||
return re.sub(u"&#?\\w+;", fixup, text)
|
||
|
||
def GetDecryptedBook(infile, kDatabases, androidFiles, serials, pids, starttime = time.time()):
|
||
# handle the obvious cases at the beginning
|
||
if not os.path.isfile(infile):
|
||
raise DrmException(u"Input file does not exist.")
|
||
|
||
mobi = True
|
||
magic8 = open(infile,'rb').read(8)
|
||
if magic8 == '\xeaDRMION\xee':
|
||
raise DrmException(u"The .kfx DRMION file cannot be decrypted by itself. A .kfx-zip archive containing a DRM voucher is required.")
|
||
|
||
magic3 = magic8[:3]
|
||
if magic3 == 'TPZ':
|
||
mobi = False
|
||
|
||
if magic8[:4] == 'PK\x03\x04':
|
||
mb = kfxdedrm.KFXZipBook(infile)
|
||
elif mobi:
|
||
mb = mobidedrm.MobiBook(infile)
|
||
else:
|
||
mb = topazextract.TopazBook(infile)
|
||
|
||
bookname = unescape(mb.getBookTitle())
|
||
print(u"Decrypting {1} ebook: {0}".format(bookname, mb.getBookType()))
|
||
|
||
# copy list of pids
|
||
totalpids = list(pids)
|
||
# extend list of serials with serials from android databases
|
||
for aFile in androidFiles:
|
||
serials.extend(androidkindlekey.get_serials(aFile))
|
||
# extend PID list with book-specific PIDs from seriala and kDatabases
|
||
md1, md2 = mb.getPIDMetaInfo()
|
||
totalpids.extend(kgenpids.getPidList(md1, md2, serials, kDatabases))
|
||
# remove any duplicates
|
||
totalpids = list(set(totalpids))
|
||
print(u"Found {1:d} keys to try after {0:.1f} seconds".format(time.time()-starttime, len(totalpids)))
|
||
#print totalpids
|
||
|
||
try:
|
||
mb.processBook(totalpids)
|
||
except:
|
||
mb.cleanup
|
||
raise
|
||
|
||
print(u"Decryption succeeded after {0:.1f} seconds".format(time.time()-starttime))
|
||
return mb
|
||
|
||
|
||
# kDatabaseFiles is a list of files created by kindlekey
|
||
def decryptBook(infile, outdir, kDatabaseFiles, androidFiles, serials, pids):
|
||
starttime = time.time()
|
||
kDatabases = []
|
||
for dbfile in kDatabaseFiles:
|
||
kindleDatabase = {}
|
||
try:
|
||
with open(dbfile, 'r') as keyfilein:
|
||
kindleDatabase = json.loads(keyfilein.read())
|
||
kDatabases.append([dbfile,kindleDatabase])
|
||
except Exception as e:
|
||
print(u"Error getting database from file {0:s}: {1:s}".format(dbfile,e))
|
||
traceback.print_exc()
|
||
|
||
|
||
|
||
try:
|
||
book = GetDecryptedBook(infile, kDatabases, androidFiles, serials, pids, starttime)
|
||
except Exception as e:
|
||
print(u"Error decrypting book after {1:.1f} seconds: {0}".format(e.args[0],time.time()-starttime))
|
||
traceback.print_exc()
|
||
return 1
|
||
|
||
# Try to infer a reasonable name
|
||
orig_fn_root = os.path.splitext(os.path.basename(infile))[0]
|
||
if (
|
||
re.match('^B[A-Z0-9]{9}(_EBOK|_EBSP|_sample)?$', orig_fn_root) or
|
||
re.match('^{0-9A-F-}{36}$', orig_fn_root)
|
||
): # Kindle for PC / Mac / Android / Fire / iOS
|
||
clean_title = cleanup_name(book.getBookTitle())
|
||
outfilename = u'{}_{}'.format(orig_fn_root, clean_title)
|
||
else: # E Ink Kindle, which already uses a reasonable name
|
||
outfilename = orig_fn_root
|
||
|
||
# avoid excessively long file names
|
||
if len(outfilename)>150:
|
||
outfilename = outfilename[:99]+"--"+outfilename[-49:]
|
||
|
||
outfilename = outfilename+u"_nodrm"
|
||
outfile = os.path.join(outdir, outfilename + book.getBookExtension())
|
||
|
||
book.getFile(outfile)
|
||
print(u"Saved decrypted book {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename))
|
||
|
||
if book.getBookType()==u"Topaz":
|
||
zipname = os.path.join(outdir, outfilename + u"_SVG.zip")
|
||
book.getSVGZip(zipname)
|
||
print(u"Saved SVG ZIP Archive for {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename))
|
||
|
||
# remove internal temporary directory of Topaz pieces
|
||
book.cleanup()
|
||
return 0
|
||
|
||
|
||
def usage(progname):
|
||
print(u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks")
|
||
print(u"Usage:")
|
||
print(u" {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] [ -a <AmazonSecureStorage.xml|backup.ab> ] <infile> <outdir>".format(progname))
|
||
|
||
#
|
||
# Main
|
||
#
|
||
def cli_main():
|
||
argv=unicode_argv()
|
||
progname = os.path.basename(argv[0])
|
||
print(u"K4MobiDeDrm v{0}.\nCopyright © 2008-2017 Apprentice Harper et al.".format(__version__))
|
||
|
||
try:
|
||
opts, args = getopt.getopt(argv[1:], "k:p:s:a:")
|
||
except getopt.GetoptError as err:
|
||
print(u"Error in options or arguments: {0}".format(err.args[0]))
|
||
usage(progname)
|
||
sys.exit(2)
|
||
if len(args)<2:
|
||
usage(progname)
|
||
sys.exit(2)
|
||
|
||
infile = args[0]
|
||
outdir = args[1]
|
||
kDatabaseFiles = []
|
||
androidFiles = []
|
||
serials = []
|
||
pids = []
|
||
|
||
for o, a in opts:
|
||
if o == "-k":
|
||
if a == None :
|
||
raise DrmException("Invalid parameter for -k")
|
||
kDatabaseFiles.append(a)
|
||
if o == "-p":
|
||
if a == None :
|
||
raise DrmException("Invalid parameter for -p")
|
||
pids = a.split(',')
|
||
if o == "-s":
|
||
if a == None :
|
||
raise DrmException("Invalid parameter for -s")
|
||
serials = a.split(',')
|
||
if o == '-a':
|
||
if a == None:
|
||
raise DrmException("Invalid parameter for -a")
|
||
androidFiles.append(a)
|
||
|
||
# try with built in Kindle Info files if not on Linux
|
||
k4 = not sys.platform.startswith('linux')
|
||
|
||
return decryptBook(infile, outdir, kDatabaseFiles, androidFiles, serials, pids)
|
||
|
||
|
||
if __name__ == '__main__':
|
||
sys.stdout=SafeUnbuffered(sys.stdout)
|
||
sys.stderr=SafeUnbuffered(sys.stderr)
|
||
sys.exit(cli_main())
|