Add back Python2 support (ADEPT)

This commit is contained in:
NoDRM 2021-11-16 11:09:03 +01:00
parent 88dd1350c0
commit 90910ab106
21 changed files with 481 additions and 310 deletions

View file

@ -115,14 +115,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data,str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
try: try:
self.stream.buffer.write(data) buffer = getattr(self.stream, 'buffer', self.stream)
self.stream.buffer.flush() # self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except: except:
# We can do nothing if a write fails # We can do nothing if a write fails
pass raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -132,7 +135,8 @@ class DeDRM(FileTypePlugin):
supported_platforms = ['linux', 'osx', 'windows'] supported_platforms = ['linux', 'osx', 'windows']
author = "Apprentice Alf, Apprentice Harper, NoDRM, The Dark Reverser and i♥cabbages" author = "Apprentice Alf, Apprentice Harper, NoDRM, The Dark Reverser and i♥cabbages"
version = PLUGIN_VERSION_TUPLE version = PLUGIN_VERSION_TUPLE
minimum_calibre_version = (5, 0, 0) # Python 3. #minimum_calibre_version = (5, 0, 0) # Python 3.
minimum_calibre_version = (2, 0, 0) # Should work from Calibre 1.0 on, but 1.X is untested, so 2.0 is enforced.
file_types = set(['epub','pdf','pdb','prc','mobi','pobi','azw','azw1','azw3','azw4','azw8','tpz','kfx','kfx-zip']) file_types = set(['epub','pdf','pdb','prc','mobi','pobi','azw','azw1','azw3','azw4','azw8','tpz','kfx','kfx-zip'])
on_import = True on_import = True
on_preprocess = True on_preprocess = True
@ -151,6 +155,7 @@ class DeDRM(FileTypePlugin):
The extraction only happens once per version of the plugin The extraction only happens once per version of the plugin
Also perform upgrade of preferences once per version Also perform upgrade of preferences once per version
""" """
try: try:
self.pluginsdir = os.path.join(config_dir,"plugins") self.pluginsdir = os.path.join(config_dir,"plugins")
if not os.path.exists(self.pluginsdir): if not os.path.exists(self.pluginsdir):
@ -237,7 +242,7 @@ class DeDRM(FileTypePlugin):
fr.fix() fr.fix()
except Exception as e: except Exception as e:
print("{0} v{1}: Error \'{2}\' when checking zip archive".format(PLUGIN_NAME, PLUGIN_VERSION, e.args[0])) print("{0} v{1}: Error \'{2}\' when checking zip archive".format(PLUGIN_NAME, PLUGIN_VERSION, e.args[0]))
raise Exception(e) raise
# import the decryption keys # import the decryption keys
import calibre_plugins.dedrm.prefs as prefs import calibre_plugins.dedrm.prefs as prefs

View file

@ -51,11 +51,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -99,7 +105,7 @@ def unicode_argv():
return ["adobekey.py"] return ["adobekey.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class ADEPTError(Exception): class ADEPTError(Exception):
pass pass

View file

@ -45,11 +45,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data,str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -93,7 +99,7 @@ def unicode_argv():
return ["kindlekey.py"] return ["kindlekey.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class DrmException(Exception): class DrmException(Exception):
pass pass

View file

@ -41,7 +41,7 @@ def unicode_argv():
return ["DeDRM.py"] return ["DeDRM.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
def add_cp65001_codec(): def add_cp65001_codec():

View file

@ -15,11 +15,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)

View file

@ -66,11 +66,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -114,7 +120,7 @@ def unicode_argv():
return ["epubtest.py"] return ["epubtest.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
_FILENAME_LEN_OFFSET = 26 _FILENAME_LEN_OFFSET = 26
_EXTRA_LEN_OFFSET = 28 _EXTRA_LEN_OFFSET = 28

View file

@ -85,10 +85,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data,str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -129,7 +136,7 @@ def unicode_argv():
return ["mobidedrm.py"] return ["mobidedrm.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
Des = None Des = None
if iswindows: if iswindows:

View file

@ -14,11 +14,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)

View file

@ -52,10 +52,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data,str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -97,7 +104,7 @@ def unicode_argv():
return ["ineptepub.py"] return ["ineptepub.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class IGNOBLEError(Exception): class IGNOBLEError(Exception):

View file

@ -37,11 +37,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -85,7 +91,7 @@ def unicode_argv():
return ["ignoblekey.py"] return ["ignoblekey.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class DrmException(Exception): class DrmException(Exception):
pass pass

View file

@ -44,11 +44,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -92,7 +98,7 @@ def unicode_argv():
return ["ignoblekeyfetch.py"] return ["ignoblekeyfetch.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class IGNOBLEError(Exception): class IGNOBLEError(Exception):

View file

@ -54,11 +54,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -102,7 +108,7 @@ def unicode_argv():
return ["ignoblekeygen.py"] return ["ignoblekeygen.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class IGNOBLEError(Exception): class IGNOBLEError(Exception):

View file

@ -14,6 +14,7 @@
# Revision history: # Revision history:
# 0.1 - Initial alpha testing release 2020 by Pu D. Pud # 0.1 - Initial alpha testing release 2020 by Pu D. Pud
# 0.2 - Python 3 for calibre 5.0 (in testing) # 0.2 - Python 3 for calibre 5.0 (in testing)
# 0.3 - More Python3 fixes
""" """
@ -21,7 +22,7 @@ Decrypts Barnes & Noble encrypted PDF files.
""" """
__license__ = 'GPL v3' __license__ = 'GPL v3'
__version__ = "0.2" __version__ = "0.3"
import sys import sys
import os import os
@ -29,8 +30,9 @@ import re
import zlib import zlib
import struct import struct
import hashlib import hashlib
from decimal import * from io import BytesIO
from itertools import chain, islice from decimal import Decimal
import itertools
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
# Wrap a stream so that output gets flushed immediately # Wrap a stream so that output gets flushed immediately
@ -43,11 +45,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# Nothing to recover locally if a write fails; re-raise so the caller sees it
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -86,7 +94,7 @@ def unicode_argv():
return ["ignoblepdf.py"] return ["ignoblepdf.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class IGNOBLEError(Exception): class IGNOBLEError(Exception):
@ -236,7 +244,6 @@ def _load_crypto():
ARC4, AES = _load_crypto() ARC4, AES = _load_crypto()
from io import BytesIO
# Do we generate cross reference streams on output? # Do we generate cross reference streams on output?
@ -273,7 +280,7 @@ def nunpack(s, default=0):
elif l == 2: elif l == 2:
return struct.unpack('>H', s)[0] return struct.unpack('>H', s)[0]
elif l == 3: elif l == 3:
return struct.unpack('>L', '\x00'+s)[0] return struct.unpack('>L', bytes([0]) + s)[0]
elif l == 4: elif l == 4:
return struct.unpack('>L', s)[0] return struct.unpack('>L', s)[0]
else: else:
@ -324,7 +331,7 @@ class PSKeyword(PSObject):
Use PSKeywordTable.intern() instead. Use PSKeywordTable.intern() instead.
''' '''
def __init__(self, name): def __init__(self, name):
self.name = name self.name = name.decode('utf-8')
return return
def __repr__(self): def __repr__(self):
@ -354,12 +361,12 @@ PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword) PSKeywordTable = PSSymbolTable(PSKeyword)
LIT = PSLiteralTable.intern LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern KWD = PSKeywordTable.intern
KEYWORD_BRACE_BEGIN = KWD('{') KEYWORD_BRACE_BEGIN = KWD(b'{')
KEYWORD_BRACE_END = KWD('}') KEYWORD_BRACE_END = KWD(b'}')
KEYWORD_ARRAY_BEGIN = KWD('[') KEYWORD_ARRAY_BEGIN = KWD(b'[')
KEYWORD_ARRAY_END = KWD(']') KEYWORD_ARRAY_END = KWD(b']')
KEYWORD_DICT_BEGIN = KWD('<<') KEYWORD_DICT_BEGIN = KWD(b'<<')
KEYWORD_DICT_END = KWD('>>') KEYWORD_DICT_END = KWD(b'>>')
def literal_name(x): def literal_name(x):
@ -381,18 +388,18 @@ def keyword_name(x):
## PSBaseParser ## PSBaseParser
## ##
EOL = re.compile(r'[\r\n]') EOL = re.compile(br'[\r\n]')
SPC = re.compile(r'\s') SPC = re.compile(br'\s')
NONSPC = re.compile(r'\S') NONSPC = re.compile(br'\S')
HEX = re.compile(r'[0-9a-fA-F]') HEX = re.compile(br'[0-9a-fA-F]')
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]') END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]')
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]') END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]')
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.') HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.')
END_NUMBER = re.compile(r'[^0-9]') END_NUMBER = re.compile(br'[^0-9]')
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]') END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]')
END_STRING = re.compile(r'[()\134]') END_STRING = re.compile(br'[()\\]')
OCT_STRING = re.compile(r'[0-7]') OCT_STRING = re.compile(br'[0-7]')
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 } ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 }
class PSBaseParser(object): class PSBaseParser(object):
@ -435,7 +442,7 @@ class PSBaseParser(object):
self.fp.seek(pos) self.fp.seek(pos)
# reset the status for nextline() # reset the status for nextline()
self.bufpos = pos self.bufpos = pos
self.buf = '' self.buf = b''
self.charpos = 0 self.charpos = 0
# reset the status for nexttoken() # reset the status for nexttoken()
self.parse1 = self.parse_main self.parse1 = self.parse_main
@ -457,32 +464,37 @@ class PSBaseParser(object):
if not m: if not m:
return (self.parse_main, len(s)) return (self.parse_main, len(s))
j = m.start(0) j = m.start(0)
c = s[j] if isinstance(s[j], str):
# Python 2
c = s[j]
else:
# Python 3
c = bytes([s[j]])
self.tokenstart = self.bufpos+j self.tokenstart = self.bufpos+j
if c == '%': if c == b'%':
self.token = '%' self.token = c
return (self.parse_comment, j+1) return (self.parse_comment, j+1)
if c == '/': if c == b'/':
self.token = '' self.token = b''
return (self.parse_literal, j+1) return (self.parse_literal, j+1)
if c in '-+' or c.isdigit(): if c in b'-+' or c.isdigit():
self.token = c self.token = c
return (self.parse_number, j+1) return (self.parse_number, j+1)
if c == '.': if c == b'.':
self.token = c self.token = c
return (self.parse_decimal, j+1) return (self.parse_decimal, j+1)
if c.isalpha(): if c.isalpha():
self.token = c self.token = c
return (self.parse_keyword, j+1) return (self.parse_keyword, j+1)
if c == '(': if c == b'(':
self.token = '' self.token = b''
self.paren = 1 self.paren = 1
return (self.parse_string, j+1) return (self.parse_string, j+1)
if c == '<': if c == b'<':
self.token = '' self.token = b''
return (self.parse_wopen, j+1) return (self.parse_wopen, j+1)
if c == '>': if c == b'>':
self.token = '' self.token = b''
return (self.parse_wclose, j+1) return (self.parse_wclose, j+1)
self.add_token(KWD(c)) self.add_token(KWD(c))
return (self.parse_main, j+1) return (self.parse_main, j+1)
@ -509,20 +521,26 @@ class PSBaseParser(object):
return (self.parse_literal, len(s)) return (self.parse_literal, len(s))
j = m.start(0) j = m.start(0)
self.token += s[i:j] self.token += s[i:j]
c = s[j] if isinstance(s[j], str):
if c == '#': c = s[j]
self.hex = '' else:
c = bytes([s[j]])
if c == b'#':
self.hex = b''
return (self.parse_literal_hex, j+1) return (self.parse_literal_hex, j+1)
self.add_token(LIT(self.token)) self.add_token(PSLiteralTable.intern(self.token))
return (self.parse_main, j) return (self.parse_main, j)
def parse_literal_hex(self, s, i): def parse_literal_hex(self, s, i):
c = s[i] if isinstance(s[i], str):
c = s[i]
else:
c = bytes([s[i]])
if HEX.match(c) and len(self.hex) < 2: if HEX.match(c) and len(self.hex) < 2:
self.hex += c self.hex += c
return (self.parse_literal_hex, i+1) return (self.parse_literal_hex, i+1)
if self.hex: if self.hex:
self.token += chr(int(self.hex, 16)) self.token += bytes([int(self.hex, 16)])
return (self.parse_literal, i) return (self.parse_literal, i)
def parse_number(self, s, i): def parse_number(self, s, i):
@ -532,8 +550,11 @@ class PSBaseParser(object):
return (self.parse_number, len(s)) return (self.parse_number, len(s))
j = m.start(0) j = m.start(0)
self.token += s[i:j] self.token += s[i:j]
c = s[j] if isinstance(s[j], str):
if c == '.': c = s[j]
else:
c = bytes([s[j]])
if c == b'.':
self.token += c self.token += c
return (self.parse_decimal, j+1) return (self.parse_decimal, j+1)
try: try:
@ -549,7 +570,7 @@ class PSBaseParser(object):
return (self.parse_decimal, len(s)) return (self.parse_decimal, len(s))
j = m.start(0) j = m.start(0)
self.token += s[i:j] self.token += s[i:j]
self.add_token(Decimal(self.token)) self.add_token(Decimal(self.token.decode('utf-8')))
return (self.parse_main, j) return (self.parse_main, j)
def parse_keyword(self, s, i): def parse_keyword(self, s, i):
@ -575,15 +596,18 @@ class PSBaseParser(object):
return (self.parse_string, len(s)) return (self.parse_string, len(s))
j = m.start(0) j = m.start(0)
self.token += s[i:j] self.token += s[i:j]
c = s[j] if isinstance(s[j], str):
if c == '\\': c = s[j]
else:
c = bytes([s[j]])
if c == b'\\':
self.oct = '' self.oct = ''
return (self.parse_string_1, j+1) return (self.parse_string_1, j+1)
if c == '(': if c == b'(':
self.paren += 1 self.paren += 1
self.token += c self.token += c
return (self.parse_string, j+1) return (self.parse_string, j+1)
if c == ')': if c == b')':
self.paren -= 1 self.paren -= 1
if self.paren: if self.paren:
self.token += c self.token += c
@ -591,42 +615,51 @@ class PSBaseParser(object):
self.add_token(self.token) self.add_token(self.token)
return (self.parse_main, j+1) return (self.parse_main, j+1)
def parse_string_1(self, s, i): def parse_string_1(self, s, i):
c = s[i] if isinstance(s[i], str):
c = s[i]
else:
c = bytes([s[i]])
if OCT_STRING.match(c) and len(self.oct) < 3: if OCT_STRING.match(c) and len(self.oct) < 3:
self.oct += c self.oct += c
return (self.parse_string_1, i+1) return (self.parse_string_1, i+1)
if self.oct: if self.oct:
self.token += chr(int(self.oct, 8)) self.token += bytes([int(self.oct, 8)])
return (self.parse_string, i) return (self.parse_string, i)
if c in ESC_STRING: if c in ESC_STRING:
self.token += chr(ESC_STRING[c]) self.token += bytes([ESC_STRING[c]])
return (self.parse_string, i+1) return (self.parse_string, i+1)
def parse_wopen(self, s, i): def parse_wopen(self, s, i):
c = s[i] if isinstance(s[i], str):
c = s[i]
else:
c = bytes([s[i]])
if c.isspace() or HEX.match(c): if c.isspace() or HEX.match(c):
return (self.parse_hexstring, i) return (self.parse_hexstring, i)
if c == '<': if c == b'<':
self.add_token(KEYWORD_DICT_BEGIN) self.add_token(KEYWORD_DICT_BEGIN)
i += 1 i += 1
return (self.parse_main, i) return (self.parse_main, i)
def parse_wclose(self, s, i): def parse_wclose(self, s, i):
c = s[i] if isinstance(s[i], str):
if c == '>': c = s[i]
else:
c = bytes([s[i]])
if c == b'>':
self.add_token(KEYWORD_DICT_END) self.add_token(KEYWORD_DICT_END)
i += 1 i += 1
return (self.parse_main, i) return (self.parse_main, i)
def parse_hexstring(self, s, i): def parse_hexstring(self, s, i):
m = END_HEX_STRING.search(s, i) m1 = END_HEX_STRING.search(s, i)
if not m: if not m1:
self.token += s[i:] self.token += s[i:]
return (self.parse_hexstring, len(s)) return (self.parse_hexstring, len(s))
j = m.start(0) j = m1.start(0)
self.token += s[i:j] self.token += s[i:j]
token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)), token = HEX_PAIR.sub(lambda m2: bytes([int(m2.group(0), 16)]),
SPC.sub('', self.token)) SPC.sub(b'', self.token))
self.add_token(token) self.add_token(token)
return (self.parse_main, j) return (self.parse_main, j)
@ -641,15 +674,15 @@ class PSBaseParser(object):
''' '''
Fetches a next line that ends either with \\r or \\n. Fetches a next line that ends either with \\r or \\n.
''' '''
linebuf = '' linebuf = b''
linepos = self.bufpos + self.charpos linepos = self.bufpos + self.charpos
eol = False eol = False
while 1: while 1:
self.fillbuf() self.fillbuf()
if eol: if eol:
c = self.buf[self.charpos] c = bytes([self.buf[self.charpos]])
# handle '\r\n' # handle '\r\n'
if c == '\n': if c == b'\n':
linebuf += c linebuf += c
self.charpos += 1 self.charpos += 1
break break
@ -657,7 +690,7 @@ class PSBaseParser(object):
if m: if m:
linebuf += self.buf[self.charpos:m.end(0)] linebuf += self.buf[self.charpos:m.end(0)]
self.charpos = m.end(0) self.charpos = m.end(0)
if linebuf[-1] == '\r': if bytes([linebuf[-1]]) == b'\r':
eol = True eol = True
else: else:
break break
@ -673,7 +706,7 @@ class PSBaseParser(object):
''' '''
self.fp.seek(0, 2) self.fp.seek(0, 2)
pos = self.fp.tell() pos = self.fp.tell()
buf = '' buf = b''
while 0 < pos: while 0 < pos:
prevpos = pos prevpos = pos
pos = max(0, pos-self.BUFSIZ) pos = max(0, pos-self.BUFSIZ)
@ -681,13 +714,13 @@ class PSBaseParser(object):
s = self.fp.read(prevpos-pos) s = self.fp.read(prevpos-pos)
if not s: break if not s: break
while 1: while 1:
n = max(s.rfind('\r'), s.rfind('\n')) n = max(s.rfind(b'\r'), s.rfind(b'\n'))
if n == -1: if n == -1:
buf = s + buf buf = s + buf
break break
yield s[n:]+buf yield s[n:]+buf
s = s[:n] s = s[:n]
buf = '' buf = b''
return return
@ -743,7 +776,7 @@ class PSStackParser(PSBaseParser):
def nextobject(self, direct=False): def nextobject(self, direct=False):
''' '''
Yields a list of objects: keywords, literals, strings, Yields a list of objects: keywords, literals, strings (byte arrays),
numbers, arrays and dictionaries. Arrays and dictionaries numbers, arrays and dictionaries. Arrays and dictionaries
are represented as Python sequence and dictionaries. are represented as Python sequence and dictionaries.
''' '''
@ -753,6 +786,8 @@ class PSStackParser(PSBaseParser):
if (isinstance(token, int) or if (isinstance(token, int) or
isinstance(token, Decimal) or isinstance(token, Decimal) or
isinstance(token, bool) or isinstance(token, bool) or
isinstance(token, bytearray) or
isinstance(token, bytes) or
isinstance(token, str) or isinstance(token, str) or
isinstance(token, PSLiteral)): isinstance(token, PSLiteral)):
# normal token # normal token
@ -796,10 +831,10 @@ class PSStackParser(PSBaseParser):
return obj return obj
LITERAL_CRYPT = PSLiteralTable.intern('Crypt') LITERAL_CRYPT = PSLiteralTable.intern(b'Crypt')
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl')) LITERALS_FLATE_DECODE = (PSLiteralTable.intern(b'FlateDecode'), PSLiteralTable.intern(b'Fl'))
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW')) LITERALS_LZW_DECODE = (PSLiteralTable.intern(b'LZWDecode'), PSLiteralTable.intern(b'LZW'))
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85')) LITERALS_ASCII85_DECODE = (PSLiteralTable.intern(b'ASCII85Decode'), PSLiteralTable.intern(b'A85'))
## PDF Objects ## PDF Objects
@ -853,7 +888,7 @@ def resolve_all(x):
if isinstance(x, list): if isinstance(x, list):
x = [ resolve_all(v) for v in x ] x = [ resolve_all(v) for v in x ]
elif isinstance(x, dict): elif isinstance(x, dict):
for (k,v) in x.iteritems(): for (k,v) in iter(x.items()):
x[k] = resolve_all(v) x[k] = resolve_all(v)
return x return x
@ -861,13 +896,13 @@ def decipher_all(decipher, objid, genno, x):
''' '''
Recursively decipher X. Recursively decipher X.
''' '''
if isinstance(x, str): if isinstance(x, bytearray) or isinstance(x,bytes) or isinstance(x,str):
return decipher(objid, genno, x) return decipher(objid, genno, x)
decf = lambda v: decipher_all(decipher, objid, genno, v) decf = lambda v: decipher_all(decipher, objid, genno, v)
if isinstance(x, list): if isinstance(x, list):
x = [decf(v) for v in x] x = [decf(v) for v in x]
elif isinstance(x, dict): elif isinstance(x, dict):
x = dict((k, decf(v)) for (k, v) in x.iteritems()) x = dict((k, decf(v)) for (k, v) in iter(x.items()))
return x return x
@ -898,7 +933,7 @@ def num_value(x):
def str_value(x): def str_value(x):
x = resolve1(x) x = resolve1(x)
if not isinstance(x, str): if not (isinstance(x, bytearray) or isinstance(x, bytes) or isinstance(x, str)):
if STRICT: if STRICT:
raise PDFTypeError('String required: %r' % x) raise PDFTypeError('String required: %r' % x)
return '' return ''
@ -931,18 +966,18 @@ def stream_value(x):
# ascii85decode(data) # ascii85decode(data)
def ascii85decode(data): def ascii85decode(data):
n = b = 0 n = b = 0
out = '' out = b''
for c in data: for c in data:
if '!' <= c and c <= 'u': if b'!' <= c and c <= b'u':
n += 1 n += 1
b = b*85+(ord(c)-33) b = b*85+(c-33)
if n == 5: if n == 5:
out += struct.pack('>L',b) out += struct.pack('>L',b)
n = b = 0 n = b = 0
elif c == 'z': elif c == b'z':
assert n == 0 assert n == 0
out += '\0\0\0\0' out += b'\0\0\0\0'
elif c == '~': elif c == b'~':
if n: if n:
for _ in range(5-n): for _ in range(5-n):
b = b*85+84 b = b*85+84
@ -963,7 +998,7 @@ class PDFStream(PDFObject):
cutdiv = len(rawdata) // 16 cutdiv = len(rawdata) // 16
rawdata = rawdata[:16*cutdiv] rawdata = rawdata[:16*cutdiv]
else: else:
if eol in ('\r', '\n', '\r\n'): if eol in (b'\r', b'\n', b'\r\n'):
rawdata = rawdata[:length] rawdata = rawdata[:length]
self.dic = dic self.dic = dic
@ -1009,7 +1044,7 @@ class PDFStream(PDFObject):
# will get errors if the document is encrypted. # will get errors if the document is encrypted.
data = zlib.decompress(data) data = zlib.decompress(data)
elif f in LITERALS_LZW_DECODE: elif f in LITERALS_LZW_DECODE:
data = ''.join(LZWDecoder(BytesIO(data)).run()) data = b''.join(LZWDecoder(BytesIO(data)).run())
elif f in LITERALS_ASCII85_DECODE: elif f in LITERALS_ASCII85_DECODE:
data = ascii85decode(data) data = ascii85decode(data)
elif f == LITERAL_CRYPT: elif f == LITERAL_CRYPT:
@ -1031,14 +1066,14 @@ class PDFStream(PDFObject):
raise PDFValueError( raise PDFValueError(
'Columns undefined for predictor=12') 'Columns undefined for predictor=12')
columns = int_value(params['Columns']) columns = int_value(params['Columns'])
buf = '' buf = b''
ent0 = '\x00' * columns ent0 = b'\x00' * columns
for i in range(0, len(data), columns+1): for i in range(0, len(data), columns+1):
pred = data[i] pred = data[i]
ent1 = data[i+1:i+1+columns] ent1 = data[i+1:i+1+columns]
if pred == '\x02': if pred == 2:
ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \ ent1 = b''.join(bytes([(a+b) & 255]) \
for (a,b) in zip(ent0,ent1)) for (a,b) in zip(ent0,ent1))
buf += ent1 buf += ent1
ent0 = ent1 ent0 = ent1
data = buf data = buf
@ -1072,11 +1107,11 @@ class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass class PDFPasswordIncorrect(PDFEncryptionError): pass
# some predefined literals and keywords. # some predefined literals and keywords.
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm') LITERAL_OBJSTM = PSLiteralTable.intern(b'ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef') LITERAL_XREF = PSLiteralTable.intern(b'XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page') LITERAL_PAGE = PSLiteralTable.intern(b'Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages') LITERAL_PAGES = PSLiteralTable.intern(b'Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog') LITERAL_CATALOG = PSLiteralTable.intern(b'Catalog')
## XRefs ## XRefs
@ -1094,7 +1129,7 @@ class PDFXRef(object):
return '<PDFXRef: objs=%d>' % len(self.offsets) return '<PDFXRef: objs=%d>' % len(self.offsets)
def objids(self): def objids(self):
return self.offsets.iterkeys() return iter(self.offsets.keys())
def load(self, parser): def load(self, parser):
self.offsets = {} self.offsets = {}
@ -1105,10 +1140,10 @@ class PDFXRef(object):
raise PDFNoValidXRef('Unexpected EOF - file corrupted?') raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
if not line: if not line:
raise PDFNoValidXRef('Premature eof: %r' % parser) raise PDFNoValidXRef('Premature eof: %r' % parser)
if line.startswith('trailer'): if line.startswith(b'trailer'):
parser.seek(pos) parser.seek(pos)
break break
f = line.strip().split(' ') f = line.strip().split(b' ')
if len(f) != 2: if len(f) != 2:
raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line)) raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
try: try:
@ -1120,16 +1155,17 @@ class PDFXRef(object):
(_, line) = parser.nextline() (_, line) = parser.nextline()
except PSEOF: except PSEOF:
raise PDFNoValidXRef('Unexpected EOF - file corrupted?') raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
f = line.strip().split(' ') f = line.strip().split(b' ')
if len(f) != 3: if len(f) != 3:
raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line)) raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
(pos, genno, use) = f (pos, genno, use) = f
if use != 'n': continue if use != b'n':
self.offsets[objid] = (int(genno), int(pos)) continue
self.offsets[objid] = (int(genno.decode('utf-8')), int(pos.decode('utf-8')))
self.load_trailer(parser) self.load_trailer(parser)
return return
KEYWORD_TRAILER = PSKeywordTable.intern('trailer') KEYWORD_TRAILER = PSKeywordTable.intern(b'trailer')
def load_trailer(self, parser): def load_trailer(self, parser):
try: try:
(_,kwd) = parser.nexttoken() (_,kwd) = parser.nexttoken()
@ -1180,8 +1216,8 @@ class PDFXRefStream(object):
raise PDFNoValidXRef('Invalid PDF stream spec.') raise PDFNoValidXRef('Invalid PDF stream spec.')
size = stream.dic['Size'] size = stream.dic['Size']
index = stream.dic.get('Index', (0,size)) index = stream.dic.get('Index', (0,size))
self.index = zip(islice(index, 0, None, 2), self.index = list(zip(itertools.islice(index, 0, None, 2),
islice(index, 1, None, 2)) itertools.islice(index, 1, None, 2)))
(self.fl1, self.fl2, self.fl3) = stream.dic['W'] (self.fl1, self.fl2, self.fl3) = stream.dic['W']
self.data = stream.get_data() self.data = stream.get_data()
self.entlen = self.fl1+self.fl2+self.fl3 self.entlen = self.fl1+self.fl2+self.fl3
@ -1234,7 +1270,8 @@ class PDFDocument(object):
# set_parser(parser) # set_parser(parser)
# Associates the document with an (already initialized) parser object. # Associates the document with an (already initialized) parser object.
def set_parser(self, parser): def set_parser(self, parser):
if self.parser: return if self.parser:
return
self.parser = parser self.parser = parser
# The document is set to be temporarily ready during collecting # The document is set to be temporarily ready during collecting
# all the basic information about the document, e.g. # all the basic information about the document, e.g.
@ -1256,13 +1293,13 @@ class PDFDocument(object):
dict_value(trailer['Encrypt'])) dict_value(trailer['Encrypt']))
# fix for bad files # fix for bad files
except: except:
self.encryption = ('ffffffffffffffffffffffffffffffffffff', self.encryption = (b'ffffffffffffffffffffffffffffffffffff',
dict_value(trailer['Encrypt'])) dict_value(trailer['Encrypt']))
if 'Root' in trailer: if 'Root' in trailer:
self.set_root(dict_value(trailer['Root'])) self.set_root(dict_value(trailer['Root']))
break break
else: else:
raise PDFSyntaxError('No /Root object! - Is this really a PDF?') raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
# The document is set to be non-ready again, until all the # The document is set to be non-ready again, until all the
# proper initialization (asking the password key and # proper initialization (asking the password key and
# verifying the access permission, so on) is finished. # verifying the access permission, so on) is finished.
@ -1283,7 +1320,7 @@ class PDFDocument(object):
# Perform the initialization with a given password. # Perform the initialization with a given password.
# This step is mandatory even if there's no password associated # This step is mandatory even if there's no password associated
# with the document. # with the document.
def initialize(self, password=''): def initialize(self, password=b''):
if not self.encryption: if not self.encryption:
self.is_printable = self.is_modifiable = self.is_extractable = True self.is_printable = self.is_modifiable = self.is_extractable = True
self.ready = True self.ready = True
@ -1310,14 +1347,14 @@ class PDFDocument(object):
def genkey_adobe_ps(self, param): def genkey_adobe_ps(self, param):
# nice little offline principal keys dictionary # nice little offline principal keys dictionary
# global static principal key for German Onleihe / Bibliothek Digital # global static principal key for German Onleihe / Bibliothek Digital
principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')} principalkeys = { b'bibliothek-digital.de': codecs.decode(b'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw=','base64')}
self.is_printable = self.is_modifiable = self.is_extractable = True self.is_printable = self.is_modifiable = self.is_extractable = True
length = int_value(param.get('Length', 0)) / 8 length = int_value(param.get('Length', 0)) // 8
edcdata = str_value(param.get('EDCData')).decode('base64') edcdata = str_value(param.get('EDCData')).decode('base64')
pdrllic = str_value(param.get('PDRLLic')).decode('base64') pdrllic = str_value(param.get('PDRLLic')).decode('base64')
pdrlpol = str_value(param.get('PDRLPol')).decode('base64') pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
edclist = [] edclist = []
for pair in edcdata.split('\n'): for pair in edcdata.split(b'\n'):
edclist.append(pair) edclist.append(pair)
# principal key request # principal key request
for key in principalkeys: for key in principalkeys:
@ -1326,20 +1363,20 @@ class PDFDocument(object):
else: else:
raise IGNOBLEError('Cannot find principal key for this pdf') raise IGNOBLEError('Cannot find principal key for this pdf')
shakey = SHA256(principalkey) shakey = SHA256(principalkey)
ivector = 16 * chr(0) ivector = bytes(16)
plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64')) plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64'))
if plaintext[-16:] != 16 * chr(16): if plaintext[-16:] != bytearray(b'\0x10')*16:
raise IGNOBLEError('Offlinekey cannot be decrypted, aborting ...') raise IGNOBLEError('Offlinekey cannot be decrypted, aborting ...')
pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol) pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol)
if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16: if pdrlpol[-1] < 1 or pdrlpol[-1] > 16:
raise IGNOBLEError('Could not decrypt PDRLPol, aborting ...') raise IGNOBLEError('Could not decrypt PDRLPol, aborting ...')
else: else:
cutter = -1 * ord(pdrlpol[-1]) cutter = -1 * pdrlpol[-1]
pdrlpol = pdrlpol[:cutter] pdrlpol = pdrlpol[:cutter]
return plaintext[:16] return plaintext[:16]
PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \ PASSWORD_PADDING = b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' b'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
# experimental aes pw support # experimental aes pw support
def initialize_standard(self, password, docid, param): def initialize_standard(self, password, docid, param):
# copy from a global variable # copy from a global variable
@ -1356,7 +1393,7 @@ class PDFDocument(object):
try: try:
EncMetadata = str_value(param['EncryptMetadata']) EncMetadata = str_value(param['EncryptMetadata'])
except: except:
EncMetadata = 'True' EncMetadata = b'True'
self.is_printable = bool(P & 4) self.is_printable = bool(P & 4)
self.is_modifiable = bool(P & 8) self.is_modifiable = bool(P & 8)
self.is_extractable = bool(P & 16) self.is_extractable = bool(P & 16)
@ -1373,12 +1410,12 @@ class PDFDocument(object):
hash.update(docid[0]) # 5 hash.update(docid[0]) # 5
# aes special handling if metadata isn't encrypted # aes special handling if metadata isn't encrypted
if EncMetadata == ('False' or 'false'): if EncMetadata == ('False' or 'false'):
hash.update('ffffffff'.decode('hex')) hash.update(codecs.decode(b'ffffffff','hex'))
if 5 <= R: if 5 <= R:
# 8 # 8
for _ in range(50): for _ in range(50):
hash = hashlib.md5(hash.digest()[:length/8]) hash = hashlib.md5(hash.digest()[:length//8])
key = hash.digest()[:length/8] key = hash.digest()[:length//8]
if R == 2: if R == 2:
# Algorithm 3.4 # Algorithm 3.4
u1 = ARC4.new(key).decrypt(password) u1 = ARC4.new(key).decrypt(password)
@ -1388,7 +1425,7 @@ class PDFDocument(object):
hash.update(docid[0]) # 3 hash.update(docid[0]) # 3
x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
for i in range(1,19+1): for i in range(1,19+1):
k = ''.join( chr(ord(c) ^ i) for c in key ) k = b''.join(bytes([c ^ i]) for c in key )
x = ARC4.new(k).decrypt(x) x = ARC4.new(k).decrypt(x)
u1 = x+x # 32bytes total u1 = x+x # 32bytes total
if R == 2: if R == 2:
@ -1410,9 +1447,9 @@ class PDFDocument(object):
if V != 4: if V != 4:
self.decipher = self.decipher_rc4 # XXX may be AES self.decipher = self.decipher_rc4 # XXX may be AES
# aes # aes
elif V == 4 and Length == 128: elif V == 4 and length == 128:
elf.decipher = self.decipher_aes elf.decipher = self.decipher_aes
elif V == 4 and Length == 256: elif V == 4 and length == 256:
raise PDFNotImplementedError('AES256 encryption is currently unsupported') raise PDFNotImplementedError('AES256 encryption is currently unsupported')
self.ready = True self.ready = True
return return
@ -1429,6 +1466,9 @@ class PDFDocument(object):
bookkey = ''.join(rights.findtext(expr)).decode('base64') bookkey = ''.join(rights.findtext(expr)).decode('base64')
bookkey = aes.decrypt(bookkey) bookkey = aes.decrypt(bookkey)
bookkey = bookkey[:-ord(bookkey[-1])] bookkey = bookkey[:-ord(bookkey[-1])]
# todo: Take a look at this.
# This seems to be the only function that's different between ignoblepdf and ineptpdf.
# A ton of useless duplicated code .....
bookkey = bookkey[-16:] bookkey = bookkey[-16:]
ebx_V = int_value(param.get('V', 4)) ebx_V = int_value(param.get('V', 4))
ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
@ -1440,12 +1480,12 @@ class PDFDocument(object):
else: else:
V = 2 V = 2
elif len(bookkey) == length + 1: elif len(bookkey) == length + 1:
V = ord(bookkey[0]) V = bookkey[0]
bookkey = bookkey[1:] bookkey = bookkey[1:]
else: else:
print("ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)) print("ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type))
print("length is %d and len(bookkey) is %d" % (length, len(bookkey))) print("length is %d and len(bookkey) is %d" % (length, len(bookkey)))
print("bookkey[0] is %d" % ord(bookkey[0])) print("bookkey[0] is %d" % bookkey[0])
raise IGNOBLEError('error decrypting book session key - mismatched length') raise IGNOBLEError('error decrypting book session key - mismatched length')
else: else:
# proper length unknown try with whatever you have # proper length unknown try with whatever you have
@ -1475,7 +1515,7 @@ class PDFDocument(object):
objid = struct.pack('<L', objid ^ 0x3569ac) objid = struct.pack('<L', objid ^ 0x3569ac)
genno = struct.pack('<L', genno ^ 0xca96) genno = struct.pack('<L', genno ^ 0xca96)
key = self.decrypt_key key = self.decrypt_key
key += objid[0] + genno[0] + objid[1] + genno[1] + objid[2] + 'sAlT' key += objid[0] + genno[0] + objid[1] + genno[1] + objid[2] + b'sAlT'
hash = hashlib.md5(key) hash = hashlib.md5(key)
key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)] key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
return key return key
@ -1484,7 +1524,7 @@ class PDFDocument(object):
def genkey_v4(self, objid, genno): def genkey_v4(self, objid, genno):
objid = struct.pack('<L', objid)[:3] objid = struct.pack('<L', objid)[:3]
genno = struct.pack('<L', genno)[:2] genno = struct.pack('<L', genno)[:2]
key = self.decrypt_key + objid + genno + 'sAlT' key = self.decrypt_key + objid + genno + b'sAlT'
hash = hashlib.md5(key) hash = hashlib.md5(key)
key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)] key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
return key return key
@ -1495,8 +1535,7 @@ class PDFDocument(object):
data = data[16:] data = data[16:]
plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data) plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
# remove pkcs#5 aes padding # remove pkcs#5 aes padding
cutter = -1 * ord(plaintext[-1]) cutter = -1 * plaintext[-1]
#print(cutter)
plaintext = plaintext[:cutter] plaintext = plaintext[:cutter]
return plaintext return plaintext
@ -1506,8 +1545,7 @@ class PDFDocument(object):
data = data[16:] data = data[16:]
plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data) plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
# remove pkcs#5 aes padding # remove pkcs#5 aes padding
cutter = -1 * ord(plaintext[-1]) cutter = -1 * plaintext[-1]
#print(cutter)
plaintext = plaintext[:cutter] plaintext = plaintext[:cutter]
return plaintext return plaintext
@ -1516,7 +1554,7 @@ class PDFDocument(object):
return ARC4.new(key).decrypt(data) return ARC4.new(key).decrypt(data)
KEYWORD_OBJ = PSKeywordTable.intern('obj') KEYWORD_OBJ = PSKeywordTable.intern(b'obj')
def getobj(self, objid): def getobj(self, objid):
if not self.ready: if not self.ready:
@ -1623,11 +1661,11 @@ class PDFParser(PSStackParser):
def __repr__(self): def __repr__(self):
return '<PDFParser>' return '<PDFParser>'
KEYWORD_R = PSKeywordTable.intern('R') KEYWORD_R = PSKeywordTable.intern(b'R')
KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj') KEYWORD_ENDOBJ = PSKeywordTable.intern(b'endobj')
KEYWORD_STREAM = PSKeywordTable.intern('stream') KEYWORD_STREAM = PSKeywordTable.intern(b'stream')
KEYWORD_XREF = PSKeywordTable.intern('xref') KEYWORD_XREF = PSKeywordTable.intern(b'xref')
KEYWORD_STARTXREF = PSKeywordTable.intern('startxref') KEYWORD_STARTXREF = PSKeywordTable.intern(b'startxref')
def do_keyword(self, pos, token): def do_keyword(self, pos, token):
if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
self.add_results(*self.pop(1)) self.add_results(*self.pop(1))
@ -1675,8 +1713,8 @@ class PDFParser(PSStackParser):
if STRICT: if STRICT:
raise PDFSyntaxError('Unexpected EOF') raise PDFSyntaxError('Unexpected EOF')
break break
if 'endstream' in line: if b'endstream' in line:
i = line.index('endstream') i = line.index(b'endstream')
objlen += i objlen += i
data += line[:i] data += line[:i]
break break
@ -1696,7 +1734,7 @@ class PDFParser(PSStackParser):
prev = None prev = None
for line in self.revreadlines(): for line in self.revreadlines():
line = line.strip() line = line.strip()
if line == 'startxref': break if line == b'startxref': break
if line: if line:
prev = line prev = line
else: else:
@ -1748,7 +1786,7 @@ class PDFParser(PSStackParser):
except PDFNoValidXRef: except PDFNoValidXRef:
# fallback # fallback
self.seek(0) self.seek(0)
pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b') pat = re.compile(b'^(\\d+)\\s+(\\d+)\\s+obj\\b')
offsets = {} offsets = {}
xref = PDFXRef() xref = PDFXRef()
while 1: while 1:
@ -1756,7 +1794,7 @@ class PDFParser(PSStackParser):
(pos, line) = self.nextline() (pos, line) = self.nextline()
except PSEOF: except PSEOF:
break break
if line.startswith('trailer'): if line.startswith(b'trailer'):
trailerpos = pos # remember last trailer trailerpos = pos # remember last trailer
m = pat.match(line) m = pat.match(line)
if not m: continue if not m: continue
@ -1783,7 +1821,7 @@ class PDFObjStrmParser(PDFParser):
self.add_results(*self.popall()) self.add_results(*self.popall())
return return
KEYWORD_R = KWD('R') KEYWORD_R = KWD(b'R')
def do_keyword(self, pos, token): def do_keyword(self, pos, token):
if token is self.KEYWORD_R: if token is self.KEYWORD_R:
# reference to indirect object # reference to indirect object
@ -1826,7 +1864,7 @@ class PDFSerializer(object):
def dump(self, outf): def dump(self, outf):
self.outf = outf self.outf = outf
self.write(self.version) self.write(self.version)
self.write('\n%\xe2\xe3\xcf\xd3\n') self.write(b'\n%\xe2\xe3\xcf\xd3\n')
doc = self.doc doc = self.doc
objids = self.objids objids = self.objids
xrefs = {} xrefs = {}
@ -1848,18 +1886,18 @@ class PDFSerializer(object):
startxref = self.tell() startxref = self.tell()
if not gen_xref_stm: if not gen_xref_stm:
self.write('xref\n') self.write(b'xref\n')
self.write('0 %d\n' % (maxobj + 1,)) self.write(b'0 %d\n' % (maxobj + 1,))
for objid in range(0, maxobj + 1): for objid in range(0, maxobj + 1):
if objid in xrefs: if objid in xrefs:
# force the genno to be 0 # force the genno to be 0
self.write("%010d 00000 n \n" % xrefs[objid][0]) self.write(b"%010d 00000 n \n" % xrefs[objid][0])
else: else:
self.write("%010d %05d f \n" % (0, 65535)) self.write(b"%010d %05d f \n" % (0, 65535))
self.write('trailer\n') self.write(b'trailer\n')
self.serialize_object(trailer) self.serialize_object(trailer)
self.write('\nstartxref\n%d\n%%%%EOF' % startxref) self.write(b'\nstartxref\n%d\n%%%%EOF' % startxref)
else: # Generate crossref stream. else: # Generate crossref stream.
@ -1908,7 +1946,7 @@ class PDFSerializer(object):
data.append(struct.pack('>L', f2)[-fl2:]) data.append(struct.pack('>L', f2)[-fl2:])
data.append(struct.pack('>L', f3)[-fl3:]) data.append(struct.pack('>L', f3)[-fl3:])
index.extend((first, prev - first + 1)) index.extend((first, prev - first + 1))
data = zlib.compress(''.join(data)) data = zlib.compress(b''.join(data))
dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index, dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
'W': [1, fl2, fl3], 'Length': len(data), 'W': [1, fl2, fl3], 'Length': len(data),
'Filter': LITERALS_FLATE_DECODE[0], 'Filter': LITERALS_FLATE_DECODE[0],
@ -1917,7 +1955,7 @@ class PDFSerializer(object):
dic['Info'] = trailer['Info'] dic['Info'] = trailer['Info']
xrefstm = PDFStream(dic, data) xrefstm = PDFStream(dic, data)
self.serialize_indirect(maxobj, xrefstm) self.serialize_indirect(maxobj, xrefstm)
self.write('startxref\n%d\n%%%%EOF' % startxref) self.write(b'startxref\n%d\n%%%%EOF' % startxref)
def write(self, data): def write(self, data):
self.outf.write(data) self.outf.write(data)
self.last = data[-1:] self.last = data[-1:]
@ -1926,13 +1964,10 @@ class PDFSerializer(object):
return self.outf.tell() return self.outf.tell()
def escape_string(self, string): def escape_string(self, string):
string = string.replace('\\', '\\\\') string = string.replace(b'\\', b'\\\\')
string = string.replace('\n', r'\n') string = string.replace(b'\n', b'\\n')
string = string.replace('(', r'\(') string = string.replace(b'(', b'\\(')
string = string.replace(')', r'\)') string = string.replace(b')', b'\\)')
# get rid of ciando id
regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
if regularexp.match(string): return ('http://www.ciando.com')
return string return string
def serialize_object(self, obj): def serialize_object(self, obj):
@ -1943,34 +1978,38 @@ class PDFSerializer(object):
obj['Subtype'] = obj['Type'] obj['Subtype'] = obj['Type']
del obj['Type'] del obj['Type']
# end - hope this doesn't have bad effects # end - hope this doesn't have bad effects
self.write('<<') self.write(b'<<')
for key, val in obj.items(): for key, val in obj.items():
self.write('/%s' % key) self.write(str(PSLiteralTable.intern(key.encode('utf-8'))).encode('utf-8'))
self.serialize_object(val) self.serialize_object(val)
self.write('>>') self.write(b'>>')
elif isinstance(obj, list): elif isinstance(obj, list):
self.write('[') self.write(b'[')
for val in obj: for val in obj:
self.serialize_object(val) self.serialize_object(val)
self.write(']') self.write(b']')
elif isinstance(obj, bytearray):
self.write(b'(%s)' % self.escape_string(obj))
elif isinstance(obj, bytes):
self.write(b'(%s)' % self.escape_string(obj))
elif isinstance(obj, str): elif isinstance(obj, str):
self.write('(%s)' % self.escape_string(obj)) self.write(b'(%s)' % self.escape_string(obj.encode('utf-8')))
elif isinstance(obj, bool): elif isinstance(obj, bool):
if self.last.isalnum(): if self.last.isalnum():
self.write(' ') self.write(b' ')
self.write(str(obj).lower()) self.write(str(obj).lower().encode('utf-8'))
elif isinstance(obj, (int, long)): elif isinstance(obj, (int, long)):
if self.last.isalnum(): if self.last.isalnum():
self.write(' ') self.write(b' ')
self.write(str(obj)) self.write(str(obj).encode('utf-8'))
elif isinstance(obj, Decimal): elif isinstance(obj, Decimal):
if self.last.isalnum(): if self.last.isalnum():
self.write(' ') self.write(b' ')
self.write(str(obj)) self.write(str(obj).encode('utf-8'))
elif isinstance(obj, PDFObjRef): elif isinstance(obj, PDFObjRef):
if self.last.isalnum(): if self.last.isalnum():
self.write(' ') self.write(b' ')
self.write('%d %d R' % (obj.objid, 0)) self.write(b'%d %d R' % (obj.objid, 0))
elif isinstance(obj, PDFStream): elif isinstance(obj, PDFStream):
### If we don't generate cross ref streams the object streams ### If we don't generate cross ref streams the object streams
### are no longer useful, as we have extracted all objects from ### are no longer useful, as we have extracted all objects from
@ -1980,21 +2019,21 @@ class PDFSerializer(object):
else: else:
data = obj.get_decdata() data = obj.get_decdata()
self.serialize_object(obj.dic) self.serialize_object(obj.dic)
self.write('stream\n') self.write(b'stream\n')
self.write(data) self.write(data)
self.write('\nendstream') self.write(b'\nendstream')
else: else:
data = str(obj) data = str(obj).encode('utf-8')
if data[0].isalnum() and self.last.isalnum(): if bytes([data[0]]).isalnum() and self.last.isalnum():
self.write(' ') self.write(b' ')
self.write(data) self.write(data)
def serialize_indirect(self, objid, obj): def serialize_indirect(self, objid, obj):
self.write('%d 0 obj' % (objid,)) self.write(b'%d 0 obj' % (objid,))
self.serialize_object(obj) self.serialize_object(obj)
if self.last.isalnum(): if self.last.isalnum():
self.write('\n') self.write(b'\n')
self.write('endobj\n') self.write(b'endobj\n')
@ -2003,12 +2042,7 @@ def decryptBook(userkey, inpath, outpath):
if AES is None: if AES is None:
raise IGNOBLEError("PyCrypto or OpenSSL must be installed.") raise IGNOBLEError("PyCrypto or OpenSSL must be installed.")
with open(inpath, 'rb') as inf: with open(inpath, 'rb') as inf:
#try:
serializer = PDFSerializer(inf, userkey) serializer = PDFSerializer(inf, userkey)
#except:
# print("Error serializing pdf {0}. Probably wrong key.".format(os.path.basename(inpath)))
# return 2
# hope this will fix the 'bad file descriptor' problem
with open(outpath, 'wb') as outf: with open(outpath, 'wb') as outf:
# help construct to make sure the method runs to the end # help construct to make sure the method runs to the end
try: try:

View file

@ -58,11 +58,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# We can do nothing if a write fails
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -104,7 +110,7 @@ def unicode_argv():
return ["ineptepub.py"] return ["ineptepub.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class ADEPTError(Exception): class ADEPTError(Exception):

View file

@ -76,11 +76,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# We can do nothing if a write fails
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -119,7 +125,7 @@ def unicode_argv():
return ["ineptpdf.py"] return ["ineptpdf.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class ADEPTError(Exception): class ADEPTError(Exception):
@ -553,17 +559,17 @@ def keyword_name(x):
## PSBaseParser ## PSBaseParser
## ##
EOL = re.compile(rb'[\r\n]') EOL = re.compile(br'[\r\n]')
SPC = re.compile(rb'\s') SPC = re.compile(br'\s')
NONSPC = re.compile(rb'\S') NONSPC = re.compile(br'\S')
HEX = re.compile(rb'[0-9a-fA-F]') HEX = re.compile(br'[0-9a-fA-F]')
END_LITERAL = re.compile(rb'[#/%\[\]()<>{}\s]') END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]')
END_HEX_STRING = re.compile(rb'[^\s0-9a-fA-F]') END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]')
HEX_PAIR = re.compile(rb'[0-9a-fA-F]{2}|.') HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.')
END_NUMBER = re.compile(rb'[^0-9]') END_NUMBER = re.compile(br'[^0-9]')
END_KEYWORD = re.compile(rb'[#/%\[\]()<>{}\s]') END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]')
END_STRING = re.compile(rb'[()\\]') END_STRING = re.compile(br'[()\\]')
OCT_STRING = re.compile(rb'[0-7]') OCT_STRING = re.compile(br'[0-7]')
ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 } ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 }
class PSBaseParser(object): class PSBaseParser(object):
@ -628,7 +634,12 @@ class PSBaseParser(object):
if not m: if not m:
return (self.parse_main, len(s)) return (self.parse_main, len(s))
j = m.start(0) j = m.start(0)
c = bytes([s[j]]) if isinstance(s[j], str):
# Python 2
c = s[j]
else:
# Python 3
c = bytes([s[j]])
self.tokenstart = self.bufpos+j self.tokenstart = self.bufpos+j
if c == b'%': if c == b'%':
self.token = c self.token = c
@ -680,7 +691,10 @@ class PSBaseParser(object):
return (self.parse_literal, len(s)) return (self.parse_literal, len(s))
j = m.start(0) j = m.start(0)
self.token += s[i:j] self.token += s[i:j]
c = bytes([s[j]]) if isinstance(s[j], str):
c = s[j]
else:
c = bytes([s[j]])
if c == b'#': if c == b'#':
self.hex = b'' self.hex = b''
return (self.parse_literal_hex, j+1) return (self.parse_literal_hex, j+1)
@ -688,7 +702,10 @@ class PSBaseParser(object):
return (self.parse_main, j) return (self.parse_main, j)
def parse_literal_hex(self, s, i): def parse_literal_hex(self, s, i):
c = bytes([s[i]]) if isinstance(s[i], str):
c = s[i]
else:
c = bytes([s[i]])
if HEX.match(c) and len(self.hex) < 2: if HEX.match(c) and len(self.hex) < 2:
self.hex += c self.hex += c
return (self.parse_literal_hex, i+1) return (self.parse_literal_hex, i+1)
@ -703,7 +720,10 @@ class PSBaseParser(object):
return (self.parse_number, len(s)) return (self.parse_number, len(s))
j = m.start(0) j = m.start(0)
self.token += s[i:j] self.token += s[i:j]
c = bytes([s[j]]) if isinstance(s[j], str):
c = s[j]
else:
c = bytes([s[j]])
if c == b'.': if c == b'.':
self.token += c self.token += c
return (self.parse_decimal, j+1) return (self.parse_decimal, j+1)
@ -746,7 +766,10 @@ class PSBaseParser(object):
return (self.parse_string, len(s)) return (self.parse_string, len(s))
j = m.start(0) j = m.start(0)
self.token += s[i:j] self.token += s[i:j]
c = bytes([s[j]]) if isinstance(s[j], str):
c = s[j]
else:
c = bytes([s[j]])
if c == b'\\': if c == b'\\':
self.oct = '' self.oct = ''
return (self.parse_string_1, j+1) return (self.parse_string_1, j+1)
@ -763,7 +786,10 @@ class PSBaseParser(object):
return (self.parse_main, j+1) return (self.parse_main, j+1)
def parse_string_1(self, s, i): def parse_string_1(self, s, i):
c = bytes([s[i]]) if isinstance(s[i], str):
c = s[i]
else:
c = bytes([s[i]])
if OCT_STRING.match(c) and len(self.oct) < 3: if OCT_STRING.match(c) and len(self.oct) < 3:
self.oct += c self.oct += c
return (self.parse_string_1, i+1) return (self.parse_string_1, i+1)
@ -775,7 +801,10 @@ class PSBaseParser(object):
return (self.parse_string, i+1) return (self.parse_string, i+1)
def parse_wopen(self, s, i): def parse_wopen(self, s, i):
c = bytes([s[i]]) if isinstance(s[i], str):
c = s[i]
else:
c = bytes([s[i]])
if c.isspace() or HEX.match(c): if c.isspace() or HEX.match(c):
return (self.parse_hexstring, i) return (self.parse_hexstring, i)
if c == b'<': if c == b'<':
@ -784,7 +813,10 @@ class PSBaseParser(object):
return (self.parse_main, i) return (self.parse_main, i)
def parse_wclose(self, s, i): def parse_wclose(self, s, i):
c = bytes([s[i]]) if isinstance(s[i], str):
c = s[i]
else:
c = bytes([s[i]])
if c == b'>': if c == b'>':
self.add_token(KEYWORD_DICT_END) self.add_token(KEYWORD_DICT_END)
i += 1 i += 1
@ -926,6 +958,7 @@ class PSStackParser(PSBaseParser):
isinstance(token, bool) or isinstance(token, bool) or
isinstance(token, bytearray) or isinstance(token, bytearray) or
isinstance(token, bytes) or isinstance(token, bytes) or
isinstance(token, str) or
isinstance(token, PSLiteral)): isinstance(token, PSLiteral)):
# normal token # normal token
self.push((pos, token)) self.push((pos, token))
@ -1033,7 +1066,7 @@ def decipher_all(decipher, objid, genno, x):
''' '''
Recursively decipher X. Recursively decipher X.
''' '''
if isinstance(x, bytearray) or isinstance(x,bytes): if isinstance(x, bytearray) or isinstance(x,bytes) or isinstance(x,str):
return decipher(objid, genno, x) return decipher(objid, genno, x)
decf = lambda v: decipher_all(decipher, objid, genno, v) decf = lambda v: decipher_all(decipher, objid, genno, v)
if isinstance(x, list): if isinstance(x, list):
@ -1070,7 +1103,7 @@ def num_value(x):
def str_value(x): def str_value(x):
x = resolve1(x) x = resolve1(x)
if not (isinstance(x, bytearray) or isinstance(x, bytes)): if not (isinstance(x, bytearray) or isinstance(x, bytes) or isinstance(x, str)):
if STRICT: if STRICT:
raise PDFTypeError('String required: %r' % x) raise PDFTypeError('String required: %r' % x)
return '' return ''
@ -1420,7 +1453,6 @@ class PDFDocument(object):
for xref in self.xrefs: for xref in self.xrefs:
trailer = xref.trailer trailer = xref.trailer
if not trailer: continue if not trailer: continue
# If there's an encryption info, remember it. # If there's an encryption info, remember it.
if 'Encrypt' in trailer: if 'Encrypt' in trailer:
#assert not self.encryption #assert not self.encryption
@ -1953,7 +1985,7 @@ class PDFParser(PSStackParser):
except PDFNoValidXRef: except PDFNoValidXRef:
# fallback # fallback
self.seek(0) self.seek(0)
pat = re.compile(rb'^(\d+)\s+(\d+)\s+obj\b') pat = re.compile(b'^(\\d+)\\s+(\\d+)\\s+obj\\b')
offsets = {} offsets = {}
xref = PDFXRef() xref = PDFXRef()
while 1: while 1:
@ -2158,9 +2190,9 @@ class PDFSerializer(object):
def escape_string(self, string): def escape_string(self, string):
string = string.replace(b'\\', b'\\\\') string = string.replace(b'\\', b'\\\\')
string = string.replace(b'\n', rb'\n') string = string.replace(b'\n', b'\\n')
string = string.replace(b'(', rb'\(') string = string.replace(b'(', b'\\(')
string = string.replace(b')', rb'\)') string = string.replace(b')', b'\\)')
return string return string
def serialize_object(self, obj): def serialize_object(self, obj):

View file

@ -103,11 +103,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# We can do nothing if a write fails
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -148,7 +154,7 @@ def unicode_argv():
return ["mobidedrm.py"] return ["mobidedrm.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
# cleanup unicode filenames # cleanup unicode filenames
# borrowed from calibre from calibre/src/calibre/__init__.py # borrowed from calibre from calibre/src/calibre/__init__.py

View file

@ -61,11 +61,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# We can do nothing if a write fails
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -109,7 +115,7 @@ def unicode_argv():
return ["kindlekey.py"] return ["kindlekey.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class DrmException(Exception): class DrmException(Exception):
pass pass

View file

@ -26,11 +26,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# We can do nothing if a write fails
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -71,7 +77,7 @@ def unicode_argv():
return ["kindlepid.py"] return ["kindlepid.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
letters = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789' letters = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789'

View file

@ -94,11 +94,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# We can do nothing if a write fails
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -139,7 +145,7 @@ def unicode_argv():
return ["mobidedrm.py"] return ["mobidedrm.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
class DrmException(Exception): class DrmException(Exception):

View file

@ -32,11 +32,17 @@ class SafeUnbuffered:
if self.encoding == None: if self.encoding == None:
self.encoding = "utf-8" self.encoding = "utf-8"
def write(self, data): def write(self, data):
if isinstance(data, str): if isinstance(data,str) or isinstance(data,unicode):
# str for Python3, unicode for Python2
data = data.encode(self.encoding,"replace") data = data.encode(self.encoding,"replace")
self.stream.buffer.write(data) try:
self.stream.buffer.flush() buffer = getattr(self.stream, 'buffer', self.stream)
# self.stream.buffer for Python3, self.stream for Python2
buffer.write(data)
buffer.flush()
except:
# We can do nothing if a write fails
raise
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
@ -77,7 +83,7 @@ def unicode_argv():
return ["mobidedrm.py"] return ["mobidedrm.py"]
else: else:
argvencoding = sys.stdin.encoding or "utf-8" argvencoding = sys.stdin.encoding or "utf-8"
return [arg if isinstance(arg, str) else str(arg, argvencoding) for arg in sys.argv] return [arg if (isinstance(arg, str) or isinstance(arg,unicode)) else str(arg, argvencoding) for arg in sys.argv]
#global switch #global switch
debug = False debug = False

View file

@ -209,6 +209,7 @@ def _EndRecData(fpin):
fpin.seek(-sizeEndCentDir, 2) fpin.seek(-sizeEndCentDir, 2)
except IOError: except IOError:
return None return None
data = fpin.read() data = fpin.read()
if data[0:4] == stringEndArchive and data[-2:] == "\000\000": if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
# the signature is correct and there's no comment, unpack structure # the signature is correct and there's no comment, unpack structure
@ -662,7 +663,8 @@ class ZipFile:
self.comment = b'' self.comment = b''
# Check if we were passed a file-like object # Check if we were passed a file-like object
if isinstance(file, str): # "str" is python3, "unicode" is python2
if isinstance(file, str) or isinstance(file, unicode):
self._filePassed = 0 self._filePassed = 0
self.filename = file self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}