DeDRM_tools/Calibre_Plugins/ineptepub_plugin/zipfix.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import zlib
import zipfilerugged
import os
import os.path
import getopt
from struct import unpack


# Byte offsets added to a member's local header offset when reading the
# raw archive file.
# Position of the 2-byte little-endian file name length.
_FILENAME_LEN_OFFSET = 26
# Position of the 2-byte little-endian extra field length.
_EXTRA_LEN_OFFSET = 28
# Position of the first byte of the file name.
_FILENAME_OFFSET = 30
# Size, in bytes, of the pieces of compressed data handed to zlib.
_MAX_SIZE = 64 * 1024
# Content written as the 'mimetype' member when the output is an epub.
_MIMETYPE = 'application/epub+zip'

class ZipInfo(zipfilerugged.ZipInfo):
    """ZipInfo whose compression type can be given at construction time.

    Accepts an optional ``compress_type`` keyword argument on top of the
    arguments taken by ``zipfilerugged.ZipInfo``.
    """

    def __init__(self, *args, **kwargs):
        """Build the base ZipInfo, then apply ``compress_type`` if supplied.

        All positional and remaining keyword arguments are forwarded to the
        base class unchanged.
        """
        # Remove our extra keyword before delegating so the base constructor
        # never sees it.
        has_compress_type = 'compress_type' in kwargs
        compress_type = kwargs.pop('compress_type', None)
        super(ZipInfo, self).__init__(*args, **kwargs)
        # Only override when the caller actually passed a value; previously
        # omitting the keyword raised UnboundLocalError here.
        if has_compress_type:
            self.compress_type = compress_type

class fixZip:
    """Copy every member of a zip/epub archive into a freshly written archive.

    Members that the zip reader refuses to read are recovered straight from
    the raw file, using the name stored in the member's local header. When
    the input path looks like an epub, an uncompressed 'mimetype' member is
    written first.
    """

    def __init__(self, zinput, zoutput):
        """Open the input archive (as zip and as raw file) and the output.

        zinput  -- path of the archive to repair
        zoutput -- path of the archive to create
        """
        self.ztype = 'zip'
        if zinput.lower().find('.epub') >= 0 :
            self.ztype = 'epub'
        self.inzip = zipfilerugged.ZipFile(zinput,'r')
        self.outzip = zipfilerugged.ZipFile(zoutput,'w')
        # open the input zip for reading only as a raw file
        self.bzf = open(zinput,'rb')

    def _read_uint16(self, offset):
        """Return the little-endian unsigned 16-bit value at *offset* in the raw file."""
        self.bzf.seek(offset)
        value, = unpack('<H', self.bzf.read(2))
        return value

    def getlocalname(self, zi):
        """Return the file name stored in the local header of member *zi*.

        The name is read as raw bytes from the input file, located via
        ``zi.header_offset``.
        """
        local_header_offset = zi.header_offset
        local_name_length = self._read_uint16(local_header_offset + _FILENAME_LEN_OFFSET)
        self.bzf.seek(local_header_offset + _FILENAME_OFFSET)
        return self.bzf.read(local_name_length)

    def uncompress(self, cmpdata):
        """Inflate the raw deflate stream *cmpdata* and return the result.

        The input is fed to zlib in pieces of at most ``_MAX_SIZE`` bytes.
        """
        # -15: raw deflate data without a zlib header or checksum.
        dc = zlib.decompressobj(-15)
        pieces = []
        for start in range(0, len(cmpdata), _MAX_SIZE):
            pieces.append(dc.decompress(cmpdata[start:start + _MAX_SIZE]))
        # Flush once, after all input has been supplied: zlib documents that
        # decompress() should not be called again after flush().
        pieces.append(dc.flush())
        return ''.join(pieces)

    def getfiledata(self, zi):
        """Return the uncompressed content of member *zi*, read from the raw file.

        Returns None when the member's compression type is neither
        ZIP_STORED nor ZIP_DEFLATED.
        """
        # get file name length and extra data length to find start of file data
        local_header_offset = zi.header_offset
        local_name_length = self._read_uint16(local_header_offset + _FILENAME_LEN_OFFSET)
        extra_field_length = self._read_uint16(local_header_offset + _EXTRA_LEN_OFFSET)

        self.bzf.seek(local_header_offset + _FILENAME_OFFSET + local_name_length + extra_field_length)
        data = None

        # if not compressed we are good to go
        if zi.compress_type == zipfilerugged.ZIP_STORED:
            data = self.bzf.read(zi.file_size)

        # if compressed we must decompress it using zlib
        if zi.compress_type == zipfilerugged.ZIP_DEFLATED:
            cmpdata = self.bzf.read(zi.compress_size)
            data = self.uncompress(cmpdata)

        return data

    def fix(self):
        """Write the repaired archive, then close all three file handles.

        The handles are closed even when copying fails part-way through.
        """
        # get the zipinfo for each member of the input archive
        # and copy member over to output archive
        # if problems exist with local vs central filename, fix them
        try:
            # if epub write mimetype file first, with no compression
            if self.ztype == 'epub':
                nzinfo = ZipInfo('mimetype', compress_type=zipfilerugged.ZIP_STORED)
                self.outzip.writestr(nzinfo, _MIMETYPE)

            # write the rest of the files
            for zinfo in self.inzip.infolist():
                # For an epub the mimetype member was already written above;
                # for a plain zip every member, including one named
                # "mimetype", is copied.
                if zinfo.filename == "mimetype" and self.ztype == 'epub':
                    continue
                try:
                    data = self.inzip.read(zinfo.filename)
                except (zipfilerugged.BadZipfile, zipfilerugged.error):
                    # The reader rejected the member: fall back to the raw
                    # file and trust the name stored in the local header.
                    local_name = self.getlocalname(zinfo)
                    data = self.getfiledata(zinfo)
                    zinfo.filename = local_name

                # The input's own info object is reused for the output, so
                # its date and compression type carry over unchanged.
                zinfo.flag_bits = 0
                zinfo.internal_attr = 0
                self.outzip.writestr(zinfo, data)
        finally:
            self.bzf.close()
            self.inzip.close()
            self.outzip.close()


def usage():
    """Print the command-line usage message to standard output."""
    # A parenthesised single-argument print emits exactly the same text on
    # Python 2 and is also valid syntax on Python 3.
    print("""usage: zipfix.py inputzip outputzip
     inputzip is the source zipfile to fix
     outputzip is the fixed zip archive
    """)


def repairBook(infile, outfile):
    """Repair the archive at *infile*, writing the result to *outfile*.

    Returns 0 on success, 1 when *infile* does not exist, and 2 when the
    repair raised an exception (the error is printed, not propagated).
    """
    if not os.path.exists(infile):
        print("Error: Input Zip File does not exist")
        return 1
    try:
        fr = fixZip(infile, outfile)
        fr.fix()
        return 0
    except Exception as e:
        # Two spaces after "Occurred" keep the output identical to the former
        # `print "Error Occurred ", e` statement.
        print("Error Occurred  %s" % (e,))
        return 2


def main(argv=sys.argv):
    """Command-line entry point.

    Expects exactly two arguments after the program name: the input archive
    and the output archive. Returns 1 after printing the usage text when the
    argument count is wrong, otherwise the result of repairBook().
    """
    if len(argv) == 3:
        source_path, target_path = argv[1], argv[2]
        return repairBook(source_path, target_path)
    usage()
    return 1


# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__' :
    sys.exit(main())
tools v2.2 2010-11-11 23:11:36 +01:00			`#!/usr/bin/env python`
tools v5.5 Plugins now include unaltered stand-alone scripts, so no longer need to keep separate copies. 2012-12-19 14:48:11 +01:00			`# -- coding: utf-8 --`
tools v2.2 2010-11-11 23:11:36 +01:00
			`import sys`
			`import zlib`
tools v5.4 2012-11-07 14:14:25 +01:00			`import zipfilerugged`
tools v2.2 2010-11-11 23:11:36 +01:00			`import os`
			`import os.path`
			`import getopt`
			`from struct import unpack`


			`_FILENAME_LEN_OFFSET = 26`
			`_EXTRA_LEN_OFFSET = 28`
			`_FILENAME_OFFSET = 30`
			`_MAX_SIZE = 64 * 1024`
tools v3.4 2011-02-08 18:21:51 +01:00			`_MIMETYPE = 'application/epub+zip'`

tools v5.4 2012-11-07 14:14:25 +01:00			`class ZipInfo(zipfilerugged.ZipInfo):`
tools v3.4 2011-02-08 18:21:51 +01:00			`def __init__(self, args, *kwargs):`
			`if 'compress_type' in kwargs:`
			`compress_type = kwargs.pop('compress_type')`
			`super(ZipInfo, self).__init__(args, *kwargs)`
			`self.compress_type = compress_type`
tools v2.2 2010-11-11 23:11:36 +01:00
			`class fixZip:`
			`def __init__(self, zinput, zoutput):`
tools v3.4 2011-02-08 18:21:51 +01:00			`self.ztype = 'zip'`
			`if zinput.lower().find('.epub') >= 0 :`
			`self.ztype = 'epub'`
tools v5.4 2012-11-07 14:14:25 +01:00			`self.inzip = zipfilerugged.ZipFile(zinput,'r')`
			`self.outzip = zipfilerugged.ZipFile(zoutput,'w')`
tools v2.2 2010-11-11 23:11:36 +01:00			`# open the input zip for reading only as a raw file`
tools v5.4 2012-11-07 14:14:25 +01:00			`self.bzf = file(zinput,'rb')`

tools v2.2 2010-11-11 23:11:36 +01:00			`def getlocalname(self, zi):`
			`local_header_offset = zi.header_offset`
			`self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET)`
			`leninfo = self.bzf.read(2)`
			`local_name_length, = unpack('<H', leninfo)`
			`self.bzf.seek(local_header_offset + _FILENAME_OFFSET)`
			`local_name = self.bzf.read(local_name_length)`
			`return local_name`

			`def uncompress(self, cmpdata):`
			`dc = zlib.decompressobj(-15)`
			`data = ''`
			`while len(cmpdata) > 0:`
			`if len(cmpdata) > _MAX_SIZE :`
			`newdata = cmpdata[0:_MAX_SIZE]`
			`cmpdata = cmpdata[_MAX_SIZE:]`
			`else:`
			`newdata = cmpdata`
			`cmpdata = ''`
			`newdata = dc.decompress(newdata)`
			`unprocessed = dc.unconsumed_tail`
			`if len(unprocessed) == 0:`
			`newdata += dc.flush()`
			`data += newdata`
			`cmpdata += unprocessed`
			`unprocessed = ''`
			`return data`

			`def getfiledata(self, zi):`
			`# get file name length and exta data length to find start of file data`
			`local_header_offset = zi.header_offset`

			`self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET)`
			`leninfo = self.bzf.read(2)`
			`local_name_length, = unpack('<H', leninfo)`

			`self.bzf.seek(local_header_offset + _EXTRA_LEN_OFFSET)`
			`exinfo = self.bzf.read(2)`
			`extra_field_length, = unpack('<H', exinfo)`

			`self.bzf.seek(local_header_offset + _FILENAME_OFFSET + local_name_length + extra_field_length)`
			`data = None`

			`# if not compressed we are good to go`
tools v5.4 2012-11-07 14:14:25 +01:00			`if zi.compress_type == zipfilerugged.ZIP_STORED:`
tools v2.2 2010-11-11 23:11:36 +01:00			`data = self.bzf.read(zi.file_size)`

			`# if compressed we must decompress it using zlib`
tools v5.4 2012-11-07 14:14:25 +01:00			`if zi.compress_type == zipfilerugged.ZIP_DEFLATED:`
tools v2.2 2010-11-11 23:11:36 +01:00			`cmpdata = self.bzf.read(zi.compress_size)`
			`data = self.uncompress(cmpdata)`

			`return data`

tools v5.4 2012-11-07 14:14:25 +01:00
tools v2.2 2010-11-11 23:11:36 +01:00
			`def fix(self):`
			`# get the zipinfo for each member of the input archive`
			`# and copy member over to output archive`
			`# if problems exist with local vs central filename, fix them`
tools v3.3 2011-02-02 15:41:15 +01:00
tools v3.4 2011-02-08 18:21:51 +01:00			`# if epub write mimetype file first, with no compression`
			`if self.ztype == 'epub':`
tools v5.4 2012-11-07 14:14:25 +01:00			`nzinfo = ZipInfo('mimetype', compress_type=zipfilerugged.ZIP_STORED)`
tools v3.4 2011-02-08 18:21:51 +01:00			`self.outzip.writestr(nzinfo, _MIMETYPE)`
tools v3.3 2011-02-02 15:41:15 +01:00
			`# write the rest of the files`
			`for zinfo in self.inzip.infolist():`
tools v3.4 2011-02-08 18:21:51 +01:00			`if zinfo.filename != "mimetype" or self.ztype == '.zip':`
tools v3.3 2011-02-02 15:41:15 +01:00			`data = None`
			`nzinfo = zinfo`
tools v5.4 2012-11-07 14:14:25 +01:00			`try:`
tools v3.3 2011-02-02 15:41:15 +01:00			`data = self.inzip.read(zinfo.filename)`
tools v5.4 2012-11-07 14:14:25 +01:00			`except zipfilerugged.BadZipfile or zipfilerugged.error:`
tools v3.3 2011-02-02 15:41:15 +01:00			`local_name = self.getlocalname(zinfo)`
			`data = self.getfiledata(zinfo)`
			`nzinfo.filename = local_name`

			`nzinfo.date_time = zinfo.date_time`
			`nzinfo.compress_type = zinfo.compress_type`
			`nzinfo.flag_bits = 0`
			`nzinfo.internal_attr = 0`
			`self.outzip.writestr(nzinfo,data)`
tools v2.2 2010-11-11 23:11:36 +01:00
			`self.bzf.close()`
			`self.inzip.close()`
			`self.outzip.close()`


			`def usage():`
			`print """usage: zipfix.py inputzip outputzip`
			`inputzip is the source zipfile to fix`
			`outputzip is the fixed zip archive`
			`"""`
tools v5.4 2012-11-07 14:14:25 +01:00
tools v2.2 2010-11-11 23:11:36 +01:00
tools v3.3 2011-02-02 15:41:15 +01:00			`def repairBook(infile, outfile):`
tools v2.2 2010-11-11 23:11:36 +01:00			`if not os.path.exists(infile):`
			`print "Error: Input Zip File does not exist"`
			`return 1`
			`try:`
			`fr = fixZip(infile, outfile)`
			`fr.fix()`
			`return 0`
			`except Exception, e:`
			`print "Error Occurred ", e`
			`return 2`

tools v3.3 2011-02-02 15:41:15 +01:00
			`def main(argv=sys.argv):`
			`if len(argv)!=3:`
			`usage()`
			`return 1`
			`infile = argv[1]`
			`outfile = argv[2]`
			`return repairBook(infile, outfile)`


tools v2.2 2010-11-11 23:11:36 +01:00			`if __name__ == '__main__' :`
			`sys.exit(main())`