mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-27 21:59:17 +01:00
mold mp4_parser into mp4direct
This commit is contained in:
parent
2ed0b5568e
commit
302b23a9a3
2 changed files with 155 additions and 166 deletions
|
@ -1,157 +0,0 @@
|
||||||
import struct
|
|
||||||
|
|
||||||
from typing import Tuple
|
|
||||||
from io import BytesIO, RawIOBase
|
|
||||||
|
|
||||||
|
|
||||||
class LengthLimiter(RawIOBase):
    """
    A read-only byte stream that hands out at most `size` bytes of another stream.

    Reads are forwarded to the wrapped stream `r`;
    `remaining` is the number of bytes that may still be handed out.
    """

    def __init__(self, r: RawIOBase, size: int):
        """
        @params r     stream to read from
        @params size  maximum number of bytes to hand out (negative values are treated as 0)
        """
        super().__init__()
        self.r = r
        # A negative budget would end up as a negative size passed to r.read(),
        # which means "read everything" for most streams
        self.remaining = max(size, 0)

    def read(self, sz: int = None) -> bytes:
        """
        Read up to sz bytes without exceeding the remaining budget.

        @params sz  number of bytes wanted; None or a negative value means "all that is left"
        @returns    whatever the wrapped stream returned, or b'' when the budget is used up
        """
        if self.remaining <= 0:
            return b''
        if sz is None or sz < 0:
            sz = self.remaining
        ret = self.r.read(min(sz, self.remaining))
        if ret:
            self.remaining -= len(ret)
        return ret

    def readall(self) -> bytes:
        """
        Request all bytes left in the budget with a single read() call.

        @returns  the bytes read, or b'' when the budget is used up
        """
        # read() already updates `remaining`; subtracting here again would make it negative
        return self.read(self.remaining)

    def readable(self) -> bool:
        """ True while the budget is not used up """
        return bool(self.remaining)
|
|
||||||
|
|
||||||
|
|
||||||
def read_harder(r, size):
    """
    Keep reading from the stream until enough bytes were collected.

    Gives up after 3 consecutive reads that return nothing,
    so the result may be shorter than requested.

    @params r       byte stream to read
    @params size    Number of bytes to read in total
    @returns        the collected bytes
    """

    pieces = []
    collected = 0
    empty_reads = 0
    while collected < size and empty_reads < 3:
        piece = r.read(size - collected)
        if piece:
            # progress was made, so start counting empty reads from scratch
            empty_reads = 0
            pieces.append(piece)
            collected += len(piece)
        else:
            empty_reads += 1

    return b''.join(pieces)
|
|
||||||
|
|
||||||
|
|
||||||
def pack_be32(value: int) -> bytes:
    """ Encode value as a 4-byte unsigned integer, most significant byte first """
    be32 = struct.Struct('>I')
    return be32.pack(value)
|
|
||||||
|
|
||||||
|
|
||||||
def pack_be64(value: int) -> bytes:
    """
    Pack value to 8-byte-long bytes in the big-endian byte order

    @params value   unsigned integer that fits in 64 bits
    @returns        8 bytes
    """
    # 'Q' is the 8-byte unsigned type; 'L' is only 4 bytes when a byte order prefix is given
    return struct.pack('>Q', value)
|
|
||||||
|
|
||||||
|
|
||||||
def unpack_be32(value: bytes) -> int:
    """ Decode 4 bytes, most significant byte first, as an unsigned integer """
    (number,) = struct.unpack('>I', value)
    return number
|
|
||||||
|
|
||||||
|
|
||||||
def unpack_be64(value: bytes) -> int:
    """
    Convert 8-byte-long bytes in the big-endian byte order, to an integer value

    @params value   exactly 8 bytes
    @returns        the unsigned integer they encode
    """
    # 'Q' is the 8-byte unsigned type; 'L' is only 4 bytes when a byte order prefix is given
    return struct.unpack('>Q', value)[0]
|
|
||||||
|
|
||||||
|
|
||||||
def unpack_ver_flags(value: bytes) -> Tuple[int, int]:
    """
    Split a 4-byte-long value into its first byte (version)
    and the big-endian integer formed by the other three bytes (flags).

    @returns (version, flags)
    """

    version, flag_bytes = struct.unpack('>B3s', value)
    flags = int.from_bytes(flag_bytes, 'big')
    return version, flags
|
|
||||||
|
|
||||||
|
|
||||||
# Box types taken from
# https://github.com/gpac/mp4box.js/blob/4e1bc23724d2603754971abc00c2bd5aede7be60/src/box.js#L13-L40
MP4_CONTAINER_BOXES = (
    'moov', 'trak', 'edts', 'mdia', 'minf', 'dinf', 'stbl', 'mvex', 'moof',
    'traf', 'vttc', 'tref', 'iref', 'mfra', 'meco', 'hnti', 'hinf', 'strk',
    'strd', 'sinf', 'rinf', 'schi', 'trgr', 'udta', 'iprp', 'ipco',
)
""" List of boxes that nests the other boxes """
|
|
||||||
|
|
||||||
|
|
||||||
def parse_mp4_boxes(r: RawIOBase):
    """
    Parses an ISO BMFF (which MP4 follows) and yields its boxes as a sequence.
    This does not interpret content of these boxes.

    Sequence details:
    ('atom', b'blablabla'): A box, with content (not container boxes)
    ('atom', b''): Possibly container box (must check MP4_CONTAINER_BOXES) or really an empty box
    (None, 'atom'): End of a container box

    Example:                    Path:
    ('test', b'123456')         /test
    ('moov', b'')               /moov (start of container box)
    ('helo', b'abcdef')         /moov/helo
    ('1984', b'1q84')           /moov/1984
    ('trak', b'')               /moov/trak (start of container box)
    ('keys', b'2022')           /moov/trak/keys
    (None  , 'trak')            /moov/trak (end of container box)
    ('topp', b'1991')           /moov/topp
    (None  , 'moov')            /moov (end of container box)

    NOTE(review): the 32-bit size field is always taken literally; the special
    values 0 (box runs to the end) and 1 (64-bit size follows) that ISO BMFF
    defines get no dedicated handling here.

    @params r   byte stream positioned at the start of a box
    """

    while True:
        size_b = read_harder(r, 4)
        if not size_b:
            # nothing more to read: end of the stream (or of the enclosing container)
            break
        # Read the type as persistently as the size, so that a short read does not truncate it
        type_b = read_harder(r, 4)
        # 00 00 00 20 is big-endian
        box_size = unpack_be32(size_b)
        type_s = type_b.decode()
        # the size field counts the 8 header bytes (size + type) consumed above
        body_size = box_size - 8
        if type_s in MP4_CONTAINER_BOXES:
            yield (type_s, b'')
            # children are parsed from a view that ends where this container ends
            yield from parse_mp4_boxes(LengthLimiter(r, body_size))
            yield (None, type_s)
            continue
        full_body = read_harder(r, body_size)
        yield (type_s, full_body)
|
|
||||||
|
|
||||||
|
|
||||||
def write_mp4_boxes(w: RawIOBase, box_iter):
    """
    Serializes a box sequence into an ISO BMFF byte stream.
    The sequence has to use the same protocol that parse_mp4_boxes yields.

    @params w           writer that receives the top-level boxes
    @params box_iter    iterable of (type, content) pairs
    """

    # Every entry is (type of the open container, sink for its children);
    # the bottom entry stands for the output itself
    open_boxes = [(None, w)]
    for box_type, payload in box_iter:
        if box_type in MP4_CONTAINER_BOXES:
            # start of a container: buffer its children until the end marker arrives
            open_boxes.append((box_type, BytesIO()))
            continue

        if box_type is None:
            # end of a container: here the second item names the box being closed
            assert open_boxes[-1][0] == payload
            box_type, buffered = open_boxes.pop()
            payload = buffered.getvalue()

        sink = open_boxes[-1][1]
        # header is the total size (payload + 8 header bytes) followed by the type
        sink.write(pack_be32(len(payload) + 8))
        sink.write(box_type.encode()[:4])
        sink.write(payload)
|
|
|
@ -1,19 +1,165 @@
|
||||||
import os
|
import os
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from io import BytesIO, RawIOBase
|
||||||
from math import inf
|
from math import inf
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
from .common import PostProcessor
|
from .common import PostProcessor
|
||||||
from ..utils import prepend_extension
|
from ..utils import prepend_extension
|
||||||
|
|
||||||
from ..mp4_parser import (
|
|
||||||
write_mp4_boxes,
|
class LengthLimiter(RawIOBase):
|
||||||
parse_mp4_boxes,
|
"""
|
||||||
pack_be32,
|
A bytes IO to limit length to be read.
|
||||||
pack_be64,
|
"""
|
||||||
unpack_ver_flags,
|
|
||||||
unpack_be32,
|
def __init__(self, r: RawIOBase, size: int):
|
||||||
unpack_be64,
|
self.r = r
|
||||||
)
|
self.remaining = size
|
||||||
|
|
||||||
|
def read(self, sz: int = None) -> bytes:
    """
    Read up to sz bytes without exceeding the remaining budget.

    @params sz  number of bytes wanted; None or a negative value means "all that is left"
    @returns    whatever the wrapped stream returned, or b'' when the budget is used up
    """
    # `<=` rather than `==`: a negative budget must never reach self.r.read(),
    # where a negative size means "read everything" for most streams
    if self.remaining <= 0:
        return b''
    if sz is None or sz < 0:
        sz = self.remaining
    ret = self.r.read(min(sz, self.remaining))
    if ret:
        self.remaining -= len(ret)
    return ret
|
||||||
|
|
||||||
|
def readall(self) -> bytes:
    """
    Request all bytes left in the budget with a single read() call.

    @returns  the bytes read, or b'' when the budget is used up
    """
    if self.remaining == 0:
        return b''
    # read() already updates `remaining`; subtracting here again would make it negative
    return self.read(self.remaining)
|
||||||
|
|
||||||
|
def readable(self) -> bool:
    """ Tell whether the budget still has bytes left (`remaining` is non-zero) """
    return True if self.remaining else False
|
||||||
|
|
||||||
|
|
||||||
|
def read_harder(r, size):
    """
    Keep reading from the stream until enough bytes were collected.

    Gives up after 3 consecutive reads that return nothing,
    so the result may be shorter than requested.

    @params r       byte stream to read
    @params size    Number of bytes to read in total
    @returns        the collected bytes
    """

    pieces = []
    collected = 0
    empty_reads = 0
    while collected < size and empty_reads < 3:
        piece = r.read(size - collected)
        if piece:
            # progress was made, so start counting empty reads from scratch
            empty_reads = 0
            pieces.append(piece)
            collected += len(piece)
        else:
            empty_reads += 1

    return b''.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def pack_be32(value: int) -> bytes:
    """ Encode value as a 4-byte unsigned integer, most significant byte first """
    be32 = struct.Struct('>I')
    return be32.pack(value)
|
||||||
|
|
||||||
|
|
||||||
|
def pack_be64(value: int) -> bytes:
    """
    Pack value to 8-byte-long bytes in the big-endian byte order

    @params value   unsigned integer that fits in 64 bits
    @returns        8 bytes
    """
    # 'Q' is the 8-byte unsigned type; 'L' is only 4 bytes when a byte order prefix is given
    return struct.pack('>Q', value)
|
||||||
|
|
||||||
|
|
||||||
|
def unpack_be32(value: bytes) -> int:
    """ Decode 4 bytes, most significant byte first, as an unsigned integer """
    (number,) = struct.unpack('>I', value)
    return number
|
||||||
|
|
||||||
|
|
||||||
|
def unpack_be64(value: bytes) -> int:
    """
    Convert 8-byte-long bytes in the big-endian byte order, to an integer value

    @params value   exactly 8 bytes
    @returns        the unsigned integer they encode
    """
    # 'Q' is the 8-byte unsigned type; 'L' is only 4 bytes when a byte order prefix is given
    return struct.unpack('>Q', value)[0]
|
||||||
|
|
||||||
|
|
||||||
|
def unpack_ver_flags(value: bytes) -> Tuple[int, int]:
    """
    Split a 4-byte-long value into its first byte (version)
    and the big-endian integer formed by the other three bytes (flags).

    @returns (version, flags)
    """

    version, flag_bytes = struct.unpack('>B3s', value)
    flags = int.from_bytes(flag_bytes, 'big')
    return version, flags
|
||||||
|
|
||||||
|
|
||||||
|
# Box types taken from
# https://github.com/gpac/mp4box.js/blob/4e1bc23724d2603754971abc00c2bd5aede7be60/src/box.js#L13-L40
MP4_CONTAINER_BOXES = (
    'moov', 'trak', 'edts', 'mdia', 'minf', 'dinf', 'stbl', 'mvex', 'moof',
    'traf', 'vttc', 'tref', 'iref', 'mfra', 'meco', 'hnti', 'hinf', 'strk',
    'strd', 'sinf', 'rinf', 'schi', 'trgr', 'udta', 'iprp', 'ipco',
)
""" List of boxes that nests the other boxes """
|
||||||
|
|
||||||
|
|
||||||
|
def parse_mp4_boxes(r: RawIOBase):
    """
    Parses an ISO BMFF (which MP4 follows) and yields its boxes as a sequence.
    This does not interpret content of these boxes.

    Sequence details:
    ('atom', b'blablabla'): A box, with content (not container boxes)
    ('atom', b''): Possibly container box (must check MP4_CONTAINER_BOXES) or really an empty box
    (None, 'atom'): End of a container box

    Example:                    Path:
    ('test', b'123456')         /test
    ('moov', b'')               /moov (start of container box)
    ('helo', b'abcdef')         /moov/helo
    ('1984', b'1q84')           /moov/1984
    ('trak', b'')               /moov/trak (start of container box)
    ('keys', b'2022')           /moov/trak/keys
    (None  , 'trak')            /moov/trak (end of container box)
    ('topp', b'1991')           /moov/topp
    (None  , 'moov')            /moov (end of container box)

    NOTE(review): the 32-bit size field is always taken literally; the special
    values 0 (box runs to the end) and 1 (64-bit size follows) that ISO BMFF
    defines get no dedicated handling here.

    @params r   byte stream positioned at the start of a box
    """

    while True:
        size_b = read_harder(r, 4)
        if not size_b:
            # nothing more to read: end of the stream (or of the enclosing container)
            break
        # Read the type as persistently as the size, so that a short read does not truncate it
        type_b = read_harder(r, 4)
        # 00 00 00 20 is big-endian
        box_size = unpack_be32(size_b)
        type_s = type_b.decode()
        # the size field counts the 8 header bytes (size + type) consumed above
        body_size = box_size - 8
        if type_s in MP4_CONTAINER_BOXES:
            yield (type_s, b'')
            # children are parsed from a view that ends where this container ends
            yield from parse_mp4_boxes(LengthLimiter(r, body_size))
            yield (None, type_s)
            continue
        full_body = read_harder(r, body_size)
        yield (type_s, full_body)
|
||||||
|
|
||||||
|
|
||||||
|
def write_mp4_boxes(w: RawIOBase, box_iter):
    """
    Serializes a box sequence into an ISO BMFF byte stream.
    The sequence has to use the same protocol that parse_mp4_boxes yields.

    @params w           writer that receives the top-level boxes
    @params box_iter    iterable of (type, content) pairs
    """

    # Every entry is (type of the open container, sink for its children);
    # the bottom entry stands for the output itself
    open_boxes = [(None, w)]
    for box_type, payload in box_iter:
        if box_type in MP4_CONTAINER_BOXES:
            # start of a container: buffer its children until the end marker arrives
            open_boxes.append((box_type, BytesIO()))
            continue

        if box_type is None:
            # end of a container: here the second item names the box being closed
            assert open_boxes[-1][0] == payload
            box_type, buffered = open_boxes.pop()
            payload = buffered.getvalue()

        sink = open_boxes[-1][1]
        # header is the total size (payload + 8 header bytes) followed by the type
        sink.write(pack_be32(len(payload) + 8))
        sink.write(box_type.encode()[:4])
        sink.write(payload)
|
||||||
|
|
||||||
|
|
||||||
class MP4FixupTimestampPP(PostProcessor):
|
class MP4FixupTimestampPP(PostProcessor):
|
||||||
|
|
Loading…
Reference in a new issue