You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
762 lines
27 KiB
762 lines
27 KiB
5 years ago
|
# Licence==MIT; Vitaly "_Vi" Shukela 2012
|
||
|
|
||
|
# Simple easy-to-use hacky matroska parser
|
||
|
|
||
|
# Supports SimpleBlock and BlockGroup, lacing, TimecodeScale.
|
||
|
# Does not support seeking, cues, chapters and other features.
|
||
|
# No proper EOF handling unfortunately
|
||
|
|
||
|
# See "mkvuser.py" for the example
|
||
|
|
||
|
import traceback
|
||
|
from struct import unpack
|
||
|
|
||
|
import sys
|
||
|
import datetime
|
||
|
|
||
|
# Python 2/3 compatibility shims used by the rest of this module.
# The interpreter major version is taken from sys.version_info: comparing the
# sys.version *string* with '3' is a lexicographic test and misorders "10.x".
if sys.version_info[0] < 3:
    # Python 2: make range() lazy everywhere below.
    range = xrange  # noqa: F821 -- name exists only on Python 2
else:
    def ord(something):
        '''
        Python 3 replacement for the builtin ord() that accepts what
        indexing/reading a bytes object yields.

        bytes  -> integer value of its first byte
                  (raises StopIteration for b"", which the parser uses as
                  its end-of-input signal)
        other  -> returned unchanged (e.g. the int produced by bytes[i])
        '''
        if type(something) == bytes:
            if something == b"":
                raise StopIteration
            return something[0]
        return something
|
||
|
|
||
|
def get_major_bit_number(n):
    '''
    Takes uint8, returns the position of its most significant set bit
    (counted from the top, i.e. the number of leading zero bits) plus the
    number with that bit cleared.

    Raises Exception("Bad number") when n is not in 1..255: zero has no set
    bit, and values outside a single byte would otherwise never match the
    8-bit mask (the scan below would not terminate).

    Examples:
        0b10010101 -> (0, 0b00010101)
        0b00010101 -> (3, 0b00000101)
        0b01111111 -> (1, 0b00111111)
    '''
    if not 0 < n <= 0xFF:
        raise Exception("Bad number")
    mask = 0x80
    leading_zeros = 0
    # Walk the mask down from bit 7 until it hits the first set bit.
    while not n & mask:
        leading_zeros += 1
        mask >>= 1
    return (leading_zeros, n & ~mask)
|
||
|
|
||
|
def read_matroska_number(f, unmodified=False, signed=False):
    '''
    Read ebml number from file-like object f.

    unmodified -- don't clear the length marker bit (as in Element IDs)
    signed     -- subtract the bias so the value is centred on zero,
                  using the same bias as parse_matroska_number

    Returns the number and its encoded length in bytes as a tuple.
    An unsigned number whose value bits are all ones is returned as -1.

    Raises StopIteration when the input ends before or inside the number,
    and Exception when both unmodified and signed are requested.

    See examples in "parse_matroska_number" function
    '''
    if unmodified and signed:
        raise Exception("Contradictary arguments")
    first_byte = f.read(1)
    if not first_byte:
        # Covers both b"" (Python 3) and "" (Python 2).
        raise StopIteration
    r = ord(first_byte)
    (n, r2) = get_major_bit_number(r)
    if not unmodified:
        r = r2
    # n is the number of continuation bytes that follow the first one.
    for _ in range(n):
        next_byte = f.read(1)
        if not next_byte:
            # Truncated in the middle of the number.
            raise StopIteration
        r = r * 0x100 + ord(next_byte)
    if signed:
        # 7*n+7 value bits in total; the bias is half of that range minus one.
        r -= (2 ** (7 * n + 6) - 1)
    elif r == 2 ** (7 * n + 7) - 1:
        return (-1, n + 1)
    return (r, n + 1)
|
||
|
|
||
|
def parse_matroska_number(data, pos, unmodified=False, signed=False):
    '''
    Decode one ebml number that starts at data[pos]; the in-memory
    counterpart of read_matroska_number.

    With unmodified set the length marker bit is kept (as in Element IDs).
    With signed set the bias is subtracted so the result is centred on zero.
    Returns the number plus the position just past it in the input buffer.

    Examples:
        "\x81"                    -> (1, pos+1)
        "\x40\x01"                -> (1, pos+2)
        "\x20\x00\x01"            -> (1, pos+3)
        "\x3F\xFF\xFF"            -> (0x1FFFFF, pos+3)
        "\x20\x00\x01" unmodified -> (0x200001, pos+3)
        "\xBF" signed             -> (0, pos+1)
        "\xBE" signed             -> (-1, pos+1)
        "\xC0" signed             -> (1, pos+1)
        "\x5F\xEF" signed         -> (-16, pos+2)
    '''
    if signed and unmodified:
        raise Exception("Contradictary arguments")

    lead = ord(data[pos])
    (tail_len, lead_cleared) = get_major_bit_number(lead)
    value = lead if unmodified else lead_cleared

    # Fold in the continuation bytes, most significant first.
    end = pos + 1 + tail_len
    for offset in range(pos + 1, end):
        value = value * 0x100 + ord(data[offset])

    if signed:
        return (value - (2 ** (7 * tail_len + 6) - 1), end)
    if value == 2 ** (7 * tail_len + 7) - 1:
        # All value bits set: reported as -1.
        return (-1, end)
    return (value, end)
|
||
|
|
||
|
def parse_xiph_number(data, pos):
    '''
    Decode a Xiph lacing number starting at data[pos]: bytes are summed
    up to and including the first byte that is not 255.
    Returns the number plus the position just past it.

    Examples:
        "\x01"         -> (1, pos+1)
        "\x55"         -> (0x55, pos+1)
        "\xFF\x04"     -> (0x103, pos+2)
        "\xFF\xFF\x04" -> (0x202, pos+3)
        "\xFF\xFF\x00" -> (0x1FE, pos+3)
    '''
    total = 0
    while True:
        octet = ord(data[pos])
        pos += 1
        total += octet
        if octet != 255:
            return (total, pos)
|
||
|
|
||
|
|
||
|
def parse_fixedlength_number(data, pos, length, signed=False):
    '''
    Read the big-endian number from data[pos:pos+length]
    Returns the number plus the new position

    With signed set the bytes are interpreted as two's complement.
    A zero-length number is 0, signed or not (no byte is inspected).
    Raises IndexError when data is shorter than pos+length.

    Examples:
        "\x01"            -> (0x1, pos+1)
        "\x55"            -> (0x55, pos+1)
        "\x55" signed     -> (0x55, pos+1)
        "\xFF\x04"        -> (0xFF04, pos+2)
        "\xFF\x04" signed -> (-0x00FC, pos+2)
        "" signed, length 0 -> (0, pos)
    '''
    r = 0
    for i in range(length):
        r = r * 0x100 + ord(data[pos + i])
    # The sign bit is the top bit of the accumulated value; guarding on
    # length keeps the empty case from touching data[pos].
    if signed and length and r & (1 << (8 * length - 1)):
        r -= 2 ** (8 * length)
    return (r, pos + length)
|
||
|
|
||
|
def read_fixedlength_number(f, length, signed=False):
    """ Pull length bytes from f and decode them with
    parse_fixedlength_number.
    Only the decoded number is returned (the position is dropped)"""
    return parse_fixedlength_number(f.read(length), 0, length, signed)[0]
|
||
|
|
||
|
def read_ebml_element_header(f):
    '''
    Consume one element header from f: the Element ID (length marker kept)
    followed by the element size.
    Returns (id, element size, number of bytes the header occupied)
    '''
    id_field = read_matroska_number(f, unmodified=True)
    size_field = read_matroska_number(f)
    return (id_field[0], size_field[0], id_field[1] + size_field[1])
|
||
|
|
||
|
class EbmlElementType:
    # Payload kinds, numbered 0..8 in this order.
    # MASTER: all subelements are read and returned as a tree, so it should
    # not be used for very large containers such as Segment.
    (VOID, MASTER, UNSIGNED, SIGNED, TEXTA, TEXTU, BINARY, FLOAT, DATE) = range(9)

    # For "Segment": really a MASTER, but no tree is built for it; its
    # children are interpreted as if they were top-level elements.
    JUST_GO_ON = 10


# Short alias used throughout the element table and the parser.
EET = EbmlElementType
|
||
|
|
||
|
# lynx -width=10000 -dump http://matroska.org/technical/specs/index.html
|
||
|
# | sed 's/not 0/not0/g; s/> 0/>0/g; s/Sampling Frequency/SamplingFrequency/g'
|
||
|
# | awk '{print $1 " " $3 " " $8}'
|
||
|
# | grep '\[..\]'
|
||
|
# | perl -ne '/(\S+) (\S+) (.)/;
|
||
|
# $name=$1; $id=$2; $type=$3;
|
||
|
# $id=~s/\[|\]//g;
|
||
|
# %types = (m=>"EET.MASTER",
|
||
|
# u=>"EET.UNSIGNED",
|
||
|
# i=>"EET.SIGNED",
|
||
|
# 8=>"EET.TEXTU",
|
||
|
# s=>"EET.TEXTA",
|
||
|
# b=>"EET.BINARY",
|
||
|
# f=>"EET.FLOAT",
|
||
|
# d=>"EET.DATE");
|
||
|
# $t=$types{$type};
|
||
|
# next unless $t;
|
||
|
# $t="EET.JUST_GO_ON" if $name eq "Segment" or $name eq "Cluster";
|
||
|
# print "\t0x$id: ($t, \"$name\"),\n";'
|
||
|
|
||
|
# Lookup table: EBML element ID (with its length marker bits, as returned by
# read_ebml_element_header) -> (EET payload type, element name).
# "Segment" and "Cluster" are marked JUST_GO_ON so their children are handled
# as top-level elements instead of being read into a tree.
element_types_names = {
    0x1A45DFA3: (EET.MASTER, "EBML"),
    0x4286: (EET.UNSIGNED, "EBMLVersion"),
    0x42F7: (EET.UNSIGNED, "EBMLReadVersion"),
    0x42F2: (EET.UNSIGNED, "EBMLMaxIDLength"),
    0x42F3: (EET.UNSIGNED, "EBMLMaxSizeLength"),
    0x4282: (EET.TEXTA, "DocType"),
    0x4287: (EET.UNSIGNED, "DocTypeVersion"),
    0x4285: (EET.UNSIGNED, "DocTypeReadVersion"),
    0xEC: (EET.BINARY, "Void"),
    0xBF: (EET.BINARY, "CRC-32"),
    0x1B538667: (EET.MASTER, "SignatureSlot"),
    0x7E8A: (EET.UNSIGNED, "SignatureAlgo"),
    0x7E9A: (EET.UNSIGNED, "SignatureHash"),
    0x7EA5: (EET.BINARY, "SignaturePublicKey"),
    0x7EB5: (EET.BINARY, "Signature"),
    0x7E5B: (EET.MASTER, "SignatureElements"),
    0x7E7B: (EET.MASTER, "SignatureElementList"),
    0x6532: (EET.BINARY, "SignedElement"),
    0x18538067: (EET.JUST_GO_ON, "Segment"),
    0x114D9B74: (EET.MASTER, "SeekHead"),
    0x4DBB: (EET.MASTER, "Seek"),
    0x53AB: (EET.BINARY, "SeekID"),
    0x53AC: (EET.UNSIGNED, "SeekPosition"),
    0x1549A966: (EET.MASTER, "Info"),
    0x73A4: (EET.BINARY, "SegmentUID"),
    0x7384: (EET.TEXTU, "SegmentFilename"),
    0x3CB923: (EET.BINARY, "PrevUID"),
    0x3C83AB: (EET.TEXTU, "PrevFilename"),
    0x3EB923: (EET.BINARY, "NextUID"),
    0x3E83BB: (EET.TEXTU, "NextFilename"),
    0x4444: (EET.BINARY, "SegmentFamily"),
    0x6924: (EET.MASTER, "ChapterTranslate"),
    0x69FC: (EET.UNSIGNED, "ChapterTranslateEditionUID"),
    0x69BF: (EET.UNSIGNED, "ChapterTranslateCodec"),
    0x69A5: (EET.BINARY, "ChapterTranslateID"),
    0x2AD7B1: (EET.UNSIGNED, "TimecodeScale"),
    0x4489: (EET.FLOAT, "Duration"),
    0x4461: (EET.DATE, "DateUTC"),
    0x7BA9: (EET.TEXTU, "Title"),
    0x4D80: (EET.TEXTU, "MuxingApp"),
    0x5741: (EET.TEXTU, "WritingApp"),
    0x1F43B675: (EET.JUST_GO_ON, "Cluster"),
    0xE7: (EET.UNSIGNED, "Timecode"),
    0x5854: (EET.MASTER, "SilentTracks"),
    0x58D7: (EET.UNSIGNED, "SilentTrackNumber"),
    0xA7: (EET.UNSIGNED, "Position"),
    0xAB: (EET.UNSIGNED, "PrevSize"),
    0xA3: (EET.BINARY, "SimpleBlock"),
    0xA0: (EET.MASTER, "BlockGroup"),
    0xA1: (EET.BINARY, "Block"),
    0xA2: (EET.BINARY, "BlockVirtual"),
    0x75A1: (EET.MASTER, "BlockAdditions"),
    0xA6: (EET.MASTER, "BlockMore"),
    0xEE: (EET.UNSIGNED, "BlockAddID"),
    0xA5: (EET.BINARY, "BlockAdditional"),
    0x9B: (EET.UNSIGNED, "BlockDuration"),
    0xFA: (EET.UNSIGNED, "ReferencePriority"),
    0xFB: (EET.SIGNED, "ReferenceBlock"),
    0xFD: (EET.SIGNED, "ReferenceVirtual"),
    0xA4: (EET.BINARY, "CodecState"),
    0x8E: (EET.MASTER, "Slices"),
    0xE8: (EET.MASTER, "TimeSlice"),
    0xCC: (EET.UNSIGNED, "LaceNumber"),
    0xCD: (EET.UNSIGNED, "FrameNumber"),
    0xCB: (EET.UNSIGNED, "BlockAdditionID"),
    0xCE: (EET.UNSIGNED, "Delay"),
    0xCF: (EET.UNSIGNED, "SliceDuration"),
    0xC8: (EET.MASTER, "ReferenceFrame"),
    0xC9: (EET.UNSIGNED, "ReferenceOffset"),
    0xCA: (EET.UNSIGNED, "ReferenceTimeCode"),
    0xAF: (EET.BINARY, "EncryptedBlock"),
    0x1654AE6B: (EET.MASTER, "Tracks"),
    0xAE: (EET.MASTER, "TrackEntry"),
    0xD7: (EET.UNSIGNED, "TrackNumber"),
    0x73C5: (EET.UNSIGNED, "TrackUID"),
    0x83: (EET.UNSIGNED, "TrackType"),
    0xB9: (EET.UNSIGNED, "FlagEnabled"),
    0x88: (EET.UNSIGNED, "FlagDefault"),
    0x55AA: (EET.UNSIGNED, "FlagForced"),
    0x9C: (EET.UNSIGNED, "FlagLacing"),
    0x6DE7: (EET.UNSIGNED, "MinCache"),
    0x6DF8: (EET.UNSIGNED, "MaxCache"),
    0x23E383: (EET.UNSIGNED, "DefaultDuration"),
    0x23314F: (EET.FLOAT, "TrackTimecodeScale"),
    0x537F: (EET.SIGNED, "TrackOffset"),
    0x55EE: (EET.UNSIGNED, "MaxBlockAdditionID"),
    0x536E: (EET.TEXTU, "Name"),
    0x22B59C: (EET.TEXTA, "Language"),
    0x86: (EET.TEXTA, "CodecID"),
    0x63A2: (EET.BINARY, "CodecPrivate"),
    0x258688: (EET.TEXTU, "CodecName"),
    0x7446: (EET.UNSIGNED, "AttachmentLink"),
    0x3A9697: (EET.TEXTU, "CodecSettings"),
    0x3B4040: (EET.TEXTA, "CodecInfoURL"),
    0x26B240: (EET.TEXTA, "CodecDownloadURL"),
    0xAA: (EET.UNSIGNED, "CodecDecodeAll"),
    0x6FAB: (EET.UNSIGNED, "TrackOverlay"),
    0x6624: (EET.MASTER, "TrackTranslate"),
    0x66FC: (EET.UNSIGNED, "TrackTranslateEditionUID"),
    0x66BF: (EET.UNSIGNED, "TrackTranslateCodec"),
    0x66A5: (EET.BINARY, "TrackTranslateTrackID"),
    0xE0: (EET.MASTER, "Video"),
    0x9A: (EET.UNSIGNED, "FlagInterlaced"),
    0x53B8: (EET.UNSIGNED, "StereoMode"),
    0x53B9: (EET.UNSIGNED, "OldStereoMode"),
    0xB0: (EET.UNSIGNED, "PixelWidth"),
    0xBA: (EET.UNSIGNED, "PixelHeight"),
    0x54AA: (EET.UNSIGNED, "PixelCropBottom"),
    0x54BB: (EET.UNSIGNED, "PixelCropTop"),
    0x54CC: (EET.UNSIGNED, "PixelCropLeft"),
    0x54DD: (EET.UNSIGNED, "PixelCropRight"),
    0x54B0: (EET.UNSIGNED, "DisplayWidth"),
    0x54BA: (EET.UNSIGNED, "DisplayHeight"),
    0x54B2: (EET.UNSIGNED, "DisplayUnit"),
    0x54B3: (EET.UNSIGNED, "AspectRatioType"),
    0x2EB524: (EET.BINARY, "ColourSpace"),
    0x2FB523: (EET.FLOAT, "GammaValue"),
    0x2383E3: (EET.FLOAT, "FrameRate"),
    0xE1: (EET.MASTER, "Audio"),
    0xB5: (EET.FLOAT, "SamplingFrequency"),
    0x78B5: (EET.FLOAT, "OutputSamplingFrequency"),
    0x9F: (EET.UNSIGNED, "Channels"),
    0x7D7B: (EET.BINARY, "ChannelPositions"),
    0x6264: (EET.UNSIGNED, "BitDepth"),
    0xE2: (EET.MASTER, "TrackOperation"),
    0xE3: (EET.MASTER, "TrackCombinePlanes"),
    0xE4: (EET.MASTER, "TrackPlane"),
    0xE5: (EET.UNSIGNED, "TrackPlaneUID"),
    0xE6: (EET.UNSIGNED, "TrackPlaneType"),
    0xE9: (EET.MASTER, "TrackJoinBlocks"),
    0xED: (EET.UNSIGNED, "TrackJoinUID"),
    0xC0: (EET.UNSIGNED, "TrickTrackUID"),
    0xC1: (EET.BINARY, "TrickTrackSegmentUID"),
    0xC6: (EET.UNSIGNED, "TrickTrackFlag"),
    0xC7: (EET.UNSIGNED, "TrickMasterTrackUID"),
    0xC4: (EET.BINARY, "TrickMasterTrackSegmentUID"),
    0x6D80: (EET.MASTER, "ContentEncodings"),
    0x6240: (EET.MASTER, "ContentEncoding"),
    0x5031: (EET.UNSIGNED, "ContentEncodingOrder"),
    0x5032: (EET.UNSIGNED, "ContentEncodingScope"),
    0x5033: (EET.UNSIGNED, "ContentEncodingType"),
    0x5034: (EET.MASTER, "ContentCompression"),
    0x4254: (EET.UNSIGNED, "ContentCompAlgo"),
    0x4255: (EET.BINARY, "ContentCompSettings"),
    0x5035: (EET.MASTER, "ContentEncryption"),
    0x47E1: (EET.UNSIGNED, "ContentEncAlgo"),
    0x47E2: (EET.BINARY, "ContentEncKeyID"),
    0x47E3: (EET.BINARY, "ContentSignature"),
    0x47E4: (EET.BINARY, "ContentSigKeyID"),
    0x47E5: (EET.UNSIGNED, "ContentSigAlgo"),
    0x47E6: (EET.UNSIGNED, "ContentSigHashAlgo"),
    0x1C53BB6B: (EET.MASTER, "Cues"),
    0xBB: (EET.MASTER, "CuePoint"),
    0xB3: (EET.UNSIGNED, "CueTime"),
    0xB7: (EET.MASTER, "CueTrackPositions"),
    0xF7: (EET.UNSIGNED, "CueTrack"),
    0xF1: (EET.UNSIGNED, "CueClusterPosition"),
    0x5378: (EET.UNSIGNED, "CueBlockNumber"),
    0xEA: (EET.UNSIGNED, "CueCodecState"),
    0xDB: (EET.MASTER, "CueReference"),
    0x96: (EET.UNSIGNED, "CueRefTime"),
    0x97: (EET.UNSIGNED, "CueRefCluster"),
    0x535F: (EET.UNSIGNED, "CueRefNumber"),
    0xEB: (EET.UNSIGNED, "CueRefCodecState"),
    0x1941A469: (EET.MASTER, "Attachments"),
    0x61A7: (EET.MASTER, "AttachedFile"),
    0x467E: (EET.TEXTU, "FileDescription"),
    0x466E: (EET.TEXTU, "FileName"),
    0x4660: (EET.TEXTA, "FileMimeType"),
    0x465C: (EET.BINARY, "FileData"),
    0x46AE: (EET.UNSIGNED, "FileUID"),
    0x4675: (EET.BINARY, "FileReferral"),
    0x4661: (EET.UNSIGNED, "FileUsedStartTime"),
    0x4662: (EET.UNSIGNED, "FileUsedEndTime"),
    0x1043A770: (EET.MASTER, "Chapters"),
    0x45B9: (EET.MASTER, "EditionEntry"),
    0x45BC: (EET.UNSIGNED, "EditionUID"),
    0x45BD: (EET.UNSIGNED, "EditionFlagHidden"),
    0x45DB: (EET.UNSIGNED, "EditionFlagDefault"),
    0x45DD: (EET.UNSIGNED, "EditionFlagOrdered"),
    0xB6: (EET.MASTER, "ChapterAtom"),
    0x73C4: (EET.UNSIGNED, "ChapterUID"),
    0x91: (EET.UNSIGNED, "ChapterTimeStart"),
    0x92: (EET.UNSIGNED, "ChapterTimeEnd"),
    0x98: (EET.UNSIGNED, "ChapterFlagHidden"),
    0x4598: (EET.UNSIGNED, "ChapterFlagEnabled"),
    0x6E67: (EET.BINARY, "ChapterSegmentUID"),
    0x6EBC: (EET.UNSIGNED, "ChapterSegmentEditionUID"),
    0x63C3: (EET.UNSIGNED, "ChapterPhysicalEquiv"),
    0x8F: (EET.MASTER, "ChapterTrack"),
    0x89: (EET.UNSIGNED, "ChapterTrackNumber"),
    0x80: (EET.MASTER, "ChapterDisplay"),
    0x85: (EET.TEXTU, "ChapString"),
    0x437C: (EET.TEXTA, "ChapLanguage"),
    0x437E: (EET.TEXTA, "ChapCountry"),
    0x6944: (EET.MASTER, "ChapProcess"),
    0x6955: (EET.UNSIGNED, "ChapProcessCodecID"),
    0x450D: (EET.BINARY, "ChapProcessPrivate"),
    0x6911: (EET.MASTER, "ChapProcessCommand"),
    0x6922: (EET.UNSIGNED, "ChapProcessTime"),
    0x6933: (EET.BINARY, "ChapProcessData"),
    0x1254C367: (EET.MASTER, "Tags"),
    0x7373: (EET.MASTER, "Tag"),
    0x63C0: (EET.MASTER, "Targets"),
    0x68CA: (EET.UNSIGNED, "TargetTypeValue"),
    0x63CA: (EET.TEXTA, "TargetType"),
    0x63C5: (EET.UNSIGNED, "TagTrackUID"),
    0x63C9: (EET.UNSIGNED, "TagEditionUID"),
    0x63C4: (EET.UNSIGNED, "TagChapterUID"),
    0x63C6: (EET.UNSIGNED, "TagAttachmentUID"),
    0x67C8: (EET.MASTER, "SimpleTag"),
    0x45A3: (EET.TEXTU, "TagName"),
    0x447A: (EET.TEXTA, "TagLanguage"),
    0x4484: (EET.UNSIGNED, "TagDefault"),
    0x4487: (EET.TEXTU, "TagString"),
    0x4485: (EET.BINARY, "TagBinary"),
    0x56AA: (EET.UNSIGNED, "CodecDelay"),
    0x56BB: (EET.UNSIGNED, "SeekPreRoll"),
    0xF0: (EET.UNSIGNED, "CueRelativePosition"),
    0x53C0: (EET.UNSIGNED, "AlphaMode"),
    0x55B2: (EET.UNSIGNED, "BitsPerChannel"),
    0x55B5: (EET.UNSIGNED, "CbSubsamplingHorz"),
    0x55B6: (EET.UNSIGNED, "CbSubsamplingVert"),
    0x5654: (EET.TEXTU, "ChapterStringUID"),
    0x55B7: (EET.UNSIGNED, "ChromaSitingHorz"),
    0x55B8: (EET.UNSIGNED, "ChromaSitingVert"),
    0x55B3: (EET.UNSIGNED, "ChromaSubsamplingHorz"),
    0x55B4: (EET.UNSIGNED, "ChromaSubsamplingVert"),
    0x55B0: (EET.MASTER, "Colour"),
    0x234E7A: (EET.UNSIGNED, "DefaultDecodedFieldDuration"),
    0x75A2: (EET.SIGNED, "DiscardPadding"),
    0x9D: (EET.UNSIGNED, "FieldOrder"),
    0x55D9: (EET.FLOAT, "LuminanceMax"),
    0x55DA: (EET.FLOAT, "LuminanceMin"),
    0x55D0: (EET.MASTER, "MasteringMetadata"),
    0x55B1: (EET.UNSIGNED, "MatrixCoefficients"),
    0x55BC: (EET.UNSIGNED, "MaxCLL"),
    0x55BD: (EET.UNSIGNED, "MaxFALL"),
    0x55BB: (EET.UNSIGNED, "Primaries"),
    0x55D5: (EET.FLOAT, "PrimaryBChromaticityX"),
    0x55D6: (EET.FLOAT, "PrimaryBChromaticityY"),
    0x55D3: (EET.FLOAT, "PrimaryGChromaticityX"),
    0x55D4: (EET.FLOAT, "PrimaryGChromaticityY"),
    0x55D1: (EET.FLOAT, "PrimaryRChromaticityX"),
    0x55D2: (EET.FLOAT, "PrimaryRChromaticityY"),
    0x55B9: (EET.UNSIGNED, "Range"),
    0x55BA: (EET.UNSIGNED, "TransferCharacteristics"),
    0x55D7: (EET.FLOAT, "WhitePointChromaticityX"),
    0x55D8: (EET.FLOAT, "WhitePointChromaticityY"),
}
|
||
|
|
||
|
def read_simple_element(f, type_, size):
    '''
    Read the size-byte payload of one element from f and decode it
    according to type_ (an EET constant).

    Returns "" for any zero-sized element; otherwise an int for
    UNSIGNED/SIGNED, text for TEXTA (ascii) / TEXTU (UTF-8), a child list
    for MASTER, seconds since the UNIX epoch (float) for DATE, a float for
    4- or 8-byte FLOAT (None for other float sizes, after a warning on
    stderr), and the raw bytes for every other type.
    '''
    if size == 0:
        return ""

    if type_ == EET.UNSIGNED:
        return read_fixedlength_number(f, size, False)

    if type_ == EET.SIGNED:
        return read_fixedlength_number(f, size, True)

    if type_ == EET.TEXTA or type_ == EET.TEXTU:
        raw = f.read(size)
        raw = raw.replace(b"\x00", b"")  # filter out \0, for gstreamer
        if type_ == EET.TEXTA:
            return raw.decode("ascii")
        return raw.decode("UTF-8")

    if type_ == EET.MASTER:
        return read_ebml_element_tree(f, size)

    if type_ == EET.DATE:
        # Stored as nanoseconds relative to 2001-01-01; shift to UNIX time.
        seconds = read_fixedlength_number(f, size, True)
        seconds *= 1e-9
        seconds += (datetime.datetime(2001, 1, 1) - datetime.datetime(1970, 1, 1)).total_seconds()
        return seconds

    if type_ == EET.FLOAT:
        if size == 4:
            return unpack(">f", f.read(4))[0]
        if size == 8:
            return unpack(">d", f.read(8))[0]
        # Unsupported width: still consume the payload so the stream stays aligned.
        read_fixedlength_number(f, size, False)
        sys.stderr.write("mkvparse: Floating point of size %d is not supported\n" % size)
        return None

    return f.read(size)
|
||
|
|
||
|
def read_ebml_element_tree(f, total_size):
    '''
    Build tree of elements, reading f until total_size reached
    Don't use for the whole segment, it's not Haskell

    Returns list of pairs (element_name, (element_type, element_value)).
    element_value can itself be such a list (for MASTER children).
    Elements whose ID is not in element_types_names are kept as BINARY
    under the name "unknown_<hex id>".

    When a child has no size (-1) or claims more payload than is left in
    the parent, a warning is written to stderr, the rest of the parent is
    skipped and the children collected so far are returned.
    '''
    childs = []
    while total_size > 0:
        (id_, size, hsize) = read_ebml_element_header(f)
        # The header has already been consumed, so only this many bytes of
        # the parent are still unread. Clamp at 0: f.read() with a negative
        # count would read to end of file.
        remaining = max(total_size - hsize, 0)
        if size == -1:
            sys.stderr.write("mkvparse: Element %x without size? Damaged data? Skipping %d bytes\n" % (id_, remaining))
            f.read(remaining)
            break
        if size > remaining:
            sys.stderr.write("mkvparse: Element %x with size %d? Damaged data? Skipping %d bytes\n" % (id_, size, remaining))
            f.read(remaining)
            break
        (type_, name) = element_types_names.get(id_, (EET.BINARY, "unknown_%x" % id_))
        data = read_simple_element(f, type_, size)
        total_size -= (size + hsize)
        childs.append((name, (type_, data)))
    return childs
|
||
|
|
||
|
|
||
|
class MatroskaHandler:
    """ User for mkvparse should override these methods.

    Every method here is a no-op default, except
    begin_handling_ebml_element which returns the type unchanged. """

    def tracks_available(self):
        # Called by mkvparse after it has filled in self.tracks.
        pass

    def segment_info_available(self):
        # Called by mkvparse after it has set self.segment_info.
        pass

    def frame(self, track_id, timestamp, pos, length, more_laced_frames, duration, keyframe, invisible, discardable):
        # Called by handle_block once per frame. The frame is described by
        # pos (block's buffer_pos plus the offset inside the block) and
        # length in bytes, matching the nine positional arguments
        # handle_block passes; the payload itself is not passed.
        pass

    def ebml_top_element(self, id_, name_, type_, data_):
        # Called by mkvparse at the end of handling each top-level element.
        pass

    def before_handling_an_element(self):
        # Called by mkvparse before it reads an element header, and again
        # before it resynchronises after an error.
        pass

    def begin_handling_ebml_element(self, id_, name, type_, headersize, datasize):
        # Called by mkvparse right after a header is read; the returned
        # value is the type mkvparse then uses to handle the element.
        return type_

    def element_data_available(self, id_, name, type_, headersize, data):
        pass
|
||
|
|
||
|
def handle_block(buffer, buffer_pos, handler, cluster_timecode, timecode_scale=1000000, duration=None, header_removal_headers_for_tracks=None):
    '''
    Decode a block, handling all lacings, send it to handler with appropriate timestamp, track number

    buffer      -- the raw block payload
    buffer_pos  -- position of buffer[0] in the source; frames are reported
                   to handler.frame() as (buffer_pos + offset, length)
                   rather than as sliced data
    cluster_timecode, timecode_scale -- combined with the block's own
                   relative timecode into a timestamp in seconds
    duration    -- passed through to handler.frame()
    header_removal_headers_for_tracks -- optional mapping keyed by track
                   number; a block on a listed track raises
                   NotImplementedError

    handler.frame() is called once per frame with the number of laced
    frames still to come (0 for an unlaced block).
    '''
    pos = 0
    (tracknum, pos) = parse_matroska_number(buffer, pos, signed=False)
    (tcode, pos) = parse_fixedlength_number(buffer, pos, 2, signed=True)
    flags = ord(buffer[pos])
    pos += 1
    f_keyframe = (flags & 0x80) == 0x80
    f_invisible = (flags & 0x08) == 0x08
    f_discardable = (flags & 0x01) == 0x01
    laceflags = flags & 0x06

    block_timecode = (cluster_timecode + tcode) * (timecode_scale * 0.000000001)

    if header_removal_headers_for_tracks and tracknum in header_removal_headers_for_tracks:
        # Header-removal compression is not supported in this build.
        raise NotImplementedError

    if laceflags == 0x00:  # no lacing
        handler.frame(tracknum, block_timecode, buffer_pos + pos, len(buffer) - pos,
                      0, duration, f_keyframe, f_invisible, f_discardable)
        return

    # The lace header stores the frame count minus one.
    numframes = ord(buffer[pos]) + 1
    pos += 1

    lengths = []

    if laceflags == 0x02:  # Xiph lacing
        accumlength = 0
        for _ in range(numframes - 1):
            (l, pos) = parse_xiph_number(buffer, pos)
            lengths.append(l)
            accumlength += l
        # The last frame takes whatever is left.
        lengths.append(len(buffer) - pos - accumlength)
    elif laceflags == 0x06:  # EBML lacing
        accumlength = 0
        # Sizes are coded for every frame except the last, so a one-frame
        # lace carries no size at all.
        if numframes > 1:
            (flength, pos) = parse_matroska_number(buffer, pos, signed=False)
            lengths.append(flength)
            accumlength += flength
            # Following sizes are signed differences from the previous one.
            for _ in range(numframes - 2):
                (l, pos) = parse_matroska_number(buffer, pos, signed=True)
                flength += l
                lengths.append(flength)
                accumlength += flength
        lengths.append(len(buffer) - pos - accumlength)
    elif laceflags == 0x04:  # Fixed size lacing
        fl = (len(buffer) - pos) // numframes
        lengths = [fl] * numframes

    more_laced_frames = numframes - 1
    for framelength in lengths:
        handler.frame(tracknum, block_timecode, buffer_pos + pos, framelength, more_laced_frames, duration,
                      f_keyframe, f_invisible, f_discardable)
        pos += framelength
        more_laced_frames -= 1
|
||
|
|
||
|
|
||
|
def resync(f):
    '''
    Scan f forward byte by byte for the 4-byte ID of a Cluster, Segment or
    Tracks element and read the size that follows it.

    Returns (element id, element size, header size in bytes), or
    (None, None, None) when the input ends first -- always a 3-tuple, so
    callers can unpack it unconditionally.
    '''
    sys.stderr.write("mkvparse: Resyncing\n")
    # first ID byte -> (remaining three ID bytes, full element ID)
    signatures = {
        b"\x1F": (b"\x43\xB6\x75", 0x1F43B675),  # cluster
        b"\x18": (b"\x53\x80\x67", 0x18538067),  # segment
        b"\x16": (b"\x54\xAE\x6B", 0x1654AE6B),  # tracks
    }
    while True:
        b = f.read(1)
        if b == b"":
            return (None, None, None)
        candidate = signatures.get(b)
        if candidate is None:
            continue
        (tail, element_id) = candidate
        if f.read(3) != tail:
            continue
        try:
            (seglen, x) = read_matroska_number(f)
        except StopIteration:
            # Input ended inside the size field: nothing left to sync to.
            return (None, None, None)
        return (element_id, seglen, x + 4)
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
def mkvparse(f, handler):
    '''
    Read mkv file f and call handler methods when track or segment information is ready or when frame is read.
    Handles lacing, timecodes (except of per-track scaling)

    f must support read() and, for SimpleBlock elements, tell().
    handler is expected to provide the MatroskaHandler methods; this
    function also sets handler.segment_info and handler.tracks.

    Parsing stops at end of input. On an unknown element ID or an exception
    while reading an element, the parser resynchronises on the next
    Cluster/Segment/Tracks ID and carries on, or stops if none is found.
    BlockGroup elements raise NotImplementedError.
    '''
    track_type_names = {
        0x01: 'video',
        0x02: 'audio',
        0x03: 'complex',
        0x10: 'logo',
        0x11: 'subtitle',
        0x12: 'button',
        0x20: 'control',
    }

    timecode_scale = 1000000
    current_cluster_timecode = 0
    # Header of the element found by the last resync(), if any.
    resync_element_id = None
    resync_element_size = None
    resync_element_headersize = None
    header_removal_headers_for_tracks = {}
    while f:
        (id_, size, hsize) = (None, None, None)
        tree = None
        data = None
        (type_, name) = (None, None)
        try:
            if not resync_element_id:
                try:
                    handler.before_handling_an_element()
                    (id_, size, hsize) = read_ebml_element_header(f)
                except StopIteration:
                    break
                if id_ not in element_types_names:
                    sys.stderr.write("mkvparse: Unknown element with id %x and size %d\n" % (id_, size))
                    (resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
                    if resync_element_id:
                        continue
                    else:
                        break
            else:
                # Pick up the header that resync() already consumed.
                id_ = resync_element_id
                size = resync_element_size
                hsize = resync_element_headersize
                resync_element_id = None
                resync_element_size = None
                resync_element_headersize = None

            (type_, name) = element_types_names[id_]
            # The handler may override how this element is to be read.
            type_ = handler.begin_handling_ebml_element(id_, name, type_, hsize, size)

            if type_ == EET.MASTER:
                tree = read_ebml_element_tree(f, size)
                data = tree

        except Exception:
            traceback.print_exc()
            handler.before_handling_an_element()
            (resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
            if resync_element_id:
                continue
            else:
                break

        if name == "EBML" and isinstance(data, list):
            d = dict(tree)
            if 'EBMLReadVersion' in d:
                if d['EBMLReadVersion'][1] > 1:
                    sys.stderr.write("mkvparse: Warning: EBMLReadVersion too big\n")
            if 'DocTypeReadVersion' in d:
                if d['DocTypeReadVersion'][1] > 2:
                    sys.stderr.write("mkvparse: Warning: DocTypeReadVersion too big\n")
            # A header without DocType is reported like any other unexpected DocType.
            dt = d['DocType'][1] if 'DocType' in d else None
            if dt != "matroska" and dt != "webm":
                sys.stderr.write("mkvparse: Warning: EBML DocType is not \"matroska\" or \"webm\"\n")
        elif name == "Info" and isinstance(data, list):
            handler.segment_info = tree
            handler.segment_info_available()

            d = dict(tree)
            if "TimecodeScale" in d:
                timecode_scale = d["TimecodeScale"][1]
        elif name == "Tracks" and isinstance(data, list):
            handler.tracks = {}
            for (ten, (_t, track)) in tree:
                if ten != "TrackEntry":
                    continue
                d = dict(track)
                n = d['TrackNumber'][1]
                handler.tracks[n] = d
                tt = d['TrackType'][1]
                if tt in track_type_names:
                    d['type'] = track_type_names[tt]
                if 'TrackTimecodeScale' in d:
                    sys.stderr.write("mkvparse: Warning: TrackTimecodeScale is not supported\n")
                if 'ContentEncodings' in d:
                    try:
                        # First ContentEncoding -> its first child (expected
                        # to be ContentCompression) -> that child's fields.
                        compr = dict(d["ContentEncodings"][1][0][1][1][0][1][1])
                        if compr["ContentCompAlgo"][1] == 3:
                            header_removal_headers_for_tracks[n] = compr["ContentCompSettings"][1]
                        else:
                            sys.stderr.write("mkvparse: Warning: compression other than "
                                             "header removal is not supported\n")
                    except Exception:
                        # Deliberately best-effort: an unexpected layout only costs a warning.
                        sys.stderr.write("mkvparse: Warning: unsuccessfully tried "
                                         "to handle header removal compression\n")
            handler.tracks_available()
        # cluster contents:
        elif name == "Timecode" and type_ == EET.UNSIGNED:
            data = read_fixedlength_number(f, size, False)
            current_cluster_timecode = data
        elif name == "SimpleBlock" and type_ == EET.BINARY:
            pos = f.tell()
            data = f.read(size)
            handle_block(data, pos, handler, current_cluster_timecode, timecode_scale, None, header_removal_headers_for_tracks)
        elif name == "BlockGroup" and type_ == EET.MASTER:
            # BlockGroup handling is disabled in this build.
            raise NotImplementedError
        else:
            if type_ != EET.JUST_GO_ON and type_ != EET.MASTER:
                data = read_simple_element(f, type_, size)

        handler.ebml_top_element(id_, name, type_, data)
|
||
|
|
||
|
|
||
|
|
||
|
# When run directly instead of being imported, only print a pointer to the
# example script.
if __name__ == '__main__':
    print("Run mkvuser.py for the example")
|