openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

762 lines
27 KiB

# Licence==MIT; Vitaly "_Vi" Shukela 2012
# Simple easy-to-use hacky matroska parser
# Supports SimpleBlock and BlockGroup, lacing, TimecodeScale.
# Does not support seeking, cues, chapters and other features.
# No proper EOF handling unfortunately
# See "mkvuser.py" for the example
import traceback
from struct import unpack
import sys
import datetime
# Python 2/3 compatibility shims.
# Compare the numeric major version: comparing the sys.version *string*
# against '3' is lexicographic and would misclassify a "10.x" interpreter.
if sys.version_info[0] < 3:
    range = xrange  # noqa: F821 -- Python 2 only: use the lazy range
else:
    def ord(something):
        '''
        Bytes-aware replacement for the builtin ord() on Python 3.

        On Python 3 indexing a bytes object already yields an int, while
        file.read(1) yields a bytes object; this shim accepts both.

        Returns the first byte as an int when given a non-empty bytes object,
        and returns any other value unchanged.
        Raises StopIteration for empty bytes (what read() returns at EOF), which
        the parser uses as its end-of-input signal.
        '''
        if isinstance(something, bytes):
            if something == b"":
                raise StopIteration
            return something[0]
        return something
def get_major_bit_number(n):
    '''
    Takes uint8, returns number of the most significant bit plus the number with that bit cleared.
    Bits are counted from the top of the byte (0 == 0x80, 7 == 0x01).
    Examples:
    0b10010101 -> (0, 0b00010101)
    0b00010101 -> (3, 0b00000101)
    0b01111111 -> (1, 0b00111111)

    Raises Exception("Bad number") when none of the low 8 bits is set.
    That covers 0 and also values such as 0x100, for which a bit-by-bit
    scan of the byte would never find a set bit and would loop forever.
    '''
    low_byte = n & 0xFF
    if not low_byte:
        raise Exception("Bad number")
    # bit_length() is the 1-based index of the highest set bit, counted from
    # the bottom; convert it to a 0-based index counted from the top.
    position = 8 - low_byte.bit_length()
    marker = 0x80 >> position
    return (position, n & ~marker)
def read_matroska_number(f, unmodified=False, signed=False):
    '''
    Read ebml number from the file-like object f.
    Unmodified means don't clear the length bit (as in Element IDs).
    Signed means the value is stored with a bias, which is subtracted here
    (same bias as in "parse_matroska_number").
    Returns the number and its length in bytes as a tuple.
    An unsigned number whose value bits are all ones is returned as -1.
    Raises Exception when both unmodified and signed are requested.
    Raises StopIteration when f is already at EOF.
    See examples in "parse_matroska_number" function
    '''
    if unmodified and signed:
        raise Exception("Contradictary arguments")
    first_byte = f.read(1)
    # read() yields "" on Python 2 and b"" on Python 3 at EOF; test for
    # emptiness instead of comparing against one specific literal.
    if not first_byte:
        raise StopIteration
    r = ord(first_byte)
    # n is the number of bytes that follow the first one
    (n, r2) = get_major_bit_number(r)
    if not unmodified:
        r = r2
    for _ in range(n):
        r = r * 0x100 + ord(f.read(1))
    if signed:
        # The bias is half of the value range: 7*(n+1) value bits in total,
        # so 2**(7*n+6)-1. Must stay in sync with parse_matroska_number.
        r -= 2 ** (7 * n + 6) - 1
    elif r == 2 ** (7 * n + 7) - 1:
        return (-1, n + 1)
    return (r, n + 1)
def parse_matroska_number(data, pos, unmodified=False, signed=False):
    '''
    Decode one ebml number stored in data starting at index pos.
    Buffer-based twin of read_matroska_number.
    With unmodified=True the length marker bit is kept (as in Element IDs).
    With signed=True the stored value is shifted so that it can be negative.
    Gives back (number, index just past the number).
    An unsigned number with all value bits set is reported as -1.
    Examples:
    "\x81" -> (1, pos+1)
    "\x40\x01" -> (1, pos+2)
    "\x20\x00\x01" -> (1, pos+3)
    "\x3F\xFF\xFF" -> (0x1FFFFF, pos+3)
    "\x20\x00\x01" unmodified -> (0x200001, pos+3)
    "\xBF" signed -> (0, pos+1)
    "\xBE" signed -> (-1, pos+1)
    "\xC0" signed -> (1, pos+1)
    "\x5F\xEF" signed -> (-16, pos+2)
    '''
    if signed and unmodified:
        raise Exception("Contradictary arguments")
    lead = ord(data[pos])
    (extra, stripped) = get_major_bit_number(lead)
    value = lead if unmodified else stripped
    end = pos + 1 + extra
    # fold in the continuation bytes, most significant first
    for idx in range(pos + 1, end):
        value = value * 0x100 + ord(data[idx])
    if signed:
        return (value - (2 ** (7 * extra + 6) - 1), end)
    if value == 2 ** (7 * extra + 7) - 1:
        return (-1, end)
    return (value, end)
def parse_xiph_number(data, pos):
    '''
    Decode a Xiph lacing number that starts at data[pos].
    The value is the sum of consecutive bytes up to and including
    the first byte that is not 255.
    Gives back (number, index just past the number).
    Examples:
    "\x01" -> (1, pos+1)
    "\x55" -> (0x55, pos+1)
    "\xFF\x04" -> (0x103, pos+2)
    "\xFF\xFF\x04" -> (0x202, pos+3)
    "\xFF\xFF\x00" -> (0x1FE, pos+3)
    '''
    total = 0
    while True:
        byte = ord(data[pos])
        pos += 1
        total += byte
        if byte != 255:
            # a byte below 255 terminates the number
            return (total, pos)
def parse_fixedlength_number(data, pos, length, signed=False):
    '''
    Decode the big-endian integer held in data[pos:pos+length].
    With signed=True the top bit of the first byte is a two's complement sign bit.
    Gives back (number, pos+length).
    Examples:
    "\x01" -> (0x1, pos+1)
    "\x55" -> (0x55, pos+1)
    "\x55" signed -> (0x55, pos+1)
    "\xFF\x04" -> (0xFF04, pos+2)
    "\xFF\x04" signed -> (-0x00FC, pos+2)
    '''
    end = pos + length
    value = 0
    for offset in range(pos, end):
        value = (value << 8) | ord(data[offset])
    if signed and ord(data[pos]) & 0x80:
        # sign bit set: wrap around into the negative range
        value -= 1 << (8 * length)
    return (value, end)
def read_fixedlength_number(f, length, signed=False):
    """ Pull length bytes from f and decode them with parse_fixedlength_number.
    Gives back the number alone (the position is dropped)"""
    raw = f.read(length)
    return parse_fixedlength_number(raw, 0, length, signed)[0]
def read_ebml_element_header(f):
    '''
    Read one element header (Element ID followed by the element size) from f.
    The ID is read unmodified, i.e. with its length marker bit kept.
    Gives back (id, element size, number of bytes the header occupied).
    '''
    (element_id, id_length) = read_matroska_number(f, unmodified=True)
    (payload_size, size_length) = read_matroska_number(f)
    return (element_id, payload_size, id_length + size_length)
class EbmlElementType:
    """ Numeric tags telling the parser how an element's payload is decoded """
    VOID = 0
    # Read every subelement and return them as a tree.
    # Not meant for very large things like Segment.
    MASTER = 1
    UNSIGNED = 2
    SIGNED = 3
    TEXTA = 4
    TEXTU = 5
    BINARY = 6
    FLOAT = 7
    DATE = 8
    # For "Segment": really a MASTER, but no tree is built for it;
    # its children are interpreted as if they were top-level elements.
    JUST_GO_ON = 10


# short alias for the type tags
EET = EbmlElementType
# lynx -width=10000 -dump http://matroska.org/technical/specs/index.html
# | sed 's/not 0/not0/g; s/> 0/>0/g; s/Sampling Frequency/SamplingFrequency/g'
# | awk '{print $1 " " $3 " " $8}'
# | grep '\[..\]'
# | perl -ne '/(\S+) (\S+) (.)/;
# $name=$1; $id=$2; $type=$3;
# $id=~s/\[|\]//g;
# %types = (m=>"EET.MASTER",
# u=>"EET.UNSIGNED",
# i=>"EET.SIGNED",
# 8=>"EET.TEXTU",
# s=>"EET.TEXTA",
# b=>"EET.BINARY",
# f=>"EET.FLOAT",
# d=>"EET.DATE");
# $t=$types{$type};
# next unless $t;
# $t="EET.JUST_GO_ON" if $name eq "Segment" or $name eq "Cluster";
# print "\t0x$id: ($t, \"$name\"),\n";'
element_types_names = {
0x1A45DFA3: (EET.MASTER, "EBML"),
0x4286: (EET.UNSIGNED, "EBMLVersion"),
0x42F7: (EET.UNSIGNED, "EBMLReadVersion"),
0x42F2: (EET.UNSIGNED, "EBMLMaxIDLength"),
0x42F3: (EET.UNSIGNED, "EBMLMaxSizeLength"),
0x4282: (EET.TEXTA, "DocType"),
0x4287: (EET.UNSIGNED, "DocTypeVersion"),
0x4285: (EET.UNSIGNED, "DocTypeReadVersion"),
0xEC: (EET.BINARY, "Void"),
0xBF: (EET.BINARY, "CRC-32"),
0x1B538667: (EET.MASTER, "SignatureSlot"),
0x7E8A: (EET.UNSIGNED, "SignatureAlgo"),
0x7E9A: (EET.UNSIGNED, "SignatureHash"),
0x7EA5: (EET.BINARY, "SignaturePublicKey"),
0x7EB5: (EET.BINARY, "Signature"),
0x7E5B: (EET.MASTER, "SignatureElements"),
0x7E7B: (EET.MASTER, "SignatureElementList"),
0x6532: (EET.BINARY, "SignedElement"),
0x18538067: (EET.JUST_GO_ON, "Segment"),
0x114D9B74: (EET.MASTER, "SeekHead"),
0x4DBB: (EET.MASTER, "Seek"),
0x53AB: (EET.BINARY, "SeekID"),
0x53AC: (EET.UNSIGNED, "SeekPosition"),
0x1549A966: (EET.MASTER, "Info"),
0x73A4: (EET.BINARY, "SegmentUID"),
0x7384: (EET.TEXTU, "SegmentFilename"),
0x3CB923: (EET.BINARY, "PrevUID"),
0x3C83AB: (EET.TEXTU, "PrevFilename"),
0x3EB923: (EET.BINARY, "NextUID"),
0x3E83BB: (EET.TEXTU, "NextFilename"),
0x4444: (EET.BINARY, "SegmentFamily"),
0x6924: (EET.MASTER, "ChapterTranslate"),
0x69FC: (EET.UNSIGNED, "ChapterTranslateEditionUID"),
0x69BF: (EET.UNSIGNED, "ChapterTranslateCodec"),
0x69A5: (EET.BINARY, "ChapterTranslateID"),
0x2AD7B1: (EET.UNSIGNED, "TimecodeScale"),
0x4489: (EET.FLOAT, "Duration"),
0x4461: (EET.DATE, "DateUTC"),
0x7BA9: (EET.TEXTU, "Title"),
0x4D80: (EET.TEXTU, "MuxingApp"),
0x5741: (EET.TEXTU, "WritingApp"),
0x1F43B675: (EET.JUST_GO_ON, "Cluster"),
0xE7: (EET.UNSIGNED, "Timecode"),
0x5854: (EET.MASTER, "SilentTracks"),
0x58D7: (EET.UNSIGNED, "SilentTrackNumber"),
0xA7: (EET.UNSIGNED, "Position"),
0xAB: (EET.UNSIGNED, "PrevSize"),
0xA3: (EET.BINARY, "SimpleBlock"),
0xA0: (EET.MASTER, "BlockGroup"),
0xA1: (EET.BINARY, "Block"),
0xA2: (EET.BINARY, "BlockVirtual"),
0x75A1: (EET.MASTER, "BlockAdditions"),
0xA6: (EET.MASTER, "BlockMore"),
0xEE: (EET.UNSIGNED, "BlockAddID"),
0xA5: (EET.BINARY, "BlockAdditional"),
0x9B: (EET.UNSIGNED, "BlockDuration"),
0xFA: (EET.UNSIGNED, "ReferencePriority"),
0xFB: (EET.SIGNED, "ReferenceBlock"),
0xFD: (EET.SIGNED, "ReferenceVirtual"),
0xA4: (EET.BINARY, "CodecState"),
0x8E: (EET.MASTER, "Slices"),
0xE8: (EET.MASTER, "TimeSlice"),
0xCC: (EET.UNSIGNED, "LaceNumber"),
0xCD: (EET.UNSIGNED, "FrameNumber"),
0xCB: (EET.UNSIGNED, "BlockAdditionID"),
0xCE: (EET.UNSIGNED, "Delay"),
0xCF: (EET.UNSIGNED, "SliceDuration"),
0xC8: (EET.MASTER, "ReferenceFrame"),
0xC9: (EET.UNSIGNED, "ReferenceOffset"),
0xCA: (EET.UNSIGNED, "ReferenceTimeCode"),
0xAF: (EET.BINARY, "EncryptedBlock"),
0x1654AE6B: (EET.MASTER, "Tracks"),
0xAE: (EET.MASTER, "TrackEntry"),
0xD7: (EET.UNSIGNED, "TrackNumber"),
0x73C5: (EET.UNSIGNED, "TrackUID"),
0x83: (EET.UNSIGNED, "TrackType"),
0xB9: (EET.UNSIGNED, "FlagEnabled"),
0x88: (EET.UNSIGNED, "FlagDefault"),
0x55AA: (EET.UNSIGNED, "FlagForced"),
0x9C: (EET.UNSIGNED, "FlagLacing"),
0x6DE7: (EET.UNSIGNED, "MinCache"),
0x6DF8: (EET.UNSIGNED, "MaxCache"),
0x23E383: (EET.UNSIGNED, "DefaultDuration"),
0x23314F: (EET.FLOAT, "TrackTimecodeScale"),
0x537F: (EET.SIGNED, "TrackOffset"),
0x55EE: (EET.UNSIGNED, "MaxBlockAdditionID"),
0x536E: (EET.TEXTU, "Name"),
0x22B59C: (EET.TEXTA, "Language"),
0x86: (EET.TEXTA, "CodecID"),
0x63A2: (EET.BINARY, "CodecPrivate"),
0x258688: (EET.TEXTU, "CodecName"),
0x7446: (EET.UNSIGNED, "AttachmentLink"),
0x3A9697: (EET.TEXTU, "CodecSettings"),
0x3B4040: (EET.TEXTA, "CodecInfoURL"),
0x26B240: (EET.TEXTA, "CodecDownloadURL"),
0xAA: (EET.UNSIGNED, "CodecDecodeAll"),
0x6FAB: (EET.UNSIGNED, "TrackOverlay"),
0x6624: (EET.MASTER, "TrackTranslate"),
0x66FC: (EET.UNSIGNED, "TrackTranslateEditionUID"),
0x66BF: (EET.UNSIGNED, "TrackTranslateCodec"),
0x66A5: (EET.BINARY, "TrackTranslateTrackID"),
0xE0: (EET.MASTER, "Video"),
0x9A: (EET.UNSIGNED, "FlagInterlaced"),
0x53B8: (EET.UNSIGNED, "StereoMode"),
0x53B9: (EET.UNSIGNED, "OldStereoMode"),
0xB0: (EET.UNSIGNED, "PixelWidth"),
0xBA: (EET.UNSIGNED, "PixelHeight"),
0x54AA: (EET.UNSIGNED, "PixelCropBottom"),
0x54BB: (EET.UNSIGNED, "PixelCropTop"),
0x54CC: (EET.UNSIGNED, "PixelCropLeft"),
0x54DD: (EET.UNSIGNED, "PixelCropRight"),
0x54B0: (EET.UNSIGNED, "DisplayWidth"),
0x54BA: (EET.UNSIGNED, "DisplayHeight"),
0x54B2: (EET.UNSIGNED, "DisplayUnit"),
0x54B3: (EET.UNSIGNED, "AspectRatioType"),
0x2EB524: (EET.BINARY, "ColourSpace"),
0x2FB523: (EET.FLOAT, "GammaValue"),
0x2383E3: (EET.FLOAT, "FrameRate"),
0xE1: (EET.MASTER, "Audio"),
0xB5: (EET.FLOAT, "SamplingFrequency"),
0x78B5: (EET.FLOAT, "OutputSamplingFrequency"),
0x9F: (EET.UNSIGNED, "Channels"),
0x7D7B: (EET.BINARY, "ChannelPositions"),
0x6264: (EET.UNSIGNED, "BitDepth"),
0xE2: (EET.MASTER, "TrackOperation"),
0xE3: (EET.MASTER, "TrackCombinePlanes"),
0xE4: (EET.MASTER, "TrackPlane"),
0xE5: (EET.UNSIGNED, "TrackPlaneUID"),
0xE6: (EET.UNSIGNED, "TrackPlaneType"),
0xE9: (EET.MASTER, "TrackJoinBlocks"),
0xED: (EET.UNSIGNED, "TrackJoinUID"),
0xC0: (EET.UNSIGNED, "TrickTrackUID"),
0xC1: (EET.BINARY, "TrickTrackSegmentUID"),
0xC6: (EET.UNSIGNED, "TrickTrackFlag"),
0xC7: (EET.UNSIGNED, "TrickMasterTrackUID"),
0xC4: (EET.BINARY, "TrickMasterTrackSegmentUID"),
0x6D80: (EET.MASTER, "ContentEncodings"),
0x6240: (EET.MASTER, "ContentEncoding"),
0x5031: (EET.UNSIGNED, "ContentEncodingOrder"),
0x5032: (EET.UNSIGNED, "ContentEncodingScope"),
0x5033: (EET.UNSIGNED, "ContentEncodingType"),
0x5034: (EET.MASTER, "ContentCompression"),
0x4254: (EET.UNSIGNED, "ContentCompAlgo"),
0x4255: (EET.BINARY, "ContentCompSettings"),
0x5035: (EET.MASTER, "ContentEncryption"),
0x47E1: (EET.UNSIGNED, "ContentEncAlgo"),
0x47E2: (EET.BINARY, "ContentEncKeyID"),
0x47E3: (EET.BINARY, "ContentSignature"),
0x47E4: (EET.BINARY, "ContentSigKeyID"),
0x47E5: (EET.UNSIGNED, "ContentSigAlgo"),
0x47E6: (EET.UNSIGNED, "ContentSigHashAlgo"),
0x1C53BB6B: (EET.MASTER, "Cues"),
0xBB: (EET.MASTER, "CuePoint"),
0xB3: (EET.UNSIGNED, "CueTime"),
0xB7: (EET.MASTER, "CueTrackPositions"),
0xF7: (EET.UNSIGNED, "CueTrack"),
0xF1: (EET.UNSIGNED, "CueClusterPosition"),
0x5378: (EET.UNSIGNED, "CueBlockNumber"),
0xEA: (EET.UNSIGNED, "CueCodecState"),
0xDB: (EET.MASTER, "CueReference"),
0x96: (EET.UNSIGNED, "CueRefTime"),
0x97: (EET.UNSIGNED, "CueRefCluster"),
0x535F: (EET.UNSIGNED, "CueRefNumber"),
0xEB: (EET.UNSIGNED, "CueRefCodecState"),
0x1941A469: (EET.MASTER, "Attachments"),
0x61A7: (EET.MASTER, "AttachedFile"),
0x467E: (EET.TEXTU, "FileDescription"),
0x466E: (EET.TEXTU, "FileName"),
0x4660: (EET.TEXTA, "FileMimeType"),
0x465C: (EET.BINARY, "FileData"),
0x46AE: (EET.UNSIGNED, "FileUID"),
0x4675: (EET.BINARY, "FileReferral"),
0x4661: (EET.UNSIGNED, "FileUsedStartTime"),
0x4662: (EET.UNSIGNED, "FileUsedEndTime"),
0x1043A770: (EET.MASTER, "Chapters"),
0x45B9: (EET.MASTER, "EditionEntry"),
0x45BC: (EET.UNSIGNED, "EditionUID"),
0x45BD: (EET.UNSIGNED, "EditionFlagHidden"),
0x45DB: (EET.UNSIGNED, "EditionFlagDefault"),
0x45DD: (EET.UNSIGNED, "EditionFlagOrdered"),
0xB6: (EET.MASTER, "ChapterAtom"),
0x73C4: (EET.UNSIGNED, "ChapterUID"),
0x91: (EET.UNSIGNED, "ChapterTimeStart"),
0x92: (EET.UNSIGNED, "ChapterTimeEnd"),
0x98: (EET.UNSIGNED, "ChapterFlagHidden"),
0x4598: (EET.UNSIGNED, "ChapterFlagEnabled"),
0x6E67: (EET.BINARY, "ChapterSegmentUID"),
0x6EBC: (EET.UNSIGNED, "ChapterSegmentEditionUID"),
0x63C3: (EET.UNSIGNED, "ChapterPhysicalEquiv"),
0x8F: (EET.MASTER, "ChapterTrack"),
0x89: (EET.UNSIGNED, "ChapterTrackNumber"),
0x80: (EET.MASTER, "ChapterDisplay"),
0x85: (EET.TEXTU, "ChapString"),
0x437C: (EET.TEXTA, "ChapLanguage"),
0x437E: (EET.TEXTA, "ChapCountry"),
0x6944: (EET.MASTER, "ChapProcess"),
0x6955: (EET.UNSIGNED, "ChapProcessCodecID"),
0x450D: (EET.BINARY, "ChapProcessPrivate"),
0x6911: (EET.MASTER, "ChapProcessCommand"),
0x6922: (EET.UNSIGNED, "ChapProcessTime"),
0x6933: (EET.BINARY, "ChapProcessData"),
0x1254C367: (EET.MASTER, "Tags"),
0x7373: (EET.MASTER, "Tag"),
0x63C0: (EET.MASTER, "Targets"),
0x68CA: (EET.UNSIGNED, "TargetTypeValue"),
0x63CA: (EET.TEXTA, "TargetType"),
0x63C5: (EET.UNSIGNED, "TagTrackUID"),
0x63C9: (EET.UNSIGNED, "TagEditionUID"),
0x63C4: (EET.UNSIGNED, "TagChapterUID"),
0x63C6: (EET.UNSIGNED, "TagAttachmentUID"),
0x67C8: (EET.MASTER, "SimpleTag"),
0x45A3: (EET.TEXTU, "TagName"),
0x447A: (EET.TEXTA, "TagLanguage"),
0x4484: (EET.UNSIGNED, "TagDefault"),
0x4487: (EET.TEXTU, "TagString"),
0x4485: (EET.BINARY, "TagBinary"),
0x56AA: (EET.UNSIGNED, "CodecDelay"),
0x56BB: (EET.UNSIGNED, "SeekPreRoll"),
0xF0: (EET.UNSIGNED, "CueRelativePosition"),
0x53C0: (EET.UNSIGNED, "AlphaMode"),
0x55B2: (EET.UNSIGNED, "BitsPerChannel"),
0x55B5: (EET.UNSIGNED, "CbSubsamplingHorz"),
0x55B6: (EET.UNSIGNED, "CbSubsamplingVert"),
0x5654: (EET.TEXTU, "ChapterStringUID"),
0x55B7: (EET.UNSIGNED, "ChromaSitingHorz"),
0x55B8: (EET.UNSIGNED, "ChromaSitingVert"),
0x55B3: (EET.UNSIGNED, "ChromaSubsamplingHorz"),
0x55B4: (EET.UNSIGNED, "ChromaSubsamplingVert"),
0x55B0: (EET.MASTER, "Colour"),
0x234E7A: (EET.UNSIGNED, "DefaultDecodedFieldDuration"),
0x75A2: (EET.SIGNED, "DiscardPadding"),
0x9D: (EET.UNSIGNED, "FieldOrder"),
0x55D9: (EET.FLOAT, "LuminanceMax"),
0x55DA: (EET.FLOAT, "LuminanceMin"),
0x55D0: (EET.MASTER, "MasteringMetadata"),
0x55B1: (EET.UNSIGNED, "MatrixCoefficients"),
0x55BC: (EET.UNSIGNED, "MaxCLL"),
0x55BD: (EET.UNSIGNED, "MaxFALL"),
0x55BB: (EET.UNSIGNED, "Primaries"),
0x55D5: (EET.FLOAT, "PrimaryBChromaticityX"),
0x55D6: (EET.FLOAT, "PrimaryBChromaticityY"),
0x55D3: (EET.FLOAT, "PrimaryGChromaticityX"),
0x55D4: (EET.FLOAT, "PrimaryGChromaticityY"),
0x55D1: (EET.FLOAT, "PrimaryRChromaticityX"),
0x55D2: (EET.FLOAT, "PrimaryRChromaticityY"),
0x55B9: (EET.UNSIGNED, "Range"),
0x55BA: (EET.UNSIGNED, "TransferCharacteristics"),
0x55D7: (EET.FLOAT, "WhitePointChromaticityX"),
0x55D8: (EET.FLOAT, "WhitePointChromaticityY"),
}
def read_simple_element(f, type_, size):
    '''
    Read the size-byte payload of one element from f and decode it
    according to its EET type tag.
    Gives back:
    "" for any element of size 0;
    an integer for UNSIGNED and SIGNED;
    a string with NUL bytes stripped for TEXTA (ascii) and TEXTU (UTF-8);
    a list of children (see read_ebml_element_tree) for MASTER;
    a float UNIX timestamp for DATE (stored as nanoseconds since 2001-01-01);
    a float for FLOAT of size 4 or 8, None for any other FLOAT size;
    the raw bytes for every other type.
    '''
    if size == 0:
        return ""
    if type_ in (EET.UNSIGNED, EET.SIGNED):
        return read_fixedlength_number(f, size, type_ == EET.SIGNED)
    if type_ == EET.TEXTA or type_ == EET.TEXTU:
        # filter out \0, for gstreamer
        raw = f.read(size).replace(b"\x00", b"")
        return raw.decode("ascii" if type_ == EET.TEXTA else "UTF-8")
    if type_ == EET.MASTER:
        return read_ebml_element_tree(f, size)
    if type_ == EET.DATE:
        nanoseconds = read_fixedlength_number(f, size, True)
        # shift from the 2001-01-01 epoch to the UNIX epoch
        epoch_shift = (datetime.datetime(2001, 1, 1) - datetime.datetime(1970, 1, 1)).total_seconds()
        return nanoseconds * 1e-9 + epoch_shift
    if type_ == EET.FLOAT:
        if size == 4:
            return unpack(">f", f.read(4))[0]
        if size == 8:
            return unpack(">d", f.read(8))[0]
        # unsupported width: still consume the payload so the stream stays aligned
        read_fixedlength_number(f, size, False)
        sys.stderr.write("mkvparse: Floating point of size %d is not supported\n" % size)
        return None
    return f.read(size)
def read_ebml_element_tree(f, total_size):
    '''
    Build tree of elements, reading f until total_size reached
    Don't use for the whole segment, it's not Haskell
    Returns list of pairs (element_name, (element_type, element_value)).
    element_value can itself be such a list for MASTER children.
    Elements with an ID missing from element_types_names are kept as BINARY
    under the name "unknown_<hex id>".
    If a child has no size (-1) or claims to be bigger than what is left of
    the parent, a warning is written to stderr, the rest of the parent is
    skipped and the children collected so far are returned.
    '''
    children = []
    while total_size > 0:
        (id_, size, hsize) = read_ebml_element_header(f)
        if size == -1 or size > total_size:
            # The header (hsize bytes) has already been consumed, so only
            # the remainder of the parent is left to skip.
            remaining = max(total_size - hsize, 0)
            if size == -1:
                sys.stderr.write("mkvparse: Element %x without size? Damaged data? Skipping %d bytes\n" % (id_, remaining))
            else:
                sys.stderr.write("mkvparse: Element %x with size %d? Damaged data? Skipping %d bytes\n" % (id_, size, remaining))
            f.read(remaining)
            break
        (type_, name) = element_types_names.get(id_, (EET.BINARY, "unknown_%x" % id_))
        data = read_simple_element(f, type_, size)
        total_size -= size + hsize
        children.append((name, (type_, data)))
    return children
class MatroskaHandler:
    """ Base class with do-nothing callbacks; users of mkvparse override what they need """

    def tracks_available(self):
        """ Called by mkvparse after it has stored the parsed track entries in self.tracks """

    def segment_info_available(self):
        """ Called by mkvparse after it has stored the "Info" tree in self.segment_info """

    def frame(self, track_id, timestamp, data, more_laced_frames, duration, keyframe, invisible, discardable):
        """ Called for every frame found in a block.
        NOTE(review): handle_block in this file calls frame() with nine
        positional arguments (offset and length where "data" is declared here),
        so this default implementation cannot be called by it as is;
        confirm the intended signature against the overriding handlers. """

    def ebml_top_element(self, id_, name_, type_, data_):
        """ Called by mkvparse at the end of each handled top-level element """

    def before_handling_an_element(self):
        """ Called by mkvparse right before it reads an element header or starts resyncing """

    def begin_handling_ebml_element(self, id_, name, type_, headersize, datasize):
        """ Called once the header is read; the returned type tag is the one mkvparse uses.
        By default the type is passed through unchanged. """
        return type_

    def element_data_available(self, id_, name, type_, headersize, data):
        """ No-op hook (not called from within this file) """
def handle_block(buffer, buffer_pos, handler, cluster_timecode, timecode_scale=1000000, duration=None, header_removal_headers_for_tracks=None):
    '''
    Decode a block, handling all lacings, and report every frame to handler.frame
    with the appropriate timestamp and track number.

    buffer is the whole block payload; buffer_pos is the position of buffer[0]
    in the file. Frame payloads are not copied: handler.frame is called as
    frame(track number, timestamp in seconds, absolute frame position, frame length,
          number of laced frames still to come, duration, keyframe, invisible, discardable)
    The timestamp is (cluster_timecode + block timecode) scaled by
    timecode_scale nanoseconds.

    header_removal_headers_for_tracks maps track numbers to stripped header
    prefixes; blocks of such tracks are not supported (NotImplementedError).
    '''
    pos = 0
    (tracknum, pos) = parse_matroska_number(buffer, pos, signed=False)
    (tcode, pos) = parse_fixedlength_number(buffer, pos, 2, signed=True)
    flags = ord(buffer[pos])
    pos += 1
    f_keyframe = (flags & 0x80 == 0x80)
    f_invisible = (flags & 0x08 == 0x08)
    f_discardable = (flags & 0x01 == 0x01)
    laceflags = flags & 0x06
    block_timecode = (cluster_timecode + tcode)*(timecode_scale*0.000000001)
    # None instead of a shared mutable {} default; both mean "no such tracks"
    if header_removal_headers_for_tracks and tracknum in header_removal_headers_for_tracks:
        raise NotImplementedError
    if laceflags == 0x00:  # no lacing
        handler.frame(tracknum, block_timecode, buffer_pos+pos, len(buffer)-pos,
                      0, duration, f_keyframe, f_invisible, f_discardable)
        return
    # the stored count is "number of frames minus one"
    numframes = ord(buffer[pos]) + 1
    pos += 1
    lengths = []
    if laceflags == 0x02:  # Xiph lacing
        accumlength = 0
        for _ in range(numframes-1):
            (l, pos) = parse_xiph_number(buffer, pos)
            lengths.append(l)
            accumlength += l
        # the last frame takes whatever is left
        lengths.append(len(buffer)-pos-accumlength)
    elif laceflags == 0x06:  # EBML lacing
        accumlength = 0
        # Sizes are stored for every frame but the last one, so a lace of a
        # single frame carries no size field at all.
        if numframes > 1:
            (flength, pos) = parse_matroska_number(buffer, pos, signed=False)
            lengths.append(flength)
            accumlength += flength
            # the following sizes are signed differences to the previous one
            for _ in range(numframes-2):
                (l, pos) = parse_matroska_number(buffer, pos, signed=True)
                flength += l
                lengths.append(flength)
                accumlength += flength
        lengths.append(len(buffer)-pos-accumlength)
    elif laceflags == 0x04:  # Fixed size lacing
        # integer division: no detour through float
        fl = (len(buffer)-pos) // numframes
        lengths = [fl] * numframes
    more_laced_frames = numframes-1
    for length in lengths:
        handler.frame(tracknum, block_timecode, buffer_pos+pos, length, more_laced_frames, duration,
                      f_keyframe, f_invisible, f_discardable)
        pos += length
        more_laced_frames -= 1
def resync(f):
    '''
    Scan f byte by byte for the ID of the next Cluster, Segment or Tracks
    element and read the size that follows it.
    Returns (element id, element size, header size) on success.
    Returns (None, None, None) when the input ends first, including when it
    ends in the middle of the size field; always a 3-tuple, because callers
    unpack three values.
    Note: when the first ID byte matches but the following three do not,
    those three bytes are consumed without being rescanned.
    '''
    sys.stderr.write("mkvparse: Resyncing\n")
    # first ID byte -> (remaining three ID bytes, full element ID)
    markers = {
        b"\x1F": (b"\x43\xB6\x75", 0x1F43B675),  # cluster
        b"\x18": (b"\x53\x80\x67", 0x18538067),  # segment
        b"\x16": (b"\x54\xAE\x6B", 0x1654AE6B),  # tracks
    }
    while True:
        b = f.read(1)
        if not b:
            return (None, None, None)
        if b not in markers:
            continue
        (id_tail, element_id) = markers[b]
        if f.read(3) != id_tail:
            continue
        try:
            (seglen, x) = read_matroska_number(f)
        except StopIteration:
            # input truncated inside the size field: nothing left to resync to
            return (None, None, None)
        return (element_id, seglen, x+4)
# TrackType value -> readable name stored under the 'type' key of a track
_TRACK_TYPE_NAMES = {
    0x01: 'video',
    0x02: 'audio',
    0x03: 'complex',
    0x10: 'logo',
    0x11: 'subtitle',
    0x12: 'button',
    0x20: 'control',
}


def _check_ebml_header(tree):
    ''' Write warnings to stderr if the EBML header tree announces versions or a DocType this parser does not expect '''
    d = dict(tree)
    if 'EBMLReadVersion' in d:
        if d['EBMLReadVersion'][1] > 1:
            sys.stderr.write("mkvparse: Warning: EBMLReadVersion too big\n")
    if 'DocTypeReadVersion' in d:
        if d['DocTypeReadVersion'][1] > 2:
            sys.stderr.write("mkvparse: Warning: DocTypeReadVersion too big\n")
    # A header without DocType gets the same warning instead of a KeyError
    dt = d['DocType'][1] if 'DocType' in d else None
    if dt != "matroska" and dt != "webm":
        sys.stderr.write("mkvparse: Warning: EBML DocType is not \"matroska\" or \"webm\"")


def _register_tracks(handler, tree, header_removal_headers_for_tracks):
    ''' Fill handler.tracks from the "Tracks" tree, record header removal prefixes and call handler.tracks_available() '''
    handler.tracks = {}
    for (ten, (_t, track)) in tree:
        if ten != "TrackEntry":
            continue
        d = dict(track)
        n = d['TrackNumber'][1]
        handler.tracks[n] = d
        tt = d['TrackType'][1]
        if tt in _TRACK_TYPE_NAMES:
            d['type'] = _TRACK_TYPE_NAMES[tt]
        if 'TrackTimecodeScale' in d:
            sys.stderr.write("mkvparse: Warning: TrackTimecodeScale is not supported\n")
        if 'ContentEncodings' in d:
            try:
                # children of the first child of the first entry in ContentEncodings
                # (presumably ContentEncoding -> ContentCompression -- TODO confirm)
                compr = dict(d["ContentEncodings"][1][0][1][1][0][1][1])
                if compr["ContentCompAlgo"][1] == 3:
                    header_removal_headers_for_tracks[n] = compr["ContentCompSettings"][1]
                else:
                    sys.stderr.write("mkvparse: Warning: compression other than "
                                     "header removal is not supported\n")
            except Exception:
                # best effort only; was a bare "except:", which also swallowed
                # KeyboardInterrupt and SystemExit
                sys.stderr.write("mkvparse: Warning: unsuccessfully tried "
                                 "to handle header removal compression\n")
    handler.tracks_available()


def mkvparse(f, handler):
    '''
    Read mkv file f and call handler methods when track or segment information is ready or when frame is read.
    Handles lacing, timecodes (except of per-track scaling)

    For every top-level element: handler.before_handling_an_element() is called,
    the header is read, handler.begin_handling_ebml_element() may override the
    element type, the element is processed and finally handler.ebml_top_element()
    is called. Unknown element IDs and errors while reading an element make the
    parser resync to the next Cluster, Segment or Tracks element.
    Parsing stops at the end of input or when resyncing finds nothing.
    Raises NotImplementedError for BlockGroup elements and for blocks of tracks
    that use header removal compression.
    '''
    timecode_scale = 1000000
    current_cluster_timecode = 0
    resync_element_id = None
    resync_element_size = None
    resync_element_headersize = None
    header_removal_headers_for_tracks = {}
    # the loop is left through "break"; a falsy f means there is nothing to parse
    while f:
        (id_, size, hsize) = (None, None, None)
        tree = None
        data = None
        (type_, name) = (None, None)
        try:
            if not resync_element_id:
                try:
                    handler.before_handling_an_element()
                    (id_, size, hsize) = read_ebml_element_header(f)
                except StopIteration:
                    break
                if not (id_ in element_types_names):
                    sys.stderr.write("mkvparse: Unknown element with id %x and size %d\n" % (id_, size))
                    (resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
                    if resync_element_id:
                        continue
                    else:
                        break
            else:
                # continue with the element header that resync() has found
                id_ = resync_element_id
                size = resync_element_size
                hsize = resync_element_headersize
                resync_element_id = None
                resync_element_size = None
                resync_element_headersize = None
            (type_, name) = element_types_names[id_]
            type_ = handler.begin_handling_ebml_element(id_, name, type_, hsize, size)
            if type_ == EET.MASTER:
                tree = read_ebml_element_tree(f, size)
                data = tree
        except Exception:
            traceback.print_exc()
            handler.before_handling_an_element()
            (resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
            if resync_element_id:
                continue
            else:
                break
        if name == "EBML" and isinstance(data, list):
            _check_ebml_header(tree)
        elif name == "Info" and isinstance(data, list):
            handler.segment_info = tree
            handler.segment_info_available()
            d = dict(tree)
            if "TimecodeScale" in d:
                timecode_scale = d["TimecodeScale"][1]
        elif name == "Tracks" and isinstance(data, list):
            _register_tracks(handler, tree, header_removal_headers_for_tracks)
        # cluster contents:
        elif name == "Timecode" and type_ == EET.UNSIGNED:
            data = read_fixedlength_number(f, size, False)
            current_cluster_timecode = data
        elif name == "SimpleBlock" and type_ == EET.BINARY:
            pos = f.tell()
            data = f.read(size)
            handle_block(data, pos, handler, current_cluster_timecode, timecode_scale, None, header_removal_headers_for_tracks)
        elif name == "BlockGroup" and type_ == EET.MASTER:
            # BlockGroup (Block plus BlockDuration) is deliberately unsupported here
            raise NotImplementedError
        else:
            if type_ != EET.JUST_GO_ON and type_ != EET.MASTER:
                data = read_simple_element(f, type_, size)
        handler.ebml_top_element(id_, name, type_, data)
# This module is a library; running it directly only prints a pointer to the example.
if __name__ == "__main__":
    print("Run mkvuser.py for the example")