# Licence==MIT; Vitaly "_Vi" Shukela 2012

# Simple easy-to-use hacky matroska parser

# Supports SimpleBlock and BlockGroup, lacing, TimecodeScale.
# Does not support seeking, cues, chapters and other features.
# No proper EOF handling unfortunately

# See "mkvuser.py" for the example

# NOTE: in this variant frames are reported to the handler as
# (absolute position, length) pairs instead of data buffers, and both
# BlockGroup elements and header-removal compression raise NotImplementedError.

import traceback
from struct import unpack

import sys
import datetime

if sys.version_info[0] < 3:
    range = xrange  # noqa: F821 -- Python 2 only
else:
    def ord(something):
        """Python 3 replacement for ord() that also accepts ints.

        Indexing ``bytes`` yields ints on Python 3 while ``f.read(1)`` yields
        a one-byte ``bytes`` object; both forms are accepted here.

        Raises StopIteration for an empty bytes object (i.e. a read at EOF).
        """
        if isinstance(something, bytes):
            if something == b"":
                raise StopIteration
            return something[0]
        return something


def get_major_bit_number(n):
    '''
        Takes uint8, returns number of the most significant bit plus the number with that bit cleared.
        Examples:
        0b10010101 -> (0, 0b00010101)
        0b00010101 -> (3, 0b00000101)
        0b01111111 -> (1, 0b00111111)

        Raises Exception("Bad number") when n is zero (no bit is set).
    '''
    if not n:
        raise Exception("Bad number")
    mask = 0x80
    index = 0
    while not n & mask:
        index += 1
        mask >>= 1
    return (index, n & ~mask)


def read_matroska_number(f, unmodified=False, signed=False):
    '''
        Read ebml number. Unmodified means don't clear the length bit (as in Element IDs)
        Returns the number and it's length as a tuple

        An unsigned number whose value bits are all ones is reported as -1
        (this is how the callers detect an element without a size).

        Raises StopIteration when f is at EOF.

        See examples in "parse_matroska_number" function
    '''
    if unmodified and signed:
        raise Exception("Contradictary arguments")
    first_byte = f.read(1)
    if not first_byte:
        # works for both "" (Python 2) and b"" (Python 3)
        raise StopIteration
    r = ord(first_byte)
    (n, r2) = get_major_bit_number(r)
    if not unmodified:
        r = r2
    # n is the number of additional bytes following the first one
    for _ in range(n):
        r = r * 0x100 + ord(f.read(1))
    if signed:
        # same bias as in parse_matroska_number: half of the value range
        r -= (2 ** (7 * n + 6) - 1)
    elif r == 2 ** (7 * n + 7) - 1:
        return (-1, n + 1)
    return (r, n + 1)


def parse_matroska_number(data, pos, unmodified=False, signed=False):
    r'''
        Parse ebml number from buffer[pos:]. Just like read_matroska_number.
        Unmodified means don't clear the length bit (as in Element IDs)
        Returns the number plus the new position in input buffer

        Examples:
            "\x81" -> (1, pos+1)
            "\x40\x01" -> (1, pos+2)
            "\x20\x00\x01" -> (1, pos+3)
            "\x3F\xFF\xFF" -> (0x1FFFFF, pos+3)
            "\x20\x00\x01" unmodified -> (0x200001, pos+3)
            "\xBF" signed -> (0, pos+1)
            "\xBE" signed -> (-1, pos+1)
            "\xC0" signed -> (1, pos+1)
            "\x5F\xEF" signed -> (-16, pos+2)
    '''
    if unmodified and signed:
        raise Exception("Contradictary arguments")
    r = ord(data[pos])
    pos += 1
    (n, r2) = get_major_bit_number(r)
    if not unmodified:
        r = r2
    # n is the number of additional bytes following the first one
    for _ in range(n):
        r = r * 0x100 + ord(data[pos])
        pos += 1
    if signed:
        r -= (2 ** (7 * n + 6) - 1)
    elif r == 2 ** (7 * n + 7) - 1:
        return (-1, pos)
    return (r, pos)


def parse_xiph_number(data, pos):
    r'''
        Parse the Xiph lacing number from data[pos:]
        Returns the number plus the new position

        Examples:
            "\x01" -> (1, pos+1)
            "\x55" -> (0x55, pos+1)
            "\xFF\x04" -> (0x103, pos+2)
            "\xFF\xFF\x04" -> (0x202, pos+3)
            "\xFF\xFF\x00" -> (0x1FE, pos+3)
    '''
    v = ord(data[pos])
    pos += 1
    r = 0
    # every 0xFF byte means "add 255 and keep reading"
    while v == 255:
        r += v
        v = ord(data[pos])
        pos += 1
    r += v
    return (r, pos)


def parse_fixedlength_number(data, pos, length, signed=False):
    r'''
        Read the big-endian number from data[pos:pos+length]
        Returns the number plus the new position

        Examples:
            "\x01" -> (0x1, pos+1)
            "\x55" -> (0x55, pos+1)
            "\x55" signed -> (0x55, pos+1)
            "\xFF\x04" -> (0xFF04, pos+2)
            "\xFF\x04" signed -> (-0x00FC, pos+2)
    '''
    r = 0
    for i in range(length):
        r = r * 0x100 + ord(data[pos + i])
    if signed:
        # two's complement: the top bit of the first byte is the sign
        if ord(data[pos]) & 0x80:
            r -= 2 ** (8 * length)
    return (r, pos + length)


def read_fixedlength_number(f, length, signed=False):
    """ Read length bytes and parse (parse_fixedlength_number) it.
    Returns only the number"""
    buf = f.read(length)
    (r, pos) = parse_fixedlength_number(buf, 0, length, signed)
    return r


def read_ebml_element_header(f):
    '''
        Read Element ID and size
        Returns id, element size and this header size

        The size is -1 when the element has no known size
        (see read_matroska_number).
    '''
    (id_, n) = read_matroska_number(f, unmodified=True)
    (size, n2) = read_matroska_number(f)
    return (id_, size, n + n2)


class EbmlElementType:
    """Kinds of EBML elements; selects how an element's payload is decoded."""
    VOID = 0
    MASTER = 1  # read all subelements and return tree. Don't use this too large things like Segment
    UNSIGNED = 2
    SIGNED = 3
    TEXTA = 4
    TEXTU = 5
    BINARY = 6
    FLOAT = 7
    DATE = 8

    JUST_GO_ON = 10  # For "Segment".
    # Actually MASTER, but don't build the tree for all subelements,
    # interpreting all child elements as if they were top-level elements


EET = EbmlElementType

# lynx -width=10000 -dump http://matroska.org/technical/specs/index.html
# | sed 's/not 0/not0/g; s/> 0/>0/g; s/Sampling Frequency/SamplingFrequency/g'
# | awk '{print $1 " " $3 " " $8}'
# | grep '\[..\]'
# | perl -ne '/(\S+) (\S+) (.)/;
#        $name=$1; $id=$2; $type=$3;
#        $id=~s/\[|\]//g;
#        %types = (m=>"EET.MASTER",
#                  u=>"EET.UNSIGNED",
#                  i=>"EET.SIGNED",
#                  8=>"EET.TEXTU",
#                  s=>"EET.TEXTA",
#                  b=>"EET.BINARY",
#                  f=>"EET.FLOAT",
#                  d=>"EET.DATE");
#        $t=$types{$type};
#        next unless $t;
#        $t="EET.JUST_GO_ON" if $name eq "Segment" or $name eq "Cluster";
#        print "\t0x$id: ($t, \"$name\"),\n";'

# Maps element ID -> (element type, element name)
element_types_names = {
    0x1A45DFA3: (EET.MASTER, "EBML"),
    0x4286: (EET.UNSIGNED, "EBMLVersion"),
    0x42F7: (EET.UNSIGNED, "EBMLReadVersion"),
    0x42F2: (EET.UNSIGNED, "EBMLMaxIDLength"),
    0x42F3: (EET.UNSIGNED, "EBMLMaxSizeLength"),
    0x4282: (EET.TEXTA, "DocType"),
    0x4287: (EET.UNSIGNED, "DocTypeVersion"),
    0x4285: (EET.UNSIGNED, "DocTypeReadVersion"),
    0xEC: (EET.BINARY, "Void"),
    0xBF: (EET.BINARY, "CRC-32"),
    0x1B538667: (EET.MASTER, "SignatureSlot"),
    0x7E8A: (EET.UNSIGNED, "SignatureAlgo"),
    0x7E9A: (EET.UNSIGNED, "SignatureHash"),
    0x7EA5: (EET.BINARY, "SignaturePublicKey"),
    0x7EB5: (EET.BINARY, "Signature"),
    0x7E5B: (EET.MASTER, "SignatureElements"),
    0x7E7B: (EET.MASTER, "SignatureElementList"),
    0x6532: (EET.BINARY, "SignedElement"),
    0x18538067: (EET.JUST_GO_ON, "Segment"),
    0x114D9B74: (EET.MASTER, "SeekHead"),
    0x4DBB: (EET.MASTER, "Seek"),
    0x53AB: (EET.BINARY, "SeekID"),
    0x53AC: (EET.UNSIGNED, "SeekPosition"),
    0x1549A966: (EET.MASTER, "Info"),
    0x73A4: (EET.BINARY, "SegmentUID"),
    0x7384: (EET.TEXTU, "SegmentFilename"),
    0x3CB923: (EET.BINARY, "PrevUID"),
    0x3C83AB: (EET.TEXTU, "PrevFilename"),
    0x3EB923: (EET.BINARY, "NextUID"),
    0x3E83BB: (EET.TEXTU, "NextFilename"),
    0x4444: (EET.BINARY, "SegmentFamily"),
    0x6924: (EET.MASTER, "ChapterTranslate"),
    0x69FC: (EET.UNSIGNED, "ChapterTranslateEditionUID"),
    0x69BF: (EET.UNSIGNED, "ChapterTranslateCodec"),
    0x69A5: (EET.BINARY, "ChapterTranslateID"),
    0x2AD7B1: (EET.UNSIGNED, "TimecodeScale"),
    0x4489: (EET.FLOAT, "Duration"),
    0x4461: (EET.DATE, "DateUTC"),
    0x7BA9: (EET.TEXTU, "Title"),
    0x4D80: (EET.TEXTU, "MuxingApp"),
    0x5741: (EET.TEXTU, "WritingApp"),
    0x1F43B675: (EET.JUST_GO_ON, "Cluster"),
    0xE7: (EET.UNSIGNED, "Timecode"),
    0x5854: (EET.MASTER, "SilentTracks"),
    0x58D7: (EET.UNSIGNED, "SilentTrackNumber"),
    0xA7: (EET.UNSIGNED, "Position"),
    0xAB: (EET.UNSIGNED, "PrevSize"),
    0xA3: (EET.BINARY, "SimpleBlock"),
    0xA0: (EET.MASTER, "BlockGroup"),
    0xA1: (EET.BINARY, "Block"),
    0xA2: (EET.BINARY, "BlockVirtual"),
    0x75A1: (EET.MASTER, "BlockAdditions"),
    0xA6: (EET.MASTER, "BlockMore"),
    0xEE: (EET.UNSIGNED, "BlockAddID"),
    0xA5: (EET.BINARY, "BlockAdditional"),
    0x9B: (EET.UNSIGNED, "BlockDuration"),
    0xFA: (EET.UNSIGNED, "ReferencePriority"),
    0xFB: (EET.SIGNED, "ReferenceBlock"),
    0xFD: (EET.SIGNED, "ReferenceVirtual"),
    0xA4: (EET.BINARY, "CodecState"),
    0x8E: (EET.MASTER, "Slices"),
    0xE8: (EET.MASTER, "TimeSlice"),
    0xCC: (EET.UNSIGNED, "LaceNumber"),
    0xCD: (EET.UNSIGNED, "FrameNumber"),
    0xCB: (EET.UNSIGNED, "BlockAdditionID"),
    0xCE: (EET.UNSIGNED, "Delay"),
    0xCF: (EET.UNSIGNED, "SliceDuration"),
    0xC8: (EET.MASTER, "ReferenceFrame"),
    0xC9: (EET.UNSIGNED, "ReferenceOffset"),
    0xCA: (EET.UNSIGNED, "ReferenceTimeCode"),
    0xAF: (EET.BINARY, "EncryptedBlock"),
    0x1654AE6B: (EET.MASTER, "Tracks"),
    0xAE: (EET.MASTER, "TrackEntry"),
    0xD7: (EET.UNSIGNED, "TrackNumber"),
    0x73C5: (EET.UNSIGNED, "TrackUID"),
    0x83: (EET.UNSIGNED, "TrackType"),
    0xB9: (EET.UNSIGNED, "FlagEnabled"),
    0x88: (EET.UNSIGNED, "FlagDefault"),
    0x55AA: (EET.UNSIGNED, "FlagForced"),
    0x9C: (EET.UNSIGNED, "FlagLacing"),
    0x6DE7: (EET.UNSIGNED, "MinCache"),
    0x6DF8: (EET.UNSIGNED, "MaxCache"),
    0x23E383: (EET.UNSIGNED, "DefaultDuration"),
    0x23314F: (EET.FLOAT, "TrackTimecodeScale"),
    0x537F: (EET.SIGNED, "TrackOffset"),
    0x55EE: (EET.UNSIGNED, "MaxBlockAdditionID"),
    0x536E: (EET.TEXTU, "Name"),
    0x22B59C: (EET.TEXTA, "Language"),
    0x86: (EET.TEXTA, "CodecID"),
    0x63A2: (EET.BINARY, "CodecPrivate"),
    0x258688: (EET.TEXTU, "CodecName"),
    0x7446: (EET.UNSIGNED, "AttachmentLink"),
    0x3A9697: (EET.TEXTU, "CodecSettings"),
    0x3B4040: (EET.TEXTA, "CodecInfoURL"),
    0x26B240: (EET.TEXTA, "CodecDownloadURL"),
    0xAA: (EET.UNSIGNED, "CodecDecodeAll"),
    0x6FAB: (EET.UNSIGNED, "TrackOverlay"),
    0x6624: (EET.MASTER, "TrackTranslate"),
    0x66FC: (EET.UNSIGNED, "TrackTranslateEditionUID"),
    0x66BF: (EET.UNSIGNED, "TrackTranslateCodec"),
    0x66A5: (EET.BINARY, "TrackTranslateTrackID"),
    0xE0: (EET.MASTER, "Video"),
    0x9A: (EET.UNSIGNED, "FlagInterlaced"),
    0x53B8: (EET.UNSIGNED, "StereoMode"),
    0x53B9: (EET.UNSIGNED, "OldStereoMode"),
    0xB0: (EET.UNSIGNED, "PixelWidth"),
    0xBA: (EET.UNSIGNED, "PixelHeight"),
    0x54AA: (EET.UNSIGNED, "PixelCropBottom"),
    0x54BB: (EET.UNSIGNED, "PixelCropTop"),
    0x54CC: (EET.UNSIGNED, "PixelCropLeft"),
    0x54DD: (EET.UNSIGNED, "PixelCropRight"),
    0x54B0: (EET.UNSIGNED, "DisplayWidth"),
    0x54BA: (EET.UNSIGNED, "DisplayHeight"),
    0x54B2: (EET.UNSIGNED, "DisplayUnit"),
    0x54B3: (EET.UNSIGNED, "AspectRatioType"),
    0x2EB524: (EET.BINARY, "ColourSpace"),
    0x2FB523: (EET.FLOAT, "GammaValue"),
    0x2383E3: (EET.FLOAT, "FrameRate"),
    0xE1: (EET.MASTER, "Audio"),
    0xB5: (EET.FLOAT, "SamplingFrequency"),
    0x78B5: (EET.FLOAT, "OutputSamplingFrequency"),
    0x9F: (EET.UNSIGNED, "Channels"),
    0x7D7B: (EET.BINARY, "ChannelPositions"),
    0x6264: (EET.UNSIGNED, "BitDepth"),
    0xE2: (EET.MASTER, "TrackOperation"),
    0xE3: (EET.MASTER, "TrackCombinePlanes"),
    0xE4: (EET.MASTER, "TrackPlane"),
    0xE5: (EET.UNSIGNED, "TrackPlaneUID"),
    0xE6: (EET.UNSIGNED, "TrackPlaneType"),
    0xE9: (EET.MASTER, "TrackJoinBlocks"),
    0xED: (EET.UNSIGNED, "TrackJoinUID"),
    0xC0: (EET.UNSIGNED, "TrickTrackUID"),
    0xC1: (EET.BINARY, "TrickTrackSegmentUID"),
    0xC6: (EET.UNSIGNED, "TrickTrackFlag"),
    0xC7: (EET.UNSIGNED, "TrickMasterTrackUID"),
    0xC4: (EET.BINARY, "TrickMasterTrackSegmentUID"),
    0x6D80: (EET.MASTER, "ContentEncodings"),
    0x6240: (EET.MASTER, "ContentEncoding"),
    0x5031: (EET.UNSIGNED, "ContentEncodingOrder"),
    0x5032: (EET.UNSIGNED, "ContentEncodingScope"),
    0x5033: (EET.UNSIGNED, "ContentEncodingType"),
    0x5034: (EET.MASTER, "ContentCompression"),
    0x4254: (EET.UNSIGNED, "ContentCompAlgo"),
    0x4255: (EET.BINARY, "ContentCompSettings"),
    0x5035: (EET.MASTER, "ContentEncryption"),
    0x47E1: (EET.UNSIGNED, "ContentEncAlgo"),
    0x47E2: (EET.BINARY, "ContentEncKeyID"),
    0x47E3: (EET.BINARY, "ContentSignature"),
    0x47E4: (EET.BINARY, "ContentSigKeyID"),
    0x47E5: (EET.UNSIGNED, "ContentSigAlgo"),
    0x47E6: (EET.UNSIGNED, "ContentSigHashAlgo"),
    0x1C53BB6B: (EET.MASTER, "Cues"),
    0xBB: (EET.MASTER, "CuePoint"),
    0xB3: (EET.UNSIGNED, "CueTime"),
    0xB7: (EET.MASTER, "CueTrackPositions"),
    0xF7: (EET.UNSIGNED, "CueTrack"),
    0xF1: (EET.UNSIGNED, "CueClusterPosition"),
    0x5378: (EET.UNSIGNED, "CueBlockNumber"),
    0xEA: (EET.UNSIGNED, "CueCodecState"),
    0xDB: (EET.MASTER, "CueReference"),
    0x96: (EET.UNSIGNED, "CueRefTime"),
    0x97: (EET.UNSIGNED, "CueRefCluster"),
    0x535F: (EET.UNSIGNED, "CueRefNumber"),
    0xEB: (EET.UNSIGNED, "CueRefCodecState"),
    0x1941A469: (EET.MASTER, "Attachments"),
    0x61A7: (EET.MASTER, "AttachedFile"),
    0x467E: (EET.TEXTU, "FileDescription"),
    0x466E: (EET.TEXTU, "FileName"),
    0x4660: (EET.TEXTA, "FileMimeType"),
    0x465C: (EET.BINARY, "FileData"),
    0x46AE: (EET.UNSIGNED, "FileUID"),
    0x4675: (EET.BINARY, "FileReferral"),
    0x4661: (EET.UNSIGNED, "FileUsedStartTime"),
    0x4662: (EET.UNSIGNED, "FileUsedEndTime"),
    0x1043A770: (EET.MASTER, "Chapters"),
    0x45B9: (EET.MASTER, "EditionEntry"),
    0x45BC: (EET.UNSIGNED, "EditionUID"),
    0x45BD: (EET.UNSIGNED, "EditionFlagHidden"),
    0x45DB: (EET.UNSIGNED, "EditionFlagDefault"),
    0x45DD: (EET.UNSIGNED, "EditionFlagOrdered"),
    0xB6: (EET.MASTER, "ChapterAtom"),
    0x73C4: (EET.UNSIGNED, "ChapterUID"),
    0x91: (EET.UNSIGNED, "ChapterTimeStart"),
    0x92: (EET.UNSIGNED, "ChapterTimeEnd"),
    0x98: (EET.UNSIGNED, "ChapterFlagHidden"),
    0x4598: (EET.UNSIGNED, "ChapterFlagEnabled"),
    0x6E67: (EET.BINARY, "ChapterSegmentUID"),
    0x6EBC: (EET.UNSIGNED, "ChapterSegmentEditionUID"),
    0x63C3: (EET.UNSIGNED, "ChapterPhysicalEquiv"),
    0x8F: (EET.MASTER, "ChapterTrack"),
    0x89: (EET.UNSIGNED, "ChapterTrackNumber"),
    0x80: (EET.MASTER, "ChapterDisplay"),
    0x85: (EET.TEXTU, "ChapString"),
    0x437C: (EET.TEXTA, "ChapLanguage"),
    0x437E: (EET.TEXTA, "ChapCountry"),
    0x6944: (EET.MASTER, "ChapProcess"),
    0x6955: (EET.UNSIGNED, "ChapProcessCodecID"),
    0x450D: (EET.BINARY, "ChapProcessPrivate"),
    0x6911: (EET.MASTER, "ChapProcessCommand"),
    0x6922: (EET.UNSIGNED, "ChapProcessTime"),
    0x6933: (EET.BINARY, "ChapProcessData"),
    0x1254C367: (EET.MASTER, "Tags"),
    0x7373: (EET.MASTER, "Tag"),
    0x63C0: (EET.MASTER, "Targets"),
    0x68CA: (EET.UNSIGNED, "TargetTypeValue"),
    0x63CA: (EET.TEXTA, "TargetType"),
    0x63C5: (EET.UNSIGNED, "TagTrackUID"),
    0x63C9: (EET.UNSIGNED, "TagEditionUID"),
    0x63C4: (EET.UNSIGNED, "TagChapterUID"),
    0x63C6: (EET.UNSIGNED, "TagAttachmentUID"),
    0x67C8: (EET.MASTER, "SimpleTag"),
    0x45A3: (EET.TEXTU, "TagName"),
    0x447A: (EET.TEXTA, "TagLanguage"),
    0x4484: (EET.UNSIGNED, "TagDefault"),
    0x4487: (EET.TEXTU, "TagString"),
    0x4485: (EET.BINARY, "TagBinary"),
    0x56AA: (EET.UNSIGNED, "CodecDelay"),
    0x56BB: (EET.UNSIGNED, "SeekPreRoll"),
    0xF0: (EET.UNSIGNED, "CueRelativePosition"),
    0x53C0: (EET.UNSIGNED, "AlphaMode"),
    0x55B2: (EET.UNSIGNED, "BitsPerChannel"),
    0x55B5: (EET.UNSIGNED, "CbSubsamplingHorz"),
    0x55B6: (EET.UNSIGNED, "CbSubsamplingVert"),
    0x5654: (EET.TEXTU, "ChapterStringUID"),
    0x55B7: (EET.UNSIGNED, "ChromaSitingHorz"),
    0x55B8: (EET.UNSIGNED, "ChromaSitingVert"),
    0x55B3: (EET.UNSIGNED, "ChromaSubsamplingHorz"),
    0x55B4: (EET.UNSIGNED, "ChromaSubsamplingVert"),
    0x55B0: (EET.MASTER, "Colour"),
    0x234E7A: (EET.UNSIGNED, "DefaultDecodedFieldDuration"),
    0x75A2: (EET.SIGNED, "DiscardPadding"),
    0x9D: (EET.UNSIGNED, "FieldOrder"),
    0x55D9: (EET.FLOAT, "LuminanceMax"),
    0x55DA: (EET.FLOAT, "LuminanceMin"),
    0x55D0: (EET.MASTER, "MasteringMetadata"),
    0x55B1: (EET.UNSIGNED, "MatrixCoefficients"),
    0x55BC: (EET.UNSIGNED, "MaxCLL"),
    0x55BD: (EET.UNSIGNED, "MaxFALL"),
    0x55BB: (EET.UNSIGNED, "Primaries"),
    0x55D5: (EET.FLOAT, "PrimaryBChromaticityX"),
    0x55D6: (EET.FLOAT, "PrimaryBChromaticityY"),
    0x55D3: (EET.FLOAT, "PrimaryGChromaticityX"),
    0x55D4: (EET.FLOAT, "PrimaryGChromaticityY"),
    0x55D1: (EET.FLOAT, "PrimaryRChromaticityX"),
    0x55D2: (EET.FLOAT, "PrimaryRChromaticityY"),
    0x55B9: (EET.UNSIGNED, "Range"),
    0x55BA: (EET.UNSIGNED, "TransferCharacteristics"),
    0x55D7: (EET.FLOAT, "WhitePointChromaticityX"),
    0x55D8: (EET.FLOAT, "WhitePointChromaticityY"),
}


def read_simple_element(f, type_, size):
    '''
        Read the payload (size bytes) of one element from f and decode it
        according to type_ (one of the EET constants).

        Returns "" for a zero-sized element, a number for UNSIGNED/SIGNED,
        text for TEXTA/TEXTU, a list (see read_ebml_element_tree) for MASTER,
        seconds since the UNIX epoch for DATE, a float (or None when the
        float size is neither 4 nor 8) for FLOAT, and raw bytes otherwise.
    '''
    if size == 0:
        return ""

    if type_ == EET.UNSIGNED:
        data = read_fixedlength_number(f, size, False)
    elif type_ == EET.SIGNED:
        data = read_fixedlength_number(f, size, True)
    elif type_ == EET.TEXTA:
        data = f.read(size)
        data = data.replace(b"\x00", b"")  # filter out \0, for gstreamer
        data = data.decode("ascii")
    elif type_ == EET.TEXTU:
        data = f.read(size)
        data = data.replace(b"\x00", b"")  # filter out \0, for gstreamer
        data = data.decode("UTF-8")
    elif type_ == EET.MASTER:
        data = read_ebml_element_tree(f, size)
    elif type_ == EET.DATE:
        # stored as nanoseconds relative to 2001-01-01
        data = read_fixedlength_number(f, size, True)
        data *= 1e-9
        data += (datetime.datetime(2001, 1, 1) - datetime.datetime(1970, 1, 1)).total_seconds()
        # now should be UNIX date
    elif type_ == EET.FLOAT:
        if size == 4:
            data = unpack(">f", f.read(4))[0]
        elif size == 8:
            data = unpack(">d", f.read(8))[0]
        else:
            # consume the payload so that the stream stays in sync
            read_fixedlength_number(f, size, False)
            sys.stderr.write("mkvparse: Floating point of size %d is not supported\n" % size)
            data = None
    else:
        data = f.read(size)
    return data


def read_ebml_element_tree(f, total_size):
    '''
        Build tree of elements, reading f until total_size reached
        Don't use for the whole segment, it's not Haskell

        Returns list of pairs (element_name, (element_type, element_value)).
        element_value can also be such a list (for MASTER elements).

        A child without a size, or bigger than the space that is left, is
        treated as damaged data: the rest of the parent is skipped and the
        children collected so far are returned.
    '''
    childs = []
    while total_size > 0:
        (id_, size, hsize) = read_ebml_element_header(f)
        if size == -1 or size > total_size:
            # the child header has already been consumed, so only
            # total_size-hsize bytes of the parent are left to skip
            remaining = max(total_size - hsize, 0)
            if size == -1:
                sys.stderr.write("mkvparse: Element %x without size? Damaged data? Skipping %d bytes\n" % (id_, remaining))
            else:
                sys.stderr.write("mkvparse: Element %x with size %d? Damaged data? Skipping %d bytes\n" % (id_, size, remaining))
            f.read(remaining)
            break
        type_ = EET.BINARY
        name = "unknown_%x" % id_
        if id_ in element_types_names:
            (type_, name) = element_types_names[id_]
        data = read_simple_element(f, type_, size)
        total_size -= (size + hsize)
        childs.append((name, (type_, data)))
    return childs


class MatroskaHandler:
    """ User for mkvparse should override these methods """

    def tracks_available(self):
        """Called by mkvparse after self.tracks has been filled from a "Tracks" element."""
        pass

    def segment_info_available(self):
        """Called by mkvparse after self.segment_info has been set from an "Info" element."""
        pass

    def frame(self, track_id, timestamp, pos, length, more_laced_frames, duration, keyframe, invisible, discardable):
        """Called by handle_block for every frame.

        pos is buffer_pos (as given to handle_block) plus the frame's offset
        inside the block and length is the frame's size in bytes; the frame
        data itself is not passed.
        """
        pass

    def ebml_top_element(self, id_, name_, type_, data_):
        """Called by mkvparse after each top-level element has been handled."""
        pass

    def before_handling_an_element(self):
        """Called by mkvparse right before an element header is read (and before resyncing)."""
        pass

    def begin_handling_ebml_element(self, id_, name, type_, headersize, datasize):
        """Called by mkvparse after a header is read; the returned type is used to handle the element."""
        return type_

    def element_data_available(self, id_, name, type_, headersize, data):
        """Not called from within this module."""
        pass


def handle_block(buffer, buffer_pos, handler, cluster_timecode, timecode_scale=1000000, duration=None, header_removal_headers_for_tracks=None):
    '''
        Decode a block, handling all lacings, send it to handler with appropriate timestamp, track number

        buffer is the whole block payload and buffer_pos is added to every
        frame offset reported to handler.frame (mkvparse passes f.tell() of the
        payload start). Frames are reported as (position, length), not as data.

        Raises NotImplementedError when the block's track is present in
        header_removal_headers_for_tracks (header removal is not supported here).
    '''
    pos = 0
    (tracknum, pos) = parse_matroska_number(buffer, pos, signed=False)
    (tcode, pos) = parse_fixedlength_number(buffer, pos, 2, signed=True)
    flags = ord(buffer[pos])
    pos += 1
    f_keyframe = (flags & 0x80 == 0x80)
    f_invisible = (flags & 0x08 == 0x08)
    f_discardable = (flags & 0x01 == 0x01)
    laceflags = flags & 0x06

    # timecode_scale is in nanoseconds per tick; the result is in seconds
    block_timecode = (cluster_timecode + tcode) * (timecode_scale * 0.000000001)

    if header_removal_headers_for_tracks and tracknum in header_removal_headers_for_tracks:
        # header_removal_prefix = header_removal_headers_for_tracks[tracknum]
        raise NotImplementedError

    if laceflags == 0x00:  # no lacing
        # buf = buffer[pos:]
        handler.frame(tracknum, block_timecode, buffer_pos + pos, len(buffer) - pos, 0, duration, f_keyframe, f_invisible, f_discardable)
        return

    # the stored value is the number of frames minus one
    numframes = ord(buffer[pos]) + 1
    pos += 1

    lengths = []

    if laceflags == 0x02:  # Xiph lacing
        accumlength = 0
        for _ in range(numframes - 1):
            (l, pos) = parse_xiph_number(buffer, pos)
            lengths.append(l)
            accumlength += l
        # the last frame takes whatever is left
        lengths.append(len(buffer) - pos - accumlength)
    elif laceflags == 0x06:  # EBML lacing
        accumlength = 0
        if numframes:
            (flength, pos) = parse_matroska_number(buffer, pos, signed=False)
            lengths.append(flength)
            accumlength += flength
        # following sizes are stored as signed differences to the previous one
        for _ in range(numframes - 2):
            (l, pos) = parse_matroska_number(buffer, pos, signed=True)
            flength += l
            lengths.append(flength)
            accumlength += flength
        lengths.append(len(buffer) - pos - accumlength)
    elif laceflags == 0x04:  # Fixed size lacing
        fl = (len(buffer) - pos) // numframes
        lengths = [fl] * numframes

    more_laced_frames = numframes - 1
    for i in lengths:
        # buf = buffer[pos:pos+i]
        handler.frame(tracknum, block_timecode, buffer_pos + pos, i, more_laced_frames, duration, f_keyframe, f_invisible, f_discardable)
        pos += i
        more_laced_frames -= 1


# (first ID byte, remaining ID bytes, element ID) of the elements resync() looks for
_RESYNC_MARKERS = (
    (b"\x1F", b"\x43\xB6\x75", 0x1F43B675),  # cluster
    (b"\x18", b"\x53\x80\x67", 0x18538067),  # segment
    (b"\x16", b"\x54\xAE\x6B", 0x1654AE6B),  # tracks
)


def resync(f):
    '''
        Scan f forward for the next Cluster, Segment or Tracks element ID.

        Returns (element id, element size, header size) of the element found,
        or (None, None, None) when the end of f is reached first.
    '''
    sys.stderr.write("mkvparse: Resyncing\n")
    while True:
        b = f.read(1)
        if not b:
            return (None, None, None)
        for (lead, tail, element_id) in _RESYNC_MARKERS:
            if b != lead:
                continue
            if f.read(3) == tail:
                try:
                    (seglen, x) = read_matroska_number(f)
                except StopIteration:
                    # EOF inside the size field
                    return (None, None, None)
                return (element_id, seglen, x + 4)
            break


def mkvparse(f, handler):
    '''
        Read mkv file f and call handler methods when track or segment information is ready or when frame is read.
        Handles lacing, timecodes (except of per-track scaling)

        f has to support read() and tell(). Parsing stops at the end of f, or
        when resyncing after an error does not find another known element.
        Raises NotImplementedError on a BlockGroup element.
    '''
    timecode_scale = 1000000
    current_cluster_timecode = 0
    resync_element_id = None
    resync_element_size = None
    resync_element_headersize = None
    header_removal_headers_for_tracks = {}
    while f:
        (id_, size, hsize) = (None, None, None)
        tree = None
        data = None
        (type_, name) = (None, None)
        try:
            if not resync_element_id:
                try:
                    handler.before_handling_an_element()
                    (id_, size, hsize) = read_ebml_element_header(f)
                except StopIteration:
                    break
                if id_ not in element_types_names:
                    sys.stderr.write("mkvparse: Unknown element with id %x and size %d\n" % (id_, size))
                    (resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
                    if resync_element_id:
                        continue
                    else:
                        break
            else:
                # resync() has already consumed the header of this element
                id_ = resync_element_id
                size = resync_element_size
                hsize = resync_element_headersize
                resync_element_id = None
                resync_element_size = None
                resync_element_headersize = None

            (type_, name) = element_types_names[id_]
            type_ = handler.begin_handling_ebml_element(id_, name, type_, hsize, size)

            if type_ == EET.MASTER:
                tree = read_ebml_element_tree(f, size)
                data = tree

        except Exception:
            traceback.print_exc()
            handler.before_handling_an_element()
            (resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
            if resync_element_id:
                continue
            else:
                break

        if name == "EBML" and isinstance(data, list):
            d = dict(tree)
            if 'EBMLReadVersion' in d:
                if d['EBMLReadVersion'][1] > 1:
                    sys.stderr.write("mkvparse: Warning: EBMLReadVersion too big\n")
            if 'DocTypeReadVersion' in d:
                if d['DocTypeReadVersion'][1] > 2:
                    sys.stderr.write("mkvparse: Warning: DocTypeReadVersion too big\n")
            # a missing DocType is reported like an unexpected one
            dt = d['DocType'][1] if 'DocType' in d else None
            if dt != "matroska" and dt != "webm":
                sys.stderr.write("mkvparse: Warning: EBML DocType is not \"matroska\" or \"webm\"\n")
        elif name == "Info" and isinstance(data, list):
            handler.segment_info = tree
            handler.segment_info_available()

            d = dict(tree)
            if "TimecodeScale" in d:
                timecode_scale = d["TimecodeScale"][1]
        elif name == "Tracks" and isinstance(data, list):
            handler.tracks = {}
            for (ten, (_t, track)) in tree:
                if ten != "TrackEntry":
                    continue
                d = dict(track)
                n = d['TrackNumber'][1]
                handler.tracks[n] = d
                tt = d['TrackType'][1]
                if tt == 0x01:
                    d['type'] = 'video'
                elif tt == 0x02:
                    d['type'] = 'audio'
                elif tt == 0x03:
                    d['type'] = 'complex'
                elif tt == 0x10:
                    d['type'] = 'logo'
                elif tt == 0x11:
                    d['type'] = 'subtitle'
                elif tt == 0x12:
                    d['type'] = 'button'
                elif tt == 0x20:
                    d['type'] = 'control'
                if 'TrackTimecodeScale' in d:
                    sys.stderr.write("mkvparse: Warning: TrackTimecodeScale is not supported\n")
                if 'ContentEncodings' in d:
                    try:
                        # ContentEncodings -> first ContentEncoding -> first child -> its children
                        compr = dict(d["ContentEncodings"][1][0][1][1][0][1][1])
                        if compr["ContentCompAlgo"][1] == 3:
                            header_removal_headers_for_tracks[n] = compr["ContentCompSettings"][1]
                        else:
                            sys.stderr.write("mkvparse: Warning: compression other than "
                                             "header removal is not supported\n")
                    except Exception:
                        sys.stderr.write("mkvparse: Warning: unsuccessfully tried "
                                         "to handle header removal compression\n")
            handler.tracks_available()
        # cluster contents:
        elif name == "Timecode" and type_ == EET.UNSIGNED:
            data = read_fixedlength_number(f, size, False)
            current_cluster_timecode = data
        elif name == "SimpleBlock" and type_ == EET.BINARY:
            pos = f.tell()
            data = f.read(size)
            handle_block(data, pos, handler, current_cluster_timecode, timecode_scale, None, header_removal_headers_for_tracks)
        elif name == "BlockGroup" and type_ == EET.MASTER:
            raise NotImplementedError
            # d2 = dict(tree)
            # duration=None
            # if 'BlockDuration' in d2:
            #     duration = d2['BlockDuration'][1]
            #     duration = duration*0.000000001*timecode_scale
            # if 'Block' in d2:
            #     handle_block(d2['Block'][1], None, handler, current_cluster_timecode, timecode_scale, duration, header_removal_headers_for_tracks)
        else:
            if type_ != EET.JUST_GO_ON and type_ != EET.MASTER:
                data = read_simple_element(f, type_, size)

        handler.ebml_top_element(id_, name, type_, data)


if __name__ == '__main__':
    print("Run mkvuser.py for the example")