diff --git a/release/files_common b/release/files_common
index 7bfb0f62a7..7073625cfb 100644
--- a/release/files_common
+++ b/release/files_common
@@ -424,6 +424,7 @@
 selfdrive/modeld/transforms/transform.cc
 selfdrive/modeld/transforms/transform.h
 selfdrive/modeld/transforms/transform.cl
+selfdrive/modeld/thneed/*.py
 selfdrive/modeld/thneed/thneed.*
 selfdrive/modeld/thneed/serialize.cc
 selfdrive/modeld/thneed/compile.cc
diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript
index 990f8789b9..20d3fb8acc 100644
--- a/selfdrive/modeld/SConscript
+++ b/selfdrive/modeld/SConscript
@@ -67,14 +67,22 @@ common_model = lenv.Object(common_src)
 if use_thneed and arch in ("aarch64", "larch64"):
   fn = File("#models/supercombo").abspath
   compiler = lenv.Program('thneed/compile', ["thneed/compile.cc"]+common_model, LIBS=libs)
-  cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} {fn}.dlc {fn}.thneed --binary"
+  cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} {fn}.dlc {fn}_badweights.thneed --binary"
 
   lib_paths = ':'.join(Dir(p).abspath for p in lenv["LIBPATH"])
   kernel_path = os.path.join(Dir('.').abspath, "thneed", "kernels")
   cenv = Environment(ENV={'LD_LIBRARY_PATH': f"{lib_paths}:{lenv['ENV']['LD_LIBRARY_PATH']}", 'KERNEL_PATH': kernel_path})
 
   kernels = [os.path.join(kernel_path, x) for x in os.listdir(kernel_path) if x.endswith(".cl")]
-  cenv.Command(fn + ".thneed", [fn + ".dlc", kernels, compiler], cmd)
+  cenv.Command(fn + "_badweights.thneed", [fn + ".dlc", kernels, compiler], cmd)
+
+  from selfdrive.modeld.thneed.weights_fixup import weights_fixup
+  def weights_fixup_action(target, source, env):
+    weights_fixup(target[0].abspath, source[0].abspath, source[1].abspath)
+
+  env = Environment(BUILDERS = {'WeightFixup' : Builder(action = weights_fixup_action)})
+  env.WeightFixup(target=fn + ".thneed", source=[fn+"_badweights.thneed", fn+".dlc"])
+
 
 lenv.Program('_dmonitoringmodeld', [
   "dmonitoringmodeld.cc",
diff --git a/selfdrive/modeld/thneed/__init__.py b/selfdrive/modeld/thneed/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/selfdrive/modeld/thneed/lib.py b/selfdrive/modeld/thneed/lib.py
new file mode 100644
index 0000000000..dccdfb10ac
--- /dev/null
+++ b/selfdrive/modeld/thneed/lib.py
@@ -0,0 +1,31 @@
+import struct, json
+
+def load_thneed(fn):
+  with open(fn, "rb") as f:
+    json_len = struct.unpack("I", f.read(4))[0]
+    jdat = json.loads(f.read(json_len).decode('latin_1'))
+    weights = f.read()
+  ptr = 0
+  for o in jdat['objects']:
+    if o['needs_load']:
+      nptr = ptr + o['size']
+      o['data'] = weights[ptr:nptr]
+      ptr = nptr
+  for o in jdat['binaries']:
+    nptr = ptr + o['length']
+    o['data'] = weights[ptr:nptr]
+    ptr = nptr
+  return jdat
+
+def save_thneed(jdat, fn):
+  new_weights = []
+  for o in jdat['objects'] + jdat['binaries']:
+    if 'data' in o:
+      new_weights.append(o['data'])
+      del o['data']
+  new_weights = b''.join(new_weights)
+  with open(fn, "wb") as f:
+    j = json.dumps(jdat, ensure_ascii=False).encode('latin_1')
+    f.write(struct.pack("I", len(j)))
+    f.write(j)
+    f.write(new_weights)
diff --git a/selfdrive/modeld/thneed/weights_fixup.py b/selfdrive/modeld/thneed/weights_fixup.py
new file mode 100755
index 0000000000..47875a9ee0
--- /dev/null
+++ b/selfdrive/modeld/thneed/weights_fixup.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+import os
+import struct
+import zipfile
+import numpy as np
+from tqdm import tqdm
+
+from common.basedir import BASEDIR
+from selfdrive.modeld.thneed.lib import load_thneed, save_thneed
+
+# this is junk code, but it doesn't have deps
+def load_dlc_weights(fn):
+  archive = zipfile.ZipFile(fn, 'r')
+  dlc_params = archive.read("model.params")
+
+  def extract(rdat):
+    idx = rdat.find(b"\x00\x00\x00\x09\x04\x00\x00\x00")
+    rdat = rdat[idx+8:]
+    ll = struct.unpack("I", rdat[0:4])[0]
+    buf = np.frombuffer(rdat[4:4+ll*4], dtype=np.float32)
+    rdat = rdat[4+ll*4:]
+    dims = struct.unpack("I", rdat[0:4])[0]
+    buf = buf.reshape(struct.unpack("I"*dims, rdat[4:4+dims*4]))
+    if len(buf.shape) == 4:
+      buf = np.transpose(buf, (3,2,0,1))
+    return buf
+
+  def parse(tdat):
+    ll = struct.unpack("I", tdat[0:4])[0] + 4
+    return (None, [extract(tdat[0:]), extract(tdat[ll:])])
+
+  ptr = 0x20
+  def r4():
+    nonlocal ptr
+    ret = struct.unpack("I", dlc_params[ptr:ptr+4])[0]
+    ptr += 4
+    return ret
+  ranges = []
+  cnt = r4()
+  for _ in range(cnt):
+    o = r4() + ptr
+    # the header is 0xC
+    plen, is_4, is_2 = struct.unpack("III", dlc_params[o:o+0xC])
+    assert is_4 == 4 and is_2 == 2
+    ranges.append((o+0xC, o+plen+0xC))
+  ranges = sorted(ranges, reverse=True)
+
+  return [parse(dlc_params[s:e]) for s,e in ranges]
+
+# this won't run on device without onnx
+def load_onnx_weights(fn):
+  import onnx
+  from onnx import numpy_helper
+
+  model = onnx.load(fn)
+  graph = model.graph # pylint: disable=maybe-no-member
+  init = {x.name:x for x in graph.initializer}
+
+  onnx_layers = []
+  for node in graph.node:
+    #print(node.name, node.op_type, node.input, node.output)
+    vals = []
+    for inp in node.input:
+      if inp in init:
+        vals.append(numpy_helper.to_array(init[inp]))
+    if len(vals) > 0:
+      onnx_layers.append((node.name, vals))
+  return onnx_layers
+
+def weights_fixup(target, source_thneed, dlc):
+  #onnx_layers = load_onnx_weights(os.path.join(BASEDIR, "models/supercombo.onnx"))
+  onnx_layers = load_dlc_weights(dlc)
+  jdat = load_thneed(source_thneed)
+
+  bufs = {}
+  for o in jdat['objects']:
+    bufs[o['id']] = o
+
+  thneed_layers = []
+  for k in jdat['kernels']:
+    #print(k['name'])
+    vals = []
+    for a in k['args']:
+      if a in bufs:
+        o = bufs[a]
+        if o['needs_load'] or ('buffer_id' in o and bufs[o['buffer_id']]['needs_load']):
+          #print("  ", o['arg_type'])
+          vals.append(o)
+    if len(vals) > 0:
+      thneed_layers.append((k['name'], vals))
+
+  assert len(thneed_layers) == len(onnx_layers)
+
+  # fix up weights
+  for tl, ol in tqdm(zip(thneed_layers, onnx_layers), total=len(thneed_layers)):
+    #print(tl[0], ol[0])
+    assert len(tl[1]) == len(ol[1])
+    for o, onnx_weight in zip(tl[1], ol[1]):
+      if o['arg_type'] == "image2d_t":
+        obuf = bufs[o['buffer_id']]
+        saved_weights = np.frombuffer(obuf['data'], dtype=np.float16).reshape(o['height'], o['row_pitch']//2)
+
+        if len(onnx_weight.shape) == 4:
+          # convolution
+          oc,ic,ch,cw = onnx_weight.shape
+
+          if 'depthwise' in tl[0]:
+            assert ic == 1
+            weights = np.transpose(onnx_weight.reshape(oc//4,4,ch,cw), (0,2,3,1)).reshape(o['height'], o['width']*4)
+          else:
+            weights = np.transpose(onnx_weight.reshape(oc//4,4,ic//4,4,ch,cw), (0,4,2,5,1,3)).reshape(o['height'], o['width']*4)
+        else:
+          # fc_Wtx
+          weights = onnx_weight
+
+        new_weights = np.zeros((o['height'], o['row_pitch']//2), dtype=np.float32)
+        new_weights[:, :weights.shape[1]] = weights
+
+        # weights shouldn't be too far off
+        err = np.mean((saved_weights.astype(np.float32) - new_weights)**2)
+        assert err < 1e-3
+        rerr = np.mean(np.abs((saved_weights.astype(np.float32) - new_weights)/(new_weights+1e-12)))
+        assert rerr < 0.5
+
+        # fix should improve things
+        fixed_err = np.mean((new_weights.astype(np.float16).astype(np.float32) - new_weights)**2)
+        assert (err/fixed_err) >= 1
+
+        #print("  ", o['size'], onnx_weight.shape, o['row_pitch'], o['width'], o['height'], "err %.2fx better" % (err/fixed_err))
+
+        obuf['data'] = new_weights.astype(np.float16).tobytes()
+
+      elif o['arg_type'] == "float*":
+        # unconverted floats are correct
+        new_weights = np.zeros(o['size']//4, dtype=np.float32)
+        new_weights[:onnx_weight.shape[0]] = onnx_weight
+        assert new_weights.tobytes() == o['data']
+        #print("  ", o['size'], onnx_weight.shape)
+
+  save_thneed(jdat, target)
+
+if __name__ == "__main__":
+  weights_fixup(os.path.join(BASEDIR, "models/supercombo_fixed.thneed"),
+                os.path.join(BASEDIR, "models/supercombo.thneed"),
+                os.path.join(BASEDIR, "models/supercombo.dlc"))
diff --git a/selfdrive/test/process_replay/model_replay_ref_commit b/selfdrive/test/process_replay/model_replay_ref_commit
index d210251c09..850b8561a4 100644
--- a/selfdrive/test/process_replay/model_replay_ref_commit
+++ b/selfdrive/test/process_replay/model_replay_ref_commit
@@ -1 +1 @@
-19720e79b1c5136a882efd689651d9044e2e2007
+15821a7f867f6b497a17e8a36c9d42ad548acacd
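
Note on the new helpers: `lib.py` treats a `.thneed` file as a small container, a 4-byte `struct`-packed JSON length, then the JSON header describing kernels, objects, and binaries, then the raw weight bytes of every `needs_load` object followed by the program binaries, in header order. A minimal sketch of using them to inspect and round-trip a compiled model (the paths here are illustrative, not part of this change):

```python
#!/usr/bin/env python3
# Minimal sketch using the helpers added in this diff; the input path is illustrative.
import os
from common.basedir import BASEDIR
from selfdrive.modeld.thneed.lib import load_thneed, save_thneed

fn = os.path.join(BASEDIR, "models/supercombo.thneed")
jdat = load_thneed(fn)

# load_thneed attaches the raw bytes for each needs_load object and each binary as 'data'
print("kernels:", len(jdat['kernels']))
print("objects with weights:", sum(1 for o in jdat['objects'] if o['needs_load']))
print("binaries:", len(jdat['binaries']))

# save_thneed strips the 'data' fields back out and re-concatenates them after the JSON
# header, so a load/save round trip should produce an equivalent container.
save_thneed(jdat, "/tmp/supercombo_copy.thneed")
```

`weights_fixup.py` uses these same helpers to overwrite the float16 image weights in place, which is what the new `WeightFixup` SCons builder runs on `supercombo_badweights.thneed` to produce the final `supercombo.thneed`.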