From 29ca4aa7dea6ac8099a9200a37c007d934b8e395 Mon Sep 17 00:00:00 2001 From: ZwX1616 Date: Thu, 27 Feb 2025 17:00:56 -0800 Subject: [PATCH] modeld refactor: supercombo -> vision+policy (#34718) * should be ok * update tg * type * wrong model * is no more * update readme * add back --------- Co-authored-by: Comma Device --- release/build_release.sh | 3 +- selfdrive/modeld/SConscript | 11 +-- selfdrive/modeld/constants.py | 2 +- selfdrive/modeld/get_model_metadata.py | 3 +- selfdrive/modeld/modeld.py | 75 +++++++++++++-------- selfdrive/modeld/models/README.md | 15 +++-- selfdrive/modeld/models/driving_policy.onnx | 3 + selfdrive/modeld/models/driving_vision.onnx | 3 + selfdrive/modeld/models/supercombo.onnx | 3 - selfdrive/modeld/parse_model_outputs.py | 21 ++++-- tinygrad_repo | 2 +- 11 files changed, 88 insertions(+), 53 deletions(-) create mode 100644 selfdrive/modeld/models/driving_policy.onnx create mode 100644 selfdrive/modeld/models/driving_vision.onnx delete mode 100644 selfdrive/modeld/models/supercombo.onnx diff --git a/release/build_release.sh b/release/build_release.sh index 8b26dc74ab..d09f762263 100755 --- a/release/build_release.sh +++ b/release/build_release.sh @@ -74,7 +74,8 @@ find . -name '*.pyc' -delete find . -name 'moc_*' -delete find . -name '__pycache__' -delete rm -rf .sconsign.dblite Jenkinsfile release/ -rm selfdrive/modeld/models/supercombo.onnx +rm selfdrive/modeld/models/driving_vision.onnx +rm selfdrive/modeld/models/driving_policy.onnx find third_party/ -name '*x86*' -exec rm -r {} + find third_party/ -name '*Darwin*' -exec rm -r {} + diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index 6caac3bce3..cecebfa18b 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -32,10 +32,11 @@ lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LI tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x] # Get model metadata -fn = File("models/supercombo").abspath -script_files = [File(Dir("#selfdrive/modeld").File("get_model_metadata.py").abspath)] -cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx' -lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_files, cmd) +for model_name in ['driving_vision', 'driving_policy']: + fn = File(f"models/{model_name}").abspath + script_files = [File(Dir("#selfdrive/modeld").File("get_model_metadata.py").abspath)] + cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx' + lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_files, cmd) # Compile tinygrad model pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"' @@ -46,7 +47,7 @@ elif arch == 'Darwin': else: device_string = 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0' -for model_name in ['supercombo', 'dmonitoring_model']: +for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']: fn = File(f"models/{model_name}").abspath cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl' lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd) diff --git a/selfdrive/modeld/constants.py b/selfdrive/modeld/constants.py index cf5157591e..2bb7b8100c 100644 --- a/selfdrive/modeld/constants.py +++ b/selfdrive/modeld/constants.py @@ -15,7 +15,7 @@ class ModelConstants: # model inputs constants MODEL_FREQ = 20 FEATURE_LEN = 512 - FULL_HISTORY_BUFFER_LEN = 99 + FULL_HISTORY_BUFFER_LEN = 100 DESIRE_LEN = 8 TRAFFIC_CONVENTION_LEN = 2 LAT_PLANNER_STATE_LEN = 4 diff --git a/selfdrive/modeld/get_model_metadata.py b/selfdrive/modeld/get_model_metadata.py index 0f1fd2a98b..2001d23d75 100755 --- a/selfdrive/modeld/get_model_metadata.py +++ b/selfdrive/modeld/get_model_metadata.py @@ -24,8 +24,7 @@ if __name__ == "__main__": assert output_slices is not None, 'output_slices not found in metadata' metadata = { - 'policy_model': get_metadata_value_by_name(model, 'policy_model'), - 'vision_model': get_metadata_value_by_name(model, 'vision_model'), + 'model_checkpoint': get_metadata_value_by_name(model, 'model_checkpoint'), 'output_slices': pickle.loads(codecs.decode(output_slices.encode(), "base64")), 'input_shapes': dict([get_name_and_shape(x) for x in model.graph.input]), 'output_shapes': dict([get_name_and_shape(x) for x in model.graph.output]) diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py index 7a3c7288b0..ee825d158f 100755 --- a/selfdrive/modeld/modeld.py +++ b/selfdrive/modeld/modeld.py @@ -35,8 +35,10 @@ from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, PROCESS_NAME = "selfdrive.modeld.modeld" SEND_RAW_PRED = os.getenv('SEND_RAW_PRED') -MODEL_PKL_PATH = Path(__file__).parent / 'models/supercombo_tinygrad.pkl' -METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl' +VISION_PKL_PATH = Path(__file__).parent / 'models/driving_vision_tinygrad.pkl' +POLICY_PKL_PATH = Path(__file__).parent / 'models/driving_policy_tinygrad.pkl' +VISION_METADATA_PATH = Path(__file__).parent / 'models/driving_vision_metadata.pkl' +POLICY_METADATA_PATH = Path(__file__).parent / 'models/driving_policy_metadata.pkl' class FrameMeta: frame_id: int = 0 @@ -57,32 +59,42 @@ class ModelState: self.frames = {'input_imgs': DrivingModelFrame(context), 'big_input_imgs': DrivingModelFrame(context)} self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32) - # img buffers are managed in openCL transform code + # policy inputs self.numpy_inputs = { - 'desire': np.zeros((1, (ModelConstants.FULL_HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32), + 'desire': np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.DESIRE_LEN), dtype=np.float32), 'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32), 'lateral_control_params': np.zeros((1, ModelConstants.LATERAL_CONTROL_PARAMS_LEN), dtype=np.float32), - 'prev_desired_curv': np.zeros((1, (ModelConstants.FULL_HISTORY_BUFFER_LEN+1), ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32), + 'prev_desired_curv': np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32), 'features_buffer': np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32), } - with open(METADATA_PATH, 'rb') as f: - model_metadata = pickle.load(f) - self.input_shapes = model_metadata['input_shapes'] + with open(VISION_METADATA_PATH, 'rb') as f: + vision_metadata = pickle.load(f) + self.vision_input_shapes = vision_metadata['input_shapes'] + self.vision_output_slices = vision_metadata['output_slices'] + vision_output_size = vision_metadata['output_shapes']['outputs'][1] + + with open(POLICY_METADATA_PATH, 'rb') as f: + policy_metadata = pickle.load(f) + self.policy_input_shapes = policy_metadata['input_shapes'] + self.policy_output_slices = policy_metadata['output_slices'] + policy_output_size = policy_metadata['output_shapes']['outputs'][1] - self.output_slices = model_metadata['output_slices'] - net_output_size = model_metadata['output_shapes']['outputs'][1] - self.output = np.zeros(net_output_size, dtype=np.float32) + # img buffers are managed in openCL transform code + self.vision_inputs: dict[str, Tensor] = {} + self.vision_output = np.zeros(vision_output_size, dtype=np.float32) + self.policy_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()} + self.policy_output = np.zeros(policy_output_size, dtype=np.float32) self.parser = Parser() - self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()} - with open(MODEL_PKL_PATH, "rb") as f: - self.model_run = pickle.load(f) + with open(VISION_PKL_PATH, "rb") as f: + self.vision_run = pickle.load(f) - def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]: - parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()} - if SEND_RAW_PRED: - parsed_model_outputs['raw_pred'] = model_outputs.copy() + with open(POLICY_PKL_PATH, "rb") as f: + self.policy_run = pickle.load(f) + + def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]: + parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()} return parsed_model_outputs def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray, @@ -103,29 +115,34 @@ class ModelState: if TICI: # The imgs tensors are backed by opencl memory, only need init once for key in imgs_cl: - if key not in self.tensor_inputs: - self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8) + if key not in self.vision_inputs: + self.vision_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.vision_input_shapes[key], dtype=dtypes.uint8) else: for key in imgs_cl: - self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key]) - self.tensor_inputs[key] = Tensor(self.numpy_inputs[key], dtype=dtypes.uint8).realize() - + frame_input = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.vision_input_shapes[key]) + self.vision_inputs[key] = Tensor(frame_input, dtype=dtypes.uint8).realize() if prepare_only: return None - self.output = self.model_run(**self.tensor_inputs).numpy().flatten() - - outputs = self.parser.parse_outputs(self.slice_outputs(self.output)) + self.vision_output = self.vision_run(**self.vision_inputs).numpy().flatten() + vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(self.vision_output, self.vision_output_slices)) self.numpy_inputs['features_buffer'][0,:-1] = self.numpy_inputs['features_buffer'][0,1:] - self.numpy_inputs['features_buffer'][0,-1] = outputs['hidden_state'][0, :] + self.numpy_inputs['features_buffer'][0,-1] = vision_outputs_dict['hidden_state'][0, :] + self.policy_output = self.policy_run(**self.policy_inputs).numpy().flatten() + policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(self.policy_output, self.policy_output_slices)) # TODO model only uses last value now self.numpy_inputs['prev_desired_curv'][0,:-1] = self.numpy_inputs['prev_desired_curv'][0,1:] - self.numpy_inputs['prev_desired_curv'][0,-1,:] = outputs['desired_curvature'][0, :] - return outputs + self.numpy_inputs['prev_desired_curv'][0,-1,:] = policy_outputs_dict['desired_curvature'][0, :] + + combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict} + if SEND_RAW_PRED: + combined_outputs_dict['raw_pred'] = np.concatenate([self.vision_output.copy(), self.policy_output.copy()]) + + return combined_outputs_dict def main(demo=False): diff --git a/selfdrive/modeld/models/README.md b/selfdrive/modeld/models/README.md index 9e11ca8255..255f28d80e 100644 --- a/selfdrive/modeld/models/README.md +++ b/selfdrive/modeld/models/README.md @@ -1,8 +1,8 @@ ## Neural networks in openpilot To view the architecture of the ONNX networks, you can use [netron](https://netron.app/) -## Supercombo -### Supercombo input format (Full size: 799906 x float32) +## Driving Model (vision model + temporal policy model) +### Vision inputs (Full size: 799906 x float32) * **image stream** * Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256 * Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256 @@ -15,16 +15,21 @@ To view the architecture of the ONNX networks, you can use [netron](https://netr * Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2] * Channel 4 represents the half-res U channel * Channel 5 represents the half-res V channel +### Policy inputs * **desire** * one-hot encoded buffer to command model to execute certain actions, bit needs to be sent for the past 5 seconds (at 20FPS) : 100 * 8 * **traffic convention** * one-hot encoded vector to tell model whether traffic is right-hand or left-hand traffic : 2 +* **lateral control params** + * speed and steering delay for predicting the desired curvature: 2 +* **previous desired curvatures** + * vector of previously predicted desired curvatures: 100 * 1 * **feature buffer** - * A buffer of intermediate features that gets appended to the current feature to form a 5 seconds temporal context (at 20FPS) : 99 * 512 + * a buffer of intermediate features including the current feature to form a 5 seconds temporal context (at 20FPS) : 100 * 512 -### Supercombo output format (Full size: XXX x float32) -Read [here](https://github.com/commaai/openpilot/blob/90af436a121164a51da9fa48d093c29f738adf6a/selfdrive/modeld/models/driving.h#L236) for more. +### Driving Model output format (Full size: XXX x float32) +Refer to **slice_outputs** and **parse_vision_outputs/parse_policy_outputs** in modeld. ## Driver Monitoring Model diff --git a/selfdrive/modeld/models/driving_policy.onnx b/selfdrive/modeld/models/driving_policy.onnx new file mode 100644 index 0000000000..f804b4ec31 --- /dev/null +++ b/selfdrive/modeld/models/driving_policy.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cae3285c876804e649b14adadcfb8be79a9bd5a1b928113e37f1f08e25e9688 +size 16581121 diff --git a/selfdrive/modeld/models/driving_vision.onnx b/selfdrive/modeld/models/driving_vision.onnx new file mode 100644 index 0000000000..06c87d8755 --- /dev/null +++ b/selfdrive/modeld/models/driving_vision.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29bbf79f9dfd7048c0013bb81e86d9b2979275b95ea1ed8a86d1a86a88695240 +size 34882971 diff --git a/selfdrive/modeld/models/supercombo.onnx b/selfdrive/modeld/models/supercombo.onnx deleted file mode 100644 index 4c9f795574..0000000000 --- a/selfdrive/modeld/models/supercombo.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d21daa542227ecc5972da45df4e26f018ba113c0461f270e367d57e3ad89221a -size 51461700 diff --git a/selfdrive/modeld/parse_model_outputs.py b/selfdrive/modeld/parse_model_outputs.py index 9b162efe89..810c44ccb9 100644 --- a/selfdrive/modeld/parse_model_outputs.py +++ b/selfdrive/modeld/parse_model_outputs.py @@ -84,23 +84,32 @@ class Parser: outs[name] = pred_mu_final.reshape(final_shape) outs[name + '_stds'] = pred_std_final.reshape(final_shape) - def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: + def parse_vision_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: + self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) + self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,)) + self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) + self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH)) + self.parse_binary_crossentropy('meta', outs) + return outs + + def parse_policy_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: self.parse_mdn('plan', outs, in_N=ModelConstants.PLAN_MHP_N, out_N=ModelConstants.PLAN_MHP_SELECTION, out_shape=(ModelConstants.IDX_N,ModelConstants.PLAN_WIDTH)) self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH)) self.parse_mdn('road_edges', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_ROAD_EDGES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH)) - self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) - self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) self.parse_mdn('sim_pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) - self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,)) self.parse_mdn('lead', outs, in_N=ModelConstants.LEAD_MHP_N, out_N=ModelConstants.LEAD_MHP_SELECTION, out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH)) if 'lat_planner_solution' in outs: self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH)) if 'desired_curvature' in outs: self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,)) - for k in ['lead_prob', 'lane_lines_prob', 'meta']: + for k in ['lead_prob', 'lane_lines_prob']: self.parse_binary_crossentropy(k, outs) self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,)) - self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH)) + return outs + + def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: + outs = self.parse_vision_outputs(outs) + outs = self.parse_policy_outputs(outs) return outs diff --git a/tinygrad_repo b/tinygrad_repo index 820d1eb159..6f39c4d653 160000 --- a/tinygrad_repo +++ b/tinygrad_repo @@ -1 +1 @@ -Subproject commit 820d1eb159f327961b90044f8c7d21c6160d7b08 +Subproject commit 6f39c4d653737c056540194605dc18a7273df280