modeld refactor: supercombo -> vision+policy (#34718)

* should be ok

* update tg

* type

* wrong model

* is no more

* update readme

* add back

---------

Co-authored-by: Comma Device <device@comma.ai>
Branch: pull/34728/head
Author: ZwX1616 (committed by GitHub)
Parent: 8f3e773977
Commit: 29ca4aa7de
11 changed files (diff line counts in parentheses):
  1. release/build_release.sh (3)
  2. selfdrive/modeld/SConscript (11)
  3. selfdrive/modeld/constants.py (2)
  4. selfdrive/modeld/get_model_metadata.py (3)
  5. selfdrive/modeld/modeld.py (75)
  6. selfdrive/modeld/models/README.md (15)
  7. selfdrive/modeld/models/driving_policy.onnx (3)
  8. selfdrive/modeld/models/driving_vision.onnx (3)
  9. selfdrive/modeld/models/supercombo.onnx (3)
  10. selfdrive/modeld/parse_model_outputs.py (21)
  11. tinygrad_repo (2)

release/build_release.sh
@@ -74,7 +74,8 @@ find . -name '*.pyc' -delete
 find . -name 'moc_*' -delete
 find . -name '__pycache__' -delete
 rm -rf .sconsign.dblite Jenkinsfile release/
-rm selfdrive/modeld/models/supercombo.onnx
+rm selfdrive/modeld/models/driving_vision.onnx
+rm selfdrive/modeld/models/driving_policy.onnx
 find third_party/ -name '*x86*' -exec rm -r {} +
 find third_party/ -name '*Darwin*' -exec rm -r {} +

selfdrive/modeld/SConscript
@@ -32,10 +32,11 @@ lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LI
 tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x]

 # Get model metadata
-fn = File("models/supercombo").abspath
-script_files = [File(Dir("#selfdrive/modeld").File("get_model_metadata.py").abspath)]
-cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
-lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_files, cmd)
+for model_name in ['driving_vision', 'driving_policy']:
+  fn = File(f"models/{model_name}").abspath
+  script_files = [File(Dir("#selfdrive/modeld").File("get_model_metadata.py").abspath)]
+  cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
+  lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_files, cmd)

 # Compile tinygrad model
 pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'

@@ -46,7 +47,7 @@ elif arch == 'Darwin':
 else:
   device_string = 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0'

-for model_name in ['supercombo', 'dmonitoring_model']:
+for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
   fn = File(f"models/{model_name}").abspath
   cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
   lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd)
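
As a sanity check on the new build graph, here is an illustrative listing (not part of the commit) of the artifacts the refactored SConscript should now produce; the model names follow the two loops above:

```python
# Illustrative only: expected SConscript outputs after the refactor.
METADATA_MODELS = ['driving_vision', 'driving_policy']                       # get_model_metadata.py loop
TINYGRAD_MODELS = ['driving_vision', 'driving_policy', 'dmonitoring_model']  # compile3.py loop

for name in METADATA_MODELS:
  print(f"selfdrive/modeld/models/{name}_metadata.pkl")
for name in TINYGRAD_MODELS:
  print(f"selfdrive/modeld/models/{name}_tinygrad.pkl")
```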

selfdrive/modeld/constants.py
@@ -15,7 +15,7 @@ class ModelConstants:
   # model inputs constants
   MODEL_FREQ = 20
   FEATURE_LEN = 512
-  FULL_HISTORY_BUFFER_LEN = 99
+  FULL_HISTORY_BUFFER_LEN = 100
   DESIRE_LEN = 8
   TRAFFIC_CONVENTION_LEN = 2
   LAT_PLANNER_STATE_LEN = 4
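
This length change is what removes the `+1` terms in modeld.py below: the history buffers now include the current frame (see the README diff further down), so at 20 Hz a 100-entry buffer still spans 5 seconds. A quick check:

```python
# Quick arithmetic check (values from ModelConstants above).
MODEL_FREQ = 20                # Hz
FULL_HISTORY_BUFFER_LEN = 100  # was 99, with the current frame appended separately
assert FULL_HISTORY_BUFFER_LEN / MODEL_FREQ == 5.0  # 5 s of temporal context
```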

selfdrive/modeld/get_model_metadata.py
@@ -24,8 +24,7 @@ if __name__ == "__main__":
   assert output_slices is not None, 'output_slices not found in metadata'

   metadata = {
-    'policy_model': get_metadata_value_by_name(model, 'policy_model'),
-    'vision_model': get_metadata_value_by_name(model, 'vision_model'),
+    'model_checkpoint': get_metadata_value_by_name(model, 'model_checkpoint'),
     'output_slices': pickle.loads(codecs.decode(output_slices.encode(), "base64")),
     'input_shapes': dict([get_name_and_shape(x) for x in model.graph.input]),
     'output_shapes': dict([get_name_and_shape(x) for x in model.graph.output])
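
For reviewers poking at the artifacts, a minimal sketch of reading one of the per-model metadata pickles. The path and key names follow this diff; the script itself is hypothetical:

```python
import pickle

# Hypothetical inspection snippet; keys match the metadata dict above.
with open('selfdrive/modeld/models/driving_vision_metadata.pkl', 'rb') as f:
  meta = pickle.load(f)

print(meta['model_checkpoint'])     # checkpoint string read from the ONNX metadata
print(meta['input_shapes'])         # graph input name -> shape tuple
print(list(meta['output_slices']))  # output head name -> slice into the flat output
```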

selfdrive/modeld/modeld.py
@@ -35,8 +35,10 @@ from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame,

 PROCESS_NAME = "selfdrive.modeld.modeld"
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
-MODEL_PKL_PATH = Path(__file__).parent / 'models/supercombo_tinygrad.pkl'
-METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'
+VISION_PKL_PATH = Path(__file__).parent / 'models/driving_vision_tinygrad.pkl'
+POLICY_PKL_PATH = Path(__file__).parent / 'models/driving_policy_tinygrad.pkl'
+VISION_METADATA_PATH = Path(__file__).parent / 'models/driving_vision_metadata.pkl'
+POLICY_METADATA_PATH = Path(__file__).parent / 'models/driving_policy_metadata.pkl'

 class FrameMeta:
   frame_id: int = 0
@@ -57,32 +59,42 @@ class ModelState:
     self.frames = {'input_imgs': DrivingModelFrame(context), 'big_input_imgs': DrivingModelFrame(context)}
     self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)

-    # img buffers are managed in openCL transform code
+    # policy inputs
     self.numpy_inputs = {
-      'desire': np.zeros((1, (ModelConstants.FULL_HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
+      'desire': np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.DESIRE_LEN), dtype=np.float32),
       'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32),
       'lateral_control_params': np.zeros((1, ModelConstants.LATERAL_CONTROL_PARAMS_LEN), dtype=np.float32),
-      'prev_desired_curv': np.zeros((1, (ModelConstants.FULL_HISTORY_BUFFER_LEN+1), ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32),
+      'prev_desired_curv': np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32),
       'features_buffer': np.zeros((1, ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32),
     }

-    with open(METADATA_PATH, 'rb') as f:
-      model_metadata = pickle.load(f)
-    self.input_shapes = model_metadata['input_shapes']
+    with open(VISION_METADATA_PATH, 'rb') as f:
+      vision_metadata = pickle.load(f)
+    self.vision_input_shapes = vision_metadata['input_shapes']
+    self.vision_output_slices = vision_metadata['output_slices']
+    vision_output_size = vision_metadata['output_shapes']['outputs'][1]

-    self.output_slices = model_metadata['output_slices']
-    net_output_size = model_metadata['output_shapes']['outputs'][1]
-    self.output = np.zeros(net_output_size, dtype=np.float32)
+    with open(POLICY_METADATA_PATH, 'rb') as f:
+      policy_metadata = pickle.load(f)
+    self.policy_input_shapes = policy_metadata['input_shapes']
+    self.policy_output_slices = policy_metadata['output_slices']
+    policy_output_size = policy_metadata['output_shapes']['outputs'][1]

+    # img buffers are managed in openCL transform code
+    self.vision_inputs: dict[str, Tensor] = {}
+    self.vision_output = np.zeros(vision_output_size, dtype=np.float32)
+    self.policy_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
+    self.policy_output = np.zeros(policy_output_size, dtype=np.float32)
     self.parser = Parser()
-    self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
-    with open(MODEL_PKL_PATH, "rb") as f:
-      self.model_run = pickle.load(f)

-  def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
-    parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
-    if SEND_RAW_PRED:
-      parsed_model_outputs['raw_pred'] = model_outputs.copy()
+    with open(VISION_PKL_PATH, "rb") as f:
+      self.vision_run = pickle.load(f)
+    with open(POLICY_PKL_PATH, "rb") as f:
+      self.policy_run = pickle.load(f)
+
+  def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]:
+    parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()}
     return parsed_model_outputs

   def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
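
`slice_outputs` no longer captures `self.output_slices`; the caller now passes the slices for whichever model just ran. A toy illustration of the pattern (the slice values are made up; the real ones come from the `*_metadata.pkl` files):

```python
import numpy as np

# Made-up slices standing in for a metadata 'output_slices' dict.
output_slices = {'hidden_state': slice(0, 4), 'pose': slice(4, 10)}
flat_output = np.arange(10, dtype=np.float32)

sliced = {k: flat_output[np.newaxis, v] for k, v in output_slices.items()}
assert sliced['hidden_state'].shape == (1, 4)
assert sliced['pose'].shape == (1, 6)
```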
@@ -103,29 +115,34 @@ class ModelState:
     if TICI:
       # The imgs tensors are backed by opencl memory, only need init once
       for key in imgs_cl:
-        if key not in self.tensor_inputs:
-          self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
+        if key not in self.vision_inputs:
+          self.vision_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.vision_input_shapes[key], dtype=dtypes.uint8)
     else:
       for key in imgs_cl:
-        self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
-        self.tensor_inputs[key] = Tensor(self.numpy_inputs[key], dtype=dtypes.uint8).realize()
+        frame_input = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.vision_input_shapes[key])
+        self.vision_inputs[key] = Tensor(frame_input, dtype=dtypes.uint8).realize()

     if prepare_only:
       return None

-    self.output = self.model_run(**self.tensor_inputs).numpy().flatten()
-
-    outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
+    self.vision_output = self.vision_run(**self.vision_inputs).numpy().flatten()
+    vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(self.vision_output, self.vision_output_slices))

     self.numpy_inputs['features_buffer'][0,:-1] = self.numpy_inputs['features_buffer'][0,1:]
-    self.numpy_inputs['features_buffer'][0,-1] = outputs['hidden_state'][0, :]
+    self.numpy_inputs['features_buffer'][0,-1] = vision_outputs_dict['hidden_state'][0, :]
+
+    self.policy_output = self.policy_run(**self.policy_inputs).numpy().flatten()
+    policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(self.policy_output, self.policy_output_slices))

     # TODO model only uses last value now
     self.numpy_inputs['prev_desired_curv'][0,:-1] = self.numpy_inputs['prev_desired_curv'][0,1:]
-    self.numpy_inputs['prev_desired_curv'][0,-1,:] = outputs['desired_curvature'][0, :]
-    return outputs
+    self.numpy_inputs['prev_desired_curv'][0,-1,:] = policy_outputs_dict['desired_curvature'][0, :]
+
+    combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
+    if SEND_RAW_PRED:
+      combined_outputs_dict['raw_pred'] = np.concatenate([self.vision_output.copy(), self.policy_output.copy()])
+
+    return combined_outputs_dict

 def main(demo=False):
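
Net effect of the modeld.py changes: one inference per frame becomes two chained inferences, with the vision model's `hidden_state` feeding the policy model's `features_buffer`. A schematic of the data flow with stand-in callables (the real `vision_run`/`policy_run` are the pickled tinygrad programs):

```python
import numpy as np

FEATURE_LEN, HISTORY = 512, 100  # from ModelConstants

def fake_vision_run():  # stand-in for the pickled driving_vision model
  return {'hidden_state': np.zeros((1, FEATURE_LEN), dtype=np.float32)}

def fake_policy_run(features_buffer):  # stand-in for the pickled driving_policy model
  return {'desired_curvature': np.zeros((1, 1), dtype=np.float32)}

features_buffer = np.zeros((1, HISTORY, FEATURE_LEN), dtype=np.float32)

vision_out = fake_vision_run()
# shift history left, append the newest feature (same pattern as run() above)
features_buffer[0, :-1] = features_buffer[0, 1:]
features_buffer[0, -1] = vision_out['hidden_state'][0]

policy_out = fake_policy_run(features_buffer)
combined = {**vision_out, **policy_out}  # what run() now returns downstream
```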

selfdrive/modeld/models/README.md
@@ -1,8 +1,8 @@
 ## Neural networks in openpilot

 To view the architecture of the ONNX networks, you can use [netron](https://netron.app/)

-## Supercombo
-### Supercombo input format (Full size: 799906 x float32)
+## Driving Model (vision model + temporal policy model)
+### Vision inputs (Full size: 799906 x float32)
 * **image stream**
   * Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256
     * Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256

@@ -15,16 +15,21 @@ To view the architecture of the ONNX networks, you can use [netron](https://netr
       * Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2]
       * Channel 4 represents the half-res U channel
       * Channel 5 represents the half-res V channel
+
+### Policy inputs
 * **desire**
   * one-hot encoded buffer to command model to execute certain actions, bit needs to be sent for the past 5 seconds (at 20FPS) : 100 * 8
 * **traffic convention**
   * one-hot encoded vector to tell model whether traffic is right-hand or left-hand traffic : 2
+* **lateral control params**
+  * speed and steering delay for predicting the desired curvature: 2
+* **previous desired curvatures**
+  * vector of previously predicted desired curvatures: 100 * 1
 * **feature buffer**
-  * A buffer of intermediate features that gets appended to the current feature to form a 5 seconds temporal context (at 20FPS) : 99 * 512
+  * a buffer of intermediate features including the current feature to form a 5 seconds temporal context (at 20FPS) : 100 * 512

-### Supercombo output format (Full size: XXX x float32)
-Read [here](https://github.com/commaai/openpilot/blob/90af436a121164a51da9fa48d093c29f738adf6a/selfdrive/modeld/models/driving.h#L236) for more.
+### Driving Model output format (Full size: XXX x float32)
+Refer to **slice_outputs** and **parse_vision_outputs/parse_policy_outputs** in modeld.

 ## Driver Monitoring Model
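
Since the README text above is the main reference for the input packing, here is a runnable sketch of the 6-channel YUV420 layout it describes (toy zero-filled planes for one 256 * 512 frame):

```python
import numpy as np

H, W = 256, 512
y = np.zeros((H, W), dtype=np.uint8)            # full-res Y plane
u = np.zeros((H // 2, W // 2), dtype=np.uint8)  # half-res U plane
v = np.zeros((H // 2, W // 2), dtype=np.uint8)  # half-res V plane

packed = np.stack([
  y[::2, ::2], y[::2, 1::2], y[1::2, ::2], y[1::2, 1::2],  # channels 0-3
  u, v,                                                    # channels 4-5
])
assert packed.shape == (6, 128, 256)
```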

selfdrive/modeld/models/driving_policy.onnx (new, LFS pointer)
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cae3285c876804e649b14adadcfb8be79a9bd5a1b928113e37f1f08e25e9688
+size 16581121

selfdrive/modeld/models/driving_vision.onnx (new, LFS pointer)
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29bbf79f9dfd7048c0013bb81e86d9b2979275b95ea1ed8a86d1a86a88695240
+size 34882971

selfdrive/modeld/models/supercombo.onnx (deleted)
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d21daa542227ecc5972da45df4e26f018ba113c0461f270e367d57e3ad89221a
-size 51461700

selfdrive/modeld/parse_model_outputs.py
@@ -84,23 +84,32 @@ class Parser:
       outs[name] = pred_mu_final.reshape(final_shape)
       outs[name + '_stds'] = pred_std_final.reshape(final_shape)

-  def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
+  def parse_vision_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
+    self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
+    self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,))
+    self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
+    self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH))
+    self.parse_binary_crossentropy('meta', outs)
+    return outs
+
+  def parse_policy_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
     self.parse_mdn('plan', outs, in_N=ModelConstants.PLAN_MHP_N, out_N=ModelConstants.PLAN_MHP_SELECTION,
                    out_shape=(ModelConstants.IDX_N,ModelConstants.PLAN_WIDTH))
     self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
     self.parse_mdn('road_edges', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_ROAD_EDGES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
-    self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
-    self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
     self.parse_mdn('sim_pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
-    self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,))
     self.parse_mdn('lead', outs, in_N=ModelConstants.LEAD_MHP_N, out_N=ModelConstants.LEAD_MHP_SELECTION,
                    out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH))
     if 'lat_planner_solution' in outs:
       self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH))
     if 'desired_curvature' in outs:
       self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,))
-    for k in ['lead_prob', 'lane_lines_prob', 'meta']:
+    for k in ['lead_prob', 'lane_lines_prob']:
       self.parse_binary_crossentropy(k, outs)
     self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
-    self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH))
+    return outs
+
+  def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
+    outs = self.parse_vision_outputs(outs)
+    outs = self.parse_policy_outputs(outs)
     return outs
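
For reference, the head-to-model assignment implied by the split above, written out as a self-checking summary (names copied from this diff):

```python
# Which parsed heads now come from which model.
VISION_HEADS = ['pose', 'wide_from_device_euler', 'road_transform', 'desire_pred', 'meta']
POLICY_HEADS = ['plan', 'lane_lines', 'road_edges', 'sim_pose', 'lead',
                'lat_planner_solution', 'desired_curvature',
                'lead_prob', 'lane_lines_prob', 'desire_state']

assert not set(VISION_HEADS) & set(POLICY_HEADS)  # no head is parsed twice
```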

tinygrad_repo (submodule)
@@ -1 +1 @@
-Subproject commit 820d1eb159f327961b90044f8c7d21c6160d7b08
+Subproject commit 6f39c4d653737c056540194605dc18a7273df280