From cb0b7375b728d1b6e92db68c9ba55f0f54c09a3f Mon Sep 17 00:00:00 2001 From: HaraldSchafer Date: Wed, 5 Oct 2022 21:43:38 -0700 Subject: [PATCH] Rocket Launcher Model (#25963) * 1456d261-d232-4654-8885-4d9fde883894/440 6b7d7cec-ead8-40f3-86cc-86d52c9b03fe/300 * compute only 9 tokens: 1456d261-d232-4654-8885-4d9fde883894/440 6b7d7cec-ead8-40f3-86cc-86d52c9b03fe/300 * tinygrad: cleanup gather * 1456d261-d232-4654-8885-4d9fde883894/440 6b7d7cec-ead8-40f3-86cc-86d52c9b03fe/700 * empty commit for tests * bump tinygrad * dont use tinygrad matmul for now * bump tinygrad * 1456d261-d232-4654-8885-4d9fde883894/440 e63ab895-2222-4abd-a9a5-af86bb70e260/700 * float16 1456d261-d232-4654-8885-4d9fde883894/440 e63ab895-2222-4abd-a9a5-af86bb70e260/700 * increase steer rate cost * Revert "increase steer rate cost" This reverts commit 74ce9ab9be7ef17ecfec931f96851b12f37f2336. * fork tinygrad * empty commit for tests * basics * Kinda works * new lat * new tuning * Move LATMPCN so scons compiles * Update long weights * Add tinygrad optim * Update model ref * update weights * Update ref * Try * Error message for field ignore * update model regf * ref commit * Fix onnx test Co-authored-by: Yassine Yousfi --- .gitmodules | 2 +- release/files_common | 1 + selfdrive/controls/lib/drive_helpers.py | 6 -- .../controls/lib/lateral_mpc_lib/lat_mpc.py | 63 ++++++++++--------- selfdrive/controls/lib/lateral_planner.py | 18 +++--- .../lib/longitudinal_mpc_lib/long_mpc.py | 2 +- .../controls/lib/longitudinal_planner.py | 6 +- selfdrive/controls/tests/test_lateral_mpc.py | 5 +- selfdrive/modeld/SConscript | 4 +- selfdrive/modeld/models/driving.cc | 16 +++-- selfdrive/modeld/models/driving.h | 15 ++++- selfdrive/modeld/models/supercombo.onnx | 4 +- selfdrive/modeld/runners/onnx_runner.py | 13 ++-- selfdrive/modeld/thneed/thneed_common.cc | 2 +- selfdrive/test/process_replay/compare_logs.py | 2 +- selfdrive/test/process_replay/model_replay.py | 4 +- .../process_replay/model_replay_ref_commit | 2 +- selfdrive/test/process_replay/ref_commit | 2 +- tinygrad_repo | 2 +- 19 files changed, 93 insertions(+), 76 deletions(-) diff --git a/.gitmodules b/.gitmodules index 26f93ef164..544c95c943 100644 --- a/.gitmodules +++ b/.gitmodules @@ -18,4 +18,4 @@ url = ../../commaai/body.git [submodule "tinygrad"] path = tinygrad_repo - url = https://github.com/geohot/tinygrad.git + url = ../../commaai/tinygrad.git diff --git a/release/files_common b/release/files_common index 0c341eb3f6..07ffaf8501 100644 --- a/release/files_common +++ b/release/files_common @@ -574,3 +574,4 @@ tinygrad_repo/tinygrad/ops.py tinygrad_repo/tinygrad/shapetracker.py tinygrad_repo/tinygrad/tensor.py tinygrad_repo/tinygrad/nn/__init__.py +tinygrad_repo/tinygrad/nn/optim.py diff --git a/selfdrive/controls/lib/drive_helpers.py b/selfdrive/controls/lib/drive_helpers.py index e8f9585a6f..f81cd0c40c 100644 --- a/selfdrive/controls/lib/drive_helpers.py +++ b/selfdrive/controls/lib/drive_helpers.py @@ -33,12 +33,6 @@ CRUISE_INTERVAL_SIGN = { } -class MPC_COST_LAT: - PATH = 1.0 - HEADING = 1.0 - STEER_RATE = 1.0 - - def apply_deadzone(error, deadzone): if error > deadzone: error -= deadzone diff --git a/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py b/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py index c0e7358160..07efad73c9 100755 --- a/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py +++ b/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py @@ -5,7 +5,6 @@ import numpy as np from casadi import SX, vertcat, sin, cos from common.realtime import sec_since_boot -from selfdrive.controls.lib.drive_helpers import LAT_MPC_N as N from selfdrive.modeld.constants import T_IDXS if __name__ == '__main__': # generating code @@ -18,6 +17,9 @@ EXPORT_DIR = os.path.join(LAT_MPC_DIR, "c_generated_code") JSON_FILE = os.path.join(LAT_MPC_DIR, "acados_ocp_lat.json") X_DIM = 4 P_DIM = 2 +N = 16 +COST_E_DIM = 3 +COST_DIM = COST_E_DIM + 1 MODEL_NAME = 'lat' ACADOS_SOLVER_TYPE = 'SQP_RTI' @@ -29,8 +31,8 @@ def gen_lat_model(): x_ego = SX.sym('x_ego') y_ego = SX.sym('y_ego') psi_ego = SX.sym('psi_ego') - curv_ego = SX.sym('curv_ego') - model.x = vertcat(x_ego, y_ego, psi_ego, curv_ego) + psi_rate_ego = SX.sym('psi_rate_ego') + model.x = vertcat(x_ego, y_ego, psi_ego, psi_rate_ego) # parameters v_ego = SX.sym('v_ego') @@ -38,22 +40,22 @@ def gen_lat_model(): model.p = vertcat(v_ego, rotation_radius) # controls - curv_rate = SX.sym('curv_rate') - model.u = vertcat(curv_rate) + psi_accel_ego = SX.sym('psi_accel_ego') + model.u = vertcat(psi_accel_ego) # xdot x_ego_dot = SX.sym('x_ego_dot') y_ego_dot = SX.sym('y_ego_dot') psi_ego_dot = SX.sym('psi_ego_dot') - curv_ego_dot = SX.sym('curv_ego_dot') + psi_rate_ego_dot = SX.sym('psi_rate_ego_dot') - model.xdot = vertcat(x_ego_dot, y_ego_dot, psi_ego_dot, curv_ego_dot) + model.xdot = vertcat(x_ego_dot, y_ego_dot, psi_ego_dot, psi_rate_ego_dot) # dynamics model - f_expl = vertcat(v_ego * cos(psi_ego) - rotation_radius * sin(psi_ego) * (v_ego * curv_ego), - v_ego * sin(psi_ego) + rotation_radius * cos(psi_ego) * (v_ego * curv_ego), - v_ego * curv_ego, - curv_rate) + f_expl = vertcat(v_ego * cos(psi_ego) - rotation_radius * sin(psi_ego) * psi_rate_ego, + v_ego * sin(psi_ego) + rotation_radius * cos(psi_ego) * psi_rate_ego, + psi_rate_ego, + psi_accel_ego) model.f_impl_expr = model.xdot - f_expl model.f_expl_expr = f_expl return model @@ -72,26 +74,28 @@ def gen_lat_ocp(): ocp.cost.cost_type = 'NONLINEAR_LS' ocp.cost.cost_type_e = 'NONLINEAR_LS' - Q = np.diag([0.0, 0.0]) - QR = np.diag([0.0, 0.0, 0.0]) + Q = np.diag(np.zeros(COST_E_DIM)) + QR = np.diag(np.zeros(COST_DIM)) ocp.cost.W = QR ocp.cost.W_e = Q - y_ego, psi_ego = ocp.model.x[1], ocp.model.x[2] - curv_rate = ocp.model.u[0] + y_ego, psi_ego, psi_rate_ego = ocp.model.x[1], ocp.model.x[2], ocp.model.x[3] + psi_rate_ego_dot = ocp.model.u[0] v_ego = ocp.model.p[0] ocp.parameter_values = np.zeros((P_DIM, )) - ocp.cost.yref = np.zeros((3, )) - ocp.cost.yref_e = np.zeros((2, )) + ocp.cost.yref = np.zeros((COST_DIM, )) + ocp.cost.yref_e = np.zeros((COST_E_DIM, )) # TODO hacky weights to keep behavior the same ocp.model.cost_y_expr = vertcat(y_ego, - ((v_ego +5.0) * psi_ego), - ((v_ego + 5.0) * 4.0 * curv_rate)) + ((v_ego + 5.0) * psi_ego), + ((v_ego + 5.0) * psi_rate_ego), + ((v_ego + 5.0) * psi_rate_ego_dot)) ocp.model.cost_y_expr_e = vertcat(y_ego, - ((v_ego +5.0) * psi_ego)) + ((v_ego + 5.0) * psi_ego), + ((v_ego + 5.0) * psi_rate_ego)) # set constraints ocp.constraints.constr_type = 'BGH' @@ -124,10 +128,10 @@ class LateralMpc(): def reset(self, x0=np.zeros(X_DIM)): self.x_sol = np.zeros((N+1, X_DIM)) self.u_sol = np.zeros((N, 1)) - self.yref = np.zeros((N+1, 3)) + self.yref = np.zeros((N+1, COST_DIM)) for i in range(N): self.solver.cost_set(i, "yref", self.yref[i]) - self.solver.cost_set(N, "yref", self.yref[N][:2]) + self.solver.cost_set(N, "yref", self.yref[N][:COST_E_DIM]) # Somehow needed for stable init for i in range(N+1): @@ -140,14 +144,13 @@ class LateralMpc(): self.solve_time = 0.0 self.cost = 0 - def set_weights(self, path_weight, heading_weight, steer_rate_weight): - W = np.asfortranarray(np.diag([path_weight, heading_weight, steer_rate_weight])) + def set_weights(self, path_weight, heading_weight, yaw_rate_weight, yaw_accel_cost): + W = np.asfortranarray(np.diag([path_weight, heading_weight, yaw_rate_weight, yaw_accel_cost])) for i in range(N): self.solver.cost_set(i, 'W', W) - #TODO hacky weights to keep behavior the same - self.solver.cost_set(N, 'W', (3/20.)*W[:2,:2]) + self.solver.cost_set(N, 'W', W[:COST_E_DIM,:COST_E_DIM]) - def run(self, x0, p, y_pts, heading_pts, curv_rate_pts): + def run(self, x0, p, y_pts, heading_pts, yaw_rate_pts): x0_cp = np.copy(x0) p_cp = np.copy(p) self.solver.constraints_set(0, "lbx", x0_cp) @@ -155,13 +158,13 @@ class LateralMpc(): self.yref[:,0] = y_pts v_ego = p_cp[0] # rotation_radius = p_cp[1] - self.yref[:,1] = heading_pts*(v_ego+5.0) - self.yref[:,2] = curv_rate_pts * (v_ego+5.0) * 4.0 + self.yref[:,1] = heading_pts * (v_ego+5.0) + self.yref[:,2] = yaw_rate_pts * (v_ego+5.0) for i in range(N): self.solver.cost_set(i, "yref", self.yref[i]) self.solver.set(i, "p", p_cp) self.solver.set(N, "p", p_cp) - self.solver.cost_set(N, "yref", self.yref[N][:2]) + self.solver.cost_set(N, "yref", self.yref[N][:COST_E_DIM]) t = sec_since_boot() self.solution_status = self.solver.solve() diff --git a/selfdrive/controls/lib/lateral_planner.py b/selfdrive/controls/lib/lateral_planner.py index 2ad5f784d7..48c98e7350 100644 --- a/selfdrive/controls/lib/lateral_planner.py +++ b/selfdrive/controls/lib/lateral_planner.py @@ -3,7 +3,8 @@ from common.realtime import sec_since_boot, DT_MDL from common.numpy_fast import interp from system.swaglog import cloudlog from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import LateralMpc -from selfdrive.controls.lib.drive_helpers import CONTROL_N, MPC_COST_LAT, LAT_MPC_N +from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import N as LAT_MPC_N +from selfdrive.controls.lib.drive_helpers import CONTROL_N from selfdrive.controls.lib.desire_helper import DesireHelper import cereal.messaging as messaging from cereal import log @@ -23,7 +24,7 @@ class LateralPlanner: self.path_xyz = np.zeros((TRAJECTORY_SIZE, 3)) self.plan_yaw = np.zeros((TRAJECTORY_SIZE,)) - self.plan_curv_rate = np.zeros((TRAJECTORY_SIZE,)) + self.plan_yaw_rate = np.zeros((TRAJECTORY_SIZE,)) self.t_idxs = np.arange(TRAJECTORY_SIZE) self.y_pts = np.zeros(TRAJECTORY_SIZE) @@ -44,6 +45,7 @@ class LateralPlanner: self.path_xyz = np.column_stack([md.position.x, md.position.y, md.position.z]) self.t_idxs = np.array(md.position.t) self.plan_yaw = np.array(md.orientation.z) + self.plan_yaw_rate = np.array(md.orientationRate.z) # Lane change logic desire_state = md.meta.desireState @@ -55,24 +57,24 @@ class LateralPlanner: d_path_xyz = self.path_xyz # Heading cost is useful at low speed, otherwise end of plan can be off-heading - heading_cost = interp(v_ego, [5.0, 10.0], [MPC_COST_LAT.HEADING, 0.15]) - self.lat_mpc.set_weights(MPC_COST_LAT.PATH, heading_cost, MPC_COST_LAT.STEER_RATE) + heading_cost = interp(v_ego, [5.0, 10.0], [1.0, 0.15]) + self.lat_mpc.set_weights(1.0, heading_cost, 0.0, .075) y_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(d_path_xyz, axis=1), d_path_xyz[:, 1]) heading_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(self.path_xyz, axis=1), self.plan_yaw) - curv_rate_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(self.path_xyz, axis=1), self.plan_curv_rate) + yaw_rate_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(self.path_xyz, axis=1), self.plan_yaw_rate) self.y_pts = y_pts assert len(y_pts) == LAT_MPC_N + 1 assert len(heading_pts) == LAT_MPC_N + 1 - assert len(curv_rate_pts) == LAT_MPC_N + 1 + assert len(yaw_rate_pts) == LAT_MPC_N + 1 lateral_factor = max(0, self.factor1 - (self.factor2 * v_ego**2)) p = np.array([v_ego, lateral_factor]) self.lat_mpc.run(self.x0, p, y_pts, heading_pts, - curv_rate_pts) + yaw_rate_pts) # init state for next # mpc.u_sol is the desired curvature rate given x0 curv state. # with x0[3] = measured_curvature, this would be the actual desired rate. @@ -103,7 +105,7 @@ class LateralPlanner: lateralPlan.modelMonoTime = sm.logMonoTime['modelV2'] lateralPlan.dPathPoints = self.y_pts.tolist() lateralPlan.psis = self.lat_mpc.x_sol[0:CONTROL_N, 2].tolist() - lateralPlan.curvatures = self.lat_mpc.x_sol[0:CONTROL_N, 3].tolist() + lateralPlan.curvatures = (self.lat_mpc.x_sol[0:CONTROL_N, 3]/sm['carState'].vEgo).tolist() lateralPlan.curvatureRates = [float(x) for x in self.lat_mpc.u_sol[0:CONTROL_N - 1]] + [0.0] lateralPlan.mpcSolutionValid = bool(plan_solution_valid) diff --git a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py index 695222ed4d..2aab4b71af 100644 --- a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py +++ b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py @@ -253,7 +253,7 @@ class LongitudinalMpc: cost_weights = [X_EGO_OBSTACLE_COST, X_EGO_COST, V_EGO_COST, A_EGO_COST, a_change_cost, J_EGO_COST] constraint_cost_weights = [LIMIT_COST, LIMIT_COST, LIMIT_COST, DANGER_ZONE_COST] elif self.mode == 'blended': - cost_weights = [0., 0.2, 0.25, 1.0, 0.0, 1.0] + cost_weights = [0., 0.1, 0.2, 5.0, 0.0, 1.0] constraint_cost_weights = [LIMIT_COST, LIMIT_COST, LIMIT_COST, 50.0] else: raise NotImplementedError(f'Planner mode {self.mode} not recognized in planner cost set') diff --git a/selfdrive/controls/lib/longitudinal_planner.py b/selfdrive/controls/lib/longitudinal_planner.py index a0363536ca..018136e6f2 100755 --- a/selfdrive/controls/lib/longitudinal_planner.py +++ b/selfdrive/controls/lib/longitudinal_planner.py @@ -58,7 +58,6 @@ class LongitudinalPlanner: self.a_desired = init_a self.v_desired_filter = FirstOrderFilter(init_v, 2.0, DT_MDL) - self.t_uniform = np.arange(0.0, T_IDXS_MPC[-1] + 0.5, 0.5) self.v_desired_trajectory = np.zeros(CONTROL_N) self.a_desired_trajectory = np.zeros(CONTROL_N) @@ -76,10 +75,7 @@ class LongitudinalPlanner: x = np.interp(T_IDXS_MPC, T_IDXS, model_msg.position.x) v = np.interp(T_IDXS_MPC, T_IDXS, model_msg.velocity.x) a = np.interp(T_IDXS_MPC, T_IDXS, model_msg.acceleration.x) - # Uniform interp so gradient is less noisy - a_sparse = np.interp(self.t_uniform, T_IDXS, model_msg.acceleration.x) - j_sparse = np.gradient(a_sparse, self.t_uniform) - j = np.interp(T_IDXS_MPC, self.t_uniform, j_sparse) + j = np.zeros(len(T_IDXS_MPC)) else: x = np.zeros(len(T_IDXS_MPC)) v = np.zeros(len(T_IDXS_MPC)) diff --git a/selfdrive/controls/tests/test_lateral_mpc.py b/selfdrive/controls/tests/test_lateral_mpc.py index 4864dbdc06..9b986c053d 100644 --- a/selfdrive/controls/tests/test_lateral_mpc.py +++ b/selfdrive/controls/tests/test_lateral_mpc.py @@ -1,7 +1,8 @@ import unittest import numpy as np from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import LateralMpc -from selfdrive.controls.lib.drive_helpers import LAT_MPC_N, CAR_ROTATION_RADIUS +from selfdrive.controls.lib.drive_helpers import CAR_ROTATION_RADIUS +from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import N as LAT_MPC_N def run_mpc(lat_mpc=None, v_ref=30., x_init=0., y_init=0., psi_init=0., curvature_init=0., @@ -9,7 +10,7 @@ def run_mpc(lat_mpc=None, v_ref=30., x_init=0., y_init=0., psi_init=0., curvatur if lat_mpc is None: lat_mpc = LateralMpc() - lat_mpc.set_weights(1., 1., 1.) + lat_mpc.set_weights(1., 1., 0.0, 1.) y_pts = poly_shift * np.ones(LAT_MPC_N + 1) heading_pts = np.zeros(LAT_MPC_N + 1) diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index 246f8c2941..5c02e2b15f 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -71,9 +71,9 @@ if use_thneed and arch == "larch64" or GetOption('pc_thneed'): fn = File("models/supercombo").abspath if GetOption('pc_thneed'): - cmd = f"cd {Dir('#').abspath}/tinygrad_repo && NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed" + cmd = f"cd {Dir('#').abspath}/tinygrad_repo && GPU=1 NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed" else: - cmd = f"cd {Dir('#').abspath}/tinygrad_repo && FLOAT16=1 PYOPENCL_NO_CACHE=1 MATMUL=1 NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed" + cmd = f"cd {Dir('#').abspath}/tinygrad_repo && FLOAT16=1 MATMUL=1 PYOPENCL_NO_CACHE=1 NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed" # is there a better way then listing all of tinygrad? lenv.Command(fn + ".thneed", [fn + ".onnx", diff --git a/selfdrive/modeld/models/driving.cc b/selfdrive/modeld/models/driving.cc index 8d02eb6b2f..3316e6d114 100644 --- a/selfdrive/modeld/models/driving.cc +++ b/selfdrive/modeld/models/driving.cc @@ -41,11 +41,11 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) { &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context); #ifdef TEMPORAL - s->m->addRecurrent(&s->output[OUTPUT_SIZE], TEMPORAL_SIZE); + s->m->addRecurrent(&s->feature_buffer[0], TEMPORAL_SIZE); #endif #ifdef DESIRE - s->m->addDesire(s->pulse_desire, DESIRE_LEN); + s->m->addDesire(s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1)); #endif #ifdef TRAFFIC_CONVENTION @@ -56,18 +56,20 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) { ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf, const mat3 &transform, const mat3 &transform_wide, float *desire_in, bool is_rhd, bool prepare_only) { #ifdef DESIRE + std::memmove(&s->pulse_desire[0], &s->pulse_desire[DESIRE_LEN], sizeof(float) * DESIRE_LEN*HISTORY_BUFFER_LEN); if (desire_in != NULL) { for (int i = 1; i < DESIRE_LEN; i++) { // Model decides when action is completed // so desire input is just a pulse triggered on rising edge if (desire_in[i] - s->prev_desire[i] > .99) { - s->pulse_desire[i] = desire_in[i]; + s->pulse_desire[DESIRE_LEN*(HISTORY_BUFFER_LEN-1)+i] = desire_in[i]; } else { - s->pulse_desire[i] = 0.0; + s->pulse_desire[DESIRE_LEN*(HISTORY_BUFFER_LEN-1)+i] = 0.0; } s->prev_desire[i] = desire_in[i]; } } +LOGT("Desire enqueued"); #endif int rhd_idx = is_rhd; @@ -92,6 +94,12 @@ ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf, s->m->execute(); LOGT("Execution finished"); + #ifdef TEMPORAL + std::memmove(&s->feature_buffer[0], &s->feature_buffer[FEATURE_LEN], sizeof(float) * FEATURE_LEN*(HISTORY_BUFFER_LEN-1)); + std::memcpy(&s->feature_buffer[FEATURE_LEN*(HISTORY_BUFFER_LEN-1)], &s->output[OUTPUT_SIZE], sizeof(float) * FEATURE_LEN); + LOGT("Features enqueued"); + #endif + return (ModelOutput*)&s->output; } diff --git a/selfdrive/modeld/models/driving.h b/selfdrive/modeld/models/driving.h index e2ee812e44..90767384eb 100644 --- a/selfdrive/modeld/models/driving.h +++ b/selfdrive/modeld/models/driving.h @@ -16,6 +16,8 @@ #include "selfdrive/modeld/models/commonmodel.h" #include "selfdrive/modeld/runners/run.h" +constexpr int FEATURE_LEN = 2048; +constexpr int HISTORY_BUFFER_LEN = 99; constexpr int DESIRE_LEN = 8; constexpr int DESIRE_PRED_LEN = 4; constexpr int TRAFFIC_CONVENTION_LEN = 2; @@ -233,6 +235,11 @@ struct ModelOutputMeta { }; static_assert(sizeof(ModelOutputMeta) == sizeof(ModelOutputDesireProb) + sizeof(float) + (sizeof(ModelOutputDisengageProb)*DISENGAGE_LEN) + (sizeof(ModelOutputBlinkerProb)*BLINKER_LEN) + (sizeof(ModelOutputDesireProb)*DESIRE_PRED_LEN)); +struct ModelOutputFeatures { + std::array feature; +}; +static_assert(sizeof(ModelOutputFeatures) == (sizeof(float)*FEATURE_LEN)); + struct ModelOutput { const ModelOutputPlans plans; const ModelOutputLaneLines lane_lines; @@ -244,22 +251,24 @@ struct ModelOutput { }; constexpr int OUTPUT_SIZE = sizeof(ModelOutput) / sizeof(float); + #ifdef TEMPORAL - constexpr int TEMPORAL_SIZE = 512; + constexpr int TEMPORAL_SIZE = HISTORY_BUFFER_LEN * FEATURE_LEN; #else constexpr int TEMPORAL_SIZE = 0; #endif -constexpr int NET_OUTPUT_SIZE = OUTPUT_SIZE + TEMPORAL_SIZE; +constexpr int NET_OUTPUT_SIZE = OUTPUT_SIZE + FEATURE_LEN; // TODO: convert remaining arrays to std::array and update model runners struct ModelState { ModelFrame *frame = nullptr; ModelFrame *wide_frame = nullptr; + std::array feature_buffer = {}; std::array output = {}; std::unique_ptr m; #ifdef DESIRE float prev_desire[DESIRE_LEN] = {}; - float pulse_desire[DESIRE_LEN] = {}; + float pulse_desire[DESIRE_LEN*(HISTORY_BUFFER_LEN+1)] = {}; #endif #ifdef TRAFFIC_CONVENTION float traffic_convention[TRAFFIC_CONVENTION_LEN] = {}; diff --git a/selfdrive/modeld/models/supercombo.onnx b/selfdrive/modeld/models/supercombo.onnx index 7b11edbe08..37edc36c02 100644 --- a/selfdrive/modeld/models/supercombo.onnx +++ b/selfdrive/modeld/models/supercombo.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50c7fc8565ac69a4b9a0de122e961326820e78bf13659255a89d0ed04be030d5 -size 95167481 +oid sha256:30f30bc1251c03db135564ecbf7dc0bc96cbb07be0ebd3691edd8d555dc087fa +size 58539693 diff --git a/selfdrive/modeld/runners/onnx_runner.py b/selfdrive/modeld/runners/onnx_runner.py index ac7cc68814..d4a11a7c0b 100755 --- a/selfdrive/modeld/runners/onnx_runner.py +++ b/selfdrive/modeld/runners/onnx_runner.py @@ -9,6 +9,8 @@ os.environ["OMP_WAIT_POLICY"] = "PASSIVE" import onnxruntime as ort # pylint: disable=import-error +ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8} + def read(sz, tf8=False): dd = [] gt = 0 @@ -18,7 +20,7 @@ def read(sz, tf8=False): assert(len(st) > 0) dd.append(st) gt += len(st) - r = np.frombuffer(b''.join(dd), dtype=np.uint8 if tf8 else np.float32).astype(np.float32) + r = np.frombuffer(b''.join(dd), dtype=np.uint8 if tf8 else np.float32) if tf8: r = r / 255. return r @@ -29,22 +31,23 @@ def write(d): def run_loop(m, tf8_input=False): ishapes = [[1]+ii.shape[1:] for ii in m.get_inputs()] keys = [x.name for x in m.get_inputs()] + itypes = [ORT_TYPES_TO_NP_TYPES[x.type] for x in m.get_inputs()] # run once to initialize CUDA provider if "CUDAExecutionProvider" in m.get_providers(): - m.run(None, dict(zip(keys, [np.zeros(shp, dtype=np.float32) for shp in ishapes]))) + m.run(None, dict(zip(keys, [np.zeros(shp, dtype=itp) for shp, itp in zip(ishapes, itypes)]))) print("ready to run onnx model", keys, ishapes, file=sys.stderr) while 1: inputs = [] - for k, shp in zip(keys, ishapes): + for k, shp, itp in zip(keys, ishapes, itypes): ts = np.product(shp) #print("reshaping %s with offset %d" % (str(shp), offset), file=sys.stderr) - inputs.append(read(ts, (k=='input_img' and tf8_input)).reshape(shp)) + inputs.append(read(ts, (k=='input_img' and tf8_input)).reshape(shp).astype(itp)) ret = m.run(None, dict(zip(keys, inputs))) #print(ret, file=sys.stderr) for r in ret: - write(r) + write(r.astype(np.float32)) if __name__ == "__main__": diff --git a/selfdrive/modeld/thneed/thneed_common.cc b/selfdrive/modeld/thneed/thneed_common.cc index 21170b13a6..ecdf1237e3 100644 --- a/selfdrive/modeld/thneed/thneed_common.cc +++ b/selfdrive/modeld/thneed/thneed_common.cc @@ -12,7 +12,7 @@ map, int> g_args_size; map g_program_source; void Thneed::stop() { - printf("Thneed::stop: recorded %lu commands\n", cmds.size()); + //printf("Thneed::stop: recorded %lu commands\n", cmds.size()); record = false; } diff --git a/selfdrive/test/process_replay/compare_logs.py b/selfdrive/test/process_replay/compare_logs.py index bf6daf5fed..c14956b1b2 100755 --- a/selfdrive/test/process_replay/compare_logs.py +++ b/selfdrive/test/process_replay/compare_logs.py @@ -41,7 +41,7 @@ def remove_ignored_fields(msg, ignore): elif isinstance(v, numbers.Number): val = 0 else: - raise NotImplementedError + raise NotImplementedError('Error ignoring field') setattr(attr, keys[-1], val) return msg.as_reader() diff --git a/selfdrive/test/process_replay/model_replay.py b/selfdrive/test/process_replay/model_replay.py index ccd89bea9a..ecfacbf5d2 100755 --- a/selfdrive/test/process_replay/model_replay.py +++ b/selfdrive/test/process_replay/model_replay.py @@ -22,7 +22,7 @@ from tools.lib.logreader import LogReader TEST_ROUTE = "4cf7a6ad03080c90|2021-09-29--13-46-36" SEGMENT = 0 -MAX_FRAMES = 10 if PC else 1300 +MAX_FRAMES = 100 if PC else 1300 SEND_EXTRA_INPUTS = bool(os.getenv("SEND_EXTRA_INPUTS", "0")) @@ -174,7 +174,7 @@ if __name__ == "__main__": 'driverStateV2.dspExecutionTime' ] # TODO this tolerance is absurdly large - tolerance = 5e-1 if PC else None + tolerance = 2.0 if PC else None results: Any = {TEST_ROUTE: {}} log_paths: Any = {TEST_ROUTE: {"models": {'ref': BASE_URL + log_fn, 'new': log_fn}}} results[TEST_ROUTE]["models"] = compare_logs(cmp_log, log_msgs, tolerance=tolerance, ignore_fields=ignore) diff --git a/selfdrive/test/process_replay/model_replay_ref_commit b/selfdrive/test/process_replay/model_replay_ref_commit index 958d3da14d..9727ef3d17 100644 --- a/selfdrive/test/process_replay/model_replay_ref_commit +++ b/selfdrive/test/process_replay/model_replay_ref_commit @@ -1 +1 @@ -c40319a454840d8a2196ec1227d27b536ee14375 +008c0a622b7471c6234690d668f76bcb5dc8d999 diff --git a/selfdrive/test/process_replay/ref_commit b/selfdrive/test/process_replay/ref_commit index d4d16b726b..0e3cdf797f 100644 --- a/selfdrive/test/process_replay/ref_commit +++ b/selfdrive/test/process_replay/ref_commit @@ -1 +1 @@ -1989d8c2a5de94faa3756b7d10fc94e6c063afa5 \ No newline at end of file +e0ffcae8def2fd9c82c547d1f257d4f06a48a3c3 diff --git a/tinygrad_repo b/tinygrad_repo index 2e9b7637b3..82ca9c6666 160000 --- a/tinygrad_repo +++ b/tinygrad_repo @@ -1 +1 @@ -Subproject commit 2e9b7637b3c3c8895fda9f964215db3a35fe3441 +Subproject commit 82ca9c66664b5f3739e1881f62cb94b72cf0b1fa