Rocket Launcher Model (#25963)

* 1456d261-d232-4654-8885-4d9fde883894/440 6b7d7cec-ead8-40f3-86cc-86d52c9b03fe/300

* compute only 9 tokens: 1456d261-d232-4654-8885-4d9fde883894/440 6b7d7cec-ead8-40f3-86cc-86d52c9b03fe/300

* tinygrad: cleanup gather

* 1456d261-d232-4654-8885-4d9fde883894/440 6b7d7cec-ead8-40f3-86cc-86d52c9b03fe/700

* empty commit for tests

* bump tinygrad

* dont use tinygrad matmul for now

* bump tinygrad

* 1456d261-d232-4654-8885-4d9fde883894/440 e63ab895-2222-4abd-a9a5-af86bb70e260/700

* float16 1456d261-d232-4654-8885-4d9fde883894/440 e63ab895-2222-4abd-a9a5-af86bb70e260/700

* increase steer rate cost

* Revert "increase steer rate cost"

This reverts commit 74ce9ab9be7ef17ecfec931f96851b12f37f2336.

* fork tinygrad

* empty commit for tests

* basics

* Kinda works

* new lat

* new tuning

* Move LATMPCN so scons compiles

* Update long weights

* Add tinygrad optim

* Update model ref

* update weights

* Update ref

* Try

* Error message for field ignore

* update model regf

* ref commit

* Fix onnx test

Co-authored-by: Yassine Yousfi <yyousfi1@binghamton.edu>
old-commit-hash: cb0b7375b7
taco
HaraldSchafer 3 years ago committed by GitHub
parent f93f4e9f9b
commit a8ec6f4cfd
  1. 2
      .gitmodules
  2. 1
      release/files_common
  3. 6
      selfdrive/controls/lib/drive_helpers.py
  4. 63
      selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py
  5. 18
      selfdrive/controls/lib/lateral_planner.py
  6. 2
      selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py
  7. 6
      selfdrive/controls/lib/longitudinal_planner.py
  8. 5
      selfdrive/controls/tests/test_lateral_mpc.py
  9. 4
      selfdrive/modeld/SConscript
  10. 16
      selfdrive/modeld/models/driving.cc
  11. 15
      selfdrive/modeld/models/driving.h
  12. 4
      selfdrive/modeld/models/supercombo.onnx
  13. 13
      selfdrive/modeld/runners/onnx_runner.py
  14. 2
      selfdrive/modeld/thneed/thneed_common.cc
  15. 2
      selfdrive/test/process_replay/compare_logs.py
  16. 4
      selfdrive/test/process_replay/model_replay.py
  17. 2
      selfdrive/test/process_replay/model_replay_ref_commit
  18. 2
      selfdrive/test/process_replay/ref_commit
  19. 2
      tinygrad_repo

2
.gitmodules vendored

@ -18,4 +18,4 @@
url = ../../commaai/body.git url = ../../commaai/body.git
[submodule "tinygrad"] [submodule "tinygrad"]
path = tinygrad_repo path = tinygrad_repo
url = https://github.com/geohot/tinygrad.git url = ../../commaai/tinygrad.git

@ -574,3 +574,4 @@ tinygrad_repo/tinygrad/ops.py
tinygrad_repo/tinygrad/shapetracker.py tinygrad_repo/tinygrad/shapetracker.py
tinygrad_repo/tinygrad/tensor.py tinygrad_repo/tinygrad/tensor.py
tinygrad_repo/tinygrad/nn/__init__.py tinygrad_repo/tinygrad/nn/__init__.py
tinygrad_repo/tinygrad/nn/optim.py

@ -33,12 +33,6 @@ CRUISE_INTERVAL_SIGN = {
} }
class MPC_COST_LAT:
PATH = 1.0
HEADING = 1.0
STEER_RATE = 1.0
def apply_deadzone(error, deadzone): def apply_deadzone(error, deadzone):
if error > deadzone: if error > deadzone:
error -= deadzone error -= deadzone

@ -5,7 +5,6 @@ import numpy as np
from casadi import SX, vertcat, sin, cos from casadi import SX, vertcat, sin, cos
from common.realtime import sec_since_boot from common.realtime import sec_since_boot
from selfdrive.controls.lib.drive_helpers import LAT_MPC_N as N
from selfdrive.modeld.constants import T_IDXS from selfdrive.modeld.constants import T_IDXS
if __name__ == '__main__': # generating code if __name__ == '__main__': # generating code
@ -18,6 +17,9 @@ EXPORT_DIR = os.path.join(LAT_MPC_DIR, "c_generated_code")
JSON_FILE = os.path.join(LAT_MPC_DIR, "acados_ocp_lat.json") JSON_FILE = os.path.join(LAT_MPC_DIR, "acados_ocp_lat.json")
X_DIM = 4 X_DIM = 4
P_DIM = 2 P_DIM = 2
N = 16
COST_E_DIM = 3
COST_DIM = COST_E_DIM + 1
MODEL_NAME = 'lat' MODEL_NAME = 'lat'
ACADOS_SOLVER_TYPE = 'SQP_RTI' ACADOS_SOLVER_TYPE = 'SQP_RTI'
@ -29,8 +31,8 @@ def gen_lat_model():
x_ego = SX.sym('x_ego') x_ego = SX.sym('x_ego')
y_ego = SX.sym('y_ego') y_ego = SX.sym('y_ego')
psi_ego = SX.sym('psi_ego') psi_ego = SX.sym('psi_ego')
curv_ego = SX.sym('curv_ego') psi_rate_ego = SX.sym('psi_rate_ego')
model.x = vertcat(x_ego, y_ego, psi_ego, curv_ego) model.x = vertcat(x_ego, y_ego, psi_ego, psi_rate_ego)
# parameters # parameters
v_ego = SX.sym('v_ego') v_ego = SX.sym('v_ego')
@ -38,22 +40,22 @@ def gen_lat_model():
model.p = vertcat(v_ego, rotation_radius) model.p = vertcat(v_ego, rotation_radius)
# controls # controls
curv_rate = SX.sym('curv_rate') psi_accel_ego = SX.sym('psi_accel_ego')
model.u = vertcat(curv_rate) model.u = vertcat(psi_accel_ego)
# xdot # xdot
x_ego_dot = SX.sym('x_ego_dot') x_ego_dot = SX.sym('x_ego_dot')
y_ego_dot = SX.sym('y_ego_dot') y_ego_dot = SX.sym('y_ego_dot')
psi_ego_dot = SX.sym('psi_ego_dot') psi_ego_dot = SX.sym('psi_ego_dot')
curv_ego_dot = SX.sym('curv_ego_dot') psi_rate_ego_dot = SX.sym('psi_rate_ego_dot')
model.xdot = vertcat(x_ego_dot, y_ego_dot, psi_ego_dot, curv_ego_dot) model.xdot = vertcat(x_ego_dot, y_ego_dot, psi_ego_dot, psi_rate_ego_dot)
# dynamics model # dynamics model
f_expl = vertcat(v_ego * cos(psi_ego) - rotation_radius * sin(psi_ego) * (v_ego * curv_ego), f_expl = vertcat(v_ego * cos(psi_ego) - rotation_radius * sin(psi_ego) * psi_rate_ego,
v_ego * sin(psi_ego) + rotation_radius * cos(psi_ego) * (v_ego * curv_ego), v_ego * sin(psi_ego) + rotation_radius * cos(psi_ego) * psi_rate_ego,
v_ego * curv_ego, psi_rate_ego,
curv_rate) psi_accel_ego)
model.f_impl_expr = model.xdot - f_expl model.f_impl_expr = model.xdot - f_expl
model.f_expl_expr = f_expl model.f_expl_expr = f_expl
return model return model
@ -72,26 +74,28 @@ def gen_lat_ocp():
ocp.cost.cost_type = 'NONLINEAR_LS' ocp.cost.cost_type = 'NONLINEAR_LS'
ocp.cost.cost_type_e = 'NONLINEAR_LS' ocp.cost.cost_type_e = 'NONLINEAR_LS'
Q = np.diag([0.0, 0.0]) Q = np.diag(np.zeros(COST_E_DIM))
QR = np.diag([0.0, 0.0, 0.0]) QR = np.diag(np.zeros(COST_DIM))
ocp.cost.W = QR ocp.cost.W = QR
ocp.cost.W_e = Q ocp.cost.W_e = Q
y_ego, psi_ego = ocp.model.x[1], ocp.model.x[2] y_ego, psi_ego, psi_rate_ego = ocp.model.x[1], ocp.model.x[2], ocp.model.x[3]
curv_rate = ocp.model.u[0] psi_rate_ego_dot = ocp.model.u[0]
v_ego = ocp.model.p[0] v_ego = ocp.model.p[0]
ocp.parameter_values = np.zeros((P_DIM, )) ocp.parameter_values = np.zeros((P_DIM, ))
ocp.cost.yref = np.zeros((3, )) ocp.cost.yref = np.zeros((COST_DIM, ))
ocp.cost.yref_e = np.zeros((2, )) ocp.cost.yref_e = np.zeros((COST_E_DIM, ))
# TODO hacky weights to keep behavior the same # TODO hacky weights to keep behavior the same
ocp.model.cost_y_expr = vertcat(y_ego, ocp.model.cost_y_expr = vertcat(y_ego,
((v_ego +5.0) * psi_ego), ((v_ego + 5.0) * psi_ego),
((v_ego + 5.0) * 4.0 * curv_rate)) ((v_ego + 5.0) * psi_rate_ego),
((v_ego + 5.0) * psi_rate_ego_dot))
ocp.model.cost_y_expr_e = vertcat(y_ego, ocp.model.cost_y_expr_e = vertcat(y_ego,
((v_ego +5.0) * psi_ego)) ((v_ego + 5.0) * psi_ego),
((v_ego + 5.0) * psi_rate_ego))
# set constraints # set constraints
ocp.constraints.constr_type = 'BGH' ocp.constraints.constr_type = 'BGH'
@ -124,10 +128,10 @@ class LateralMpc():
def reset(self, x0=np.zeros(X_DIM)): def reset(self, x0=np.zeros(X_DIM)):
self.x_sol = np.zeros((N+1, X_DIM)) self.x_sol = np.zeros((N+1, X_DIM))
self.u_sol = np.zeros((N, 1)) self.u_sol = np.zeros((N, 1))
self.yref = np.zeros((N+1, 3)) self.yref = np.zeros((N+1, COST_DIM))
for i in range(N): for i in range(N):
self.solver.cost_set(i, "yref", self.yref[i]) self.solver.cost_set(i, "yref", self.yref[i])
self.solver.cost_set(N, "yref", self.yref[N][:2]) self.solver.cost_set(N, "yref", self.yref[N][:COST_E_DIM])
# Somehow needed for stable init # Somehow needed for stable init
for i in range(N+1): for i in range(N+1):
@ -140,14 +144,13 @@ class LateralMpc():
self.solve_time = 0.0 self.solve_time = 0.0
self.cost = 0 self.cost = 0
def set_weights(self, path_weight, heading_weight, steer_rate_weight): def set_weights(self, path_weight, heading_weight, yaw_rate_weight, yaw_accel_cost):
W = np.asfortranarray(np.diag([path_weight, heading_weight, steer_rate_weight])) W = np.asfortranarray(np.diag([path_weight, heading_weight, yaw_rate_weight, yaw_accel_cost]))
for i in range(N): for i in range(N):
self.solver.cost_set(i, 'W', W) self.solver.cost_set(i, 'W', W)
#TODO hacky weights to keep behavior the same self.solver.cost_set(N, 'W', W[:COST_E_DIM,:COST_E_DIM])
self.solver.cost_set(N, 'W', (3/20.)*W[:2,:2])
def run(self, x0, p, y_pts, heading_pts, curv_rate_pts): def run(self, x0, p, y_pts, heading_pts, yaw_rate_pts):
x0_cp = np.copy(x0) x0_cp = np.copy(x0)
p_cp = np.copy(p) p_cp = np.copy(p)
self.solver.constraints_set(0, "lbx", x0_cp) self.solver.constraints_set(0, "lbx", x0_cp)
@ -155,13 +158,13 @@ class LateralMpc():
self.yref[:,0] = y_pts self.yref[:,0] = y_pts
v_ego = p_cp[0] v_ego = p_cp[0]
# rotation_radius = p_cp[1] # rotation_radius = p_cp[1]
self.yref[:,1] = heading_pts*(v_ego+5.0) self.yref[:,1] = heading_pts * (v_ego+5.0)
self.yref[:,2] = curv_rate_pts * (v_ego+5.0) * 4.0 self.yref[:,2] = yaw_rate_pts * (v_ego+5.0)
for i in range(N): for i in range(N):
self.solver.cost_set(i, "yref", self.yref[i]) self.solver.cost_set(i, "yref", self.yref[i])
self.solver.set(i, "p", p_cp) self.solver.set(i, "p", p_cp)
self.solver.set(N, "p", p_cp) self.solver.set(N, "p", p_cp)
self.solver.cost_set(N, "yref", self.yref[N][:2]) self.solver.cost_set(N, "yref", self.yref[N][:COST_E_DIM])
t = sec_since_boot() t = sec_since_boot()
self.solution_status = self.solver.solve() self.solution_status = self.solver.solve()

@ -3,7 +3,8 @@ from common.realtime import sec_since_boot, DT_MDL
from common.numpy_fast import interp from common.numpy_fast import interp
from system.swaglog import cloudlog from system.swaglog import cloudlog
from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import LateralMpc from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import LateralMpc
from selfdrive.controls.lib.drive_helpers import CONTROL_N, MPC_COST_LAT, LAT_MPC_N from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import N as LAT_MPC_N
from selfdrive.controls.lib.drive_helpers import CONTROL_N
from selfdrive.controls.lib.desire_helper import DesireHelper from selfdrive.controls.lib.desire_helper import DesireHelper
import cereal.messaging as messaging import cereal.messaging as messaging
from cereal import log from cereal import log
@ -23,7 +24,7 @@ class LateralPlanner:
self.path_xyz = np.zeros((TRAJECTORY_SIZE, 3)) self.path_xyz = np.zeros((TRAJECTORY_SIZE, 3))
self.plan_yaw = np.zeros((TRAJECTORY_SIZE,)) self.plan_yaw = np.zeros((TRAJECTORY_SIZE,))
self.plan_curv_rate = np.zeros((TRAJECTORY_SIZE,)) self.plan_yaw_rate = np.zeros((TRAJECTORY_SIZE,))
self.t_idxs = np.arange(TRAJECTORY_SIZE) self.t_idxs = np.arange(TRAJECTORY_SIZE)
self.y_pts = np.zeros(TRAJECTORY_SIZE) self.y_pts = np.zeros(TRAJECTORY_SIZE)
@ -44,6 +45,7 @@ class LateralPlanner:
self.path_xyz = np.column_stack([md.position.x, md.position.y, md.position.z]) self.path_xyz = np.column_stack([md.position.x, md.position.y, md.position.z])
self.t_idxs = np.array(md.position.t) self.t_idxs = np.array(md.position.t)
self.plan_yaw = np.array(md.orientation.z) self.plan_yaw = np.array(md.orientation.z)
self.plan_yaw_rate = np.array(md.orientationRate.z)
# Lane change logic # Lane change logic
desire_state = md.meta.desireState desire_state = md.meta.desireState
@ -55,24 +57,24 @@ class LateralPlanner:
d_path_xyz = self.path_xyz d_path_xyz = self.path_xyz
# Heading cost is useful at low speed, otherwise end of plan can be off-heading # Heading cost is useful at low speed, otherwise end of plan can be off-heading
heading_cost = interp(v_ego, [5.0, 10.0], [MPC_COST_LAT.HEADING, 0.15]) heading_cost = interp(v_ego, [5.0, 10.0], [1.0, 0.15])
self.lat_mpc.set_weights(MPC_COST_LAT.PATH, heading_cost, MPC_COST_LAT.STEER_RATE) self.lat_mpc.set_weights(1.0, heading_cost, 0.0, .075)
y_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(d_path_xyz, axis=1), d_path_xyz[:, 1]) y_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(d_path_xyz, axis=1), d_path_xyz[:, 1])
heading_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(self.path_xyz, axis=1), self.plan_yaw) heading_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(self.path_xyz, axis=1), self.plan_yaw)
curv_rate_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(self.path_xyz, axis=1), self.plan_curv_rate) yaw_rate_pts = np.interp(v_ego * self.t_idxs[:LAT_MPC_N + 1], np.linalg.norm(self.path_xyz, axis=1), self.plan_yaw_rate)
self.y_pts = y_pts self.y_pts = y_pts
assert len(y_pts) == LAT_MPC_N + 1 assert len(y_pts) == LAT_MPC_N + 1
assert len(heading_pts) == LAT_MPC_N + 1 assert len(heading_pts) == LAT_MPC_N + 1
assert len(curv_rate_pts) == LAT_MPC_N + 1 assert len(yaw_rate_pts) == LAT_MPC_N + 1
lateral_factor = max(0, self.factor1 - (self.factor2 * v_ego**2)) lateral_factor = max(0, self.factor1 - (self.factor2 * v_ego**2))
p = np.array([v_ego, lateral_factor]) p = np.array([v_ego, lateral_factor])
self.lat_mpc.run(self.x0, self.lat_mpc.run(self.x0,
p, p,
y_pts, y_pts,
heading_pts, heading_pts,
curv_rate_pts) yaw_rate_pts)
# init state for next # init state for next
# mpc.u_sol is the desired curvature rate given x0 curv state. # mpc.u_sol is the desired curvature rate given x0 curv state.
# with x0[3] = measured_curvature, this would be the actual desired rate. # with x0[3] = measured_curvature, this would be the actual desired rate.
@ -103,7 +105,7 @@ class LateralPlanner:
lateralPlan.modelMonoTime = sm.logMonoTime['modelV2'] lateralPlan.modelMonoTime = sm.logMonoTime['modelV2']
lateralPlan.dPathPoints = self.y_pts.tolist() lateralPlan.dPathPoints = self.y_pts.tolist()
lateralPlan.psis = self.lat_mpc.x_sol[0:CONTROL_N, 2].tolist() lateralPlan.psis = self.lat_mpc.x_sol[0:CONTROL_N, 2].tolist()
lateralPlan.curvatures = self.lat_mpc.x_sol[0:CONTROL_N, 3].tolist() lateralPlan.curvatures = (self.lat_mpc.x_sol[0:CONTROL_N, 3]/sm['carState'].vEgo).tolist()
lateralPlan.curvatureRates = [float(x) for x in self.lat_mpc.u_sol[0:CONTROL_N - 1]] + [0.0] lateralPlan.curvatureRates = [float(x) for x in self.lat_mpc.u_sol[0:CONTROL_N - 1]] + [0.0]
lateralPlan.mpcSolutionValid = bool(plan_solution_valid) lateralPlan.mpcSolutionValid = bool(plan_solution_valid)

@ -253,7 +253,7 @@ class LongitudinalMpc:
cost_weights = [X_EGO_OBSTACLE_COST, X_EGO_COST, V_EGO_COST, A_EGO_COST, a_change_cost, J_EGO_COST] cost_weights = [X_EGO_OBSTACLE_COST, X_EGO_COST, V_EGO_COST, A_EGO_COST, a_change_cost, J_EGO_COST]
constraint_cost_weights = [LIMIT_COST, LIMIT_COST, LIMIT_COST, DANGER_ZONE_COST] constraint_cost_weights = [LIMIT_COST, LIMIT_COST, LIMIT_COST, DANGER_ZONE_COST]
elif self.mode == 'blended': elif self.mode == 'blended':
cost_weights = [0., 0.2, 0.25, 1.0, 0.0, 1.0] cost_weights = [0., 0.1, 0.2, 5.0, 0.0, 1.0]
constraint_cost_weights = [LIMIT_COST, LIMIT_COST, LIMIT_COST, 50.0] constraint_cost_weights = [LIMIT_COST, LIMIT_COST, LIMIT_COST, 50.0]
else: else:
raise NotImplementedError(f'Planner mode {self.mode} not recognized in planner cost set') raise NotImplementedError(f'Planner mode {self.mode} not recognized in planner cost set')

@ -58,7 +58,6 @@ class LongitudinalPlanner:
self.a_desired = init_a self.a_desired = init_a
self.v_desired_filter = FirstOrderFilter(init_v, 2.0, DT_MDL) self.v_desired_filter = FirstOrderFilter(init_v, 2.0, DT_MDL)
self.t_uniform = np.arange(0.0, T_IDXS_MPC[-1] + 0.5, 0.5)
self.v_desired_trajectory = np.zeros(CONTROL_N) self.v_desired_trajectory = np.zeros(CONTROL_N)
self.a_desired_trajectory = np.zeros(CONTROL_N) self.a_desired_trajectory = np.zeros(CONTROL_N)
@ -76,10 +75,7 @@ class LongitudinalPlanner:
x = np.interp(T_IDXS_MPC, T_IDXS, model_msg.position.x) x = np.interp(T_IDXS_MPC, T_IDXS, model_msg.position.x)
v = np.interp(T_IDXS_MPC, T_IDXS, model_msg.velocity.x) v = np.interp(T_IDXS_MPC, T_IDXS, model_msg.velocity.x)
a = np.interp(T_IDXS_MPC, T_IDXS, model_msg.acceleration.x) a = np.interp(T_IDXS_MPC, T_IDXS, model_msg.acceleration.x)
# Uniform interp so gradient is less noisy j = np.zeros(len(T_IDXS_MPC))
a_sparse = np.interp(self.t_uniform, T_IDXS, model_msg.acceleration.x)
j_sparse = np.gradient(a_sparse, self.t_uniform)
j = np.interp(T_IDXS_MPC, self.t_uniform, j_sparse)
else: else:
x = np.zeros(len(T_IDXS_MPC)) x = np.zeros(len(T_IDXS_MPC))
v = np.zeros(len(T_IDXS_MPC)) v = np.zeros(len(T_IDXS_MPC))

@ -1,7 +1,8 @@
import unittest import unittest
import numpy as np import numpy as np
from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import LateralMpc from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import LateralMpc
from selfdrive.controls.lib.drive_helpers import LAT_MPC_N, CAR_ROTATION_RADIUS from selfdrive.controls.lib.drive_helpers import CAR_ROTATION_RADIUS
from selfdrive.controls.lib.lateral_mpc_lib.lat_mpc import N as LAT_MPC_N
def run_mpc(lat_mpc=None, v_ref=30., x_init=0., y_init=0., psi_init=0., curvature_init=0., def run_mpc(lat_mpc=None, v_ref=30., x_init=0., y_init=0., psi_init=0., curvature_init=0.,
@ -9,7 +10,7 @@ def run_mpc(lat_mpc=None, v_ref=30., x_init=0., y_init=0., psi_init=0., curvatur
if lat_mpc is None: if lat_mpc is None:
lat_mpc = LateralMpc() lat_mpc = LateralMpc()
lat_mpc.set_weights(1., 1., 1.) lat_mpc.set_weights(1., 1., 0.0, 1.)
y_pts = poly_shift * np.ones(LAT_MPC_N + 1) y_pts = poly_shift * np.ones(LAT_MPC_N + 1)
heading_pts = np.zeros(LAT_MPC_N + 1) heading_pts = np.zeros(LAT_MPC_N + 1)

@ -71,9 +71,9 @@ if use_thneed and arch == "larch64" or GetOption('pc_thneed'):
fn = File("models/supercombo").abspath fn = File("models/supercombo").abspath
if GetOption('pc_thneed'): if GetOption('pc_thneed'):
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed" cmd = f"cd {Dir('#').abspath}/tinygrad_repo && GPU=1 NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed"
else: else:
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && FLOAT16=1 PYOPENCL_NO_CACHE=1 MATMUL=1 NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed" cmd = f"cd {Dir('#').abspath}/tinygrad_repo && FLOAT16=1 MATMUL=1 PYOPENCL_NO_CACHE=1 NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed"
# is there a better way then listing all of tinygrad? # is there a better way then listing all of tinygrad?
lenv.Command(fn + ".thneed", [fn + ".onnx", lenv.Command(fn + ".thneed", [fn + ".onnx",

@ -41,11 +41,11 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
&s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context); &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context);
#ifdef TEMPORAL #ifdef TEMPORAL
s->m->addRecurrent(&s->output[OUTPUT_SIZE], TEMPORAL_SIZE); s->m->addRecurrent(&s->feature_buffer[0], TEMPORAL_SIZE);
#endif #endif
#ifdef DESIRE #ifdef DESIRE
s->m->addDesire(s->pulse_desire, DESIRE_LEN); s->m->addDesire(s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1));
#endif #endif
#ifdef TRAFFIC_CONVENTION #ifdef TRAFFIC_CONVENTION
@ -56,18 +56,20 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf, ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf,
const mat3 &transform, const mat3 &transform_wide, float *desire_in, bool is_rhd, bool prepare_only) { const mat3 &transform, const mat3 &transform_wide, float *desire_in, bool is_rhd, bool prepare_only) {
#ifdef DESIRE #ifdef DESIRE
std::memmove(&s->pulse_desire[0], &s->pulse_desire[DESIRE_LEN], sizeof(float) * DESIRE_LEN*HISTORY_BUFFER_LEN);
if (desire_in != NULL) { if (desire_in != NULL) {
for (int i = 1; i < DESIRE_LEN; i++) { for (int i = 1; i < DESIRE_LEN; i++) {
// Model decides when action is completed // Model decides when action is completed
// so desire input is just a pulse triggered on rising edge // so desire input is just a pulse triggered on rising edge
if (desire_in[i] - s->prev_desire[i] > .99) { if (desire_in[i] - s->prev_desire[i] > .99) {
s->pulse_desire[i] = desire_in[i]; s->pulse_desire[DESIRE_LEN*(HISTORY_BUFFER_LEN-1)+i] = desire_in[i];
} else { } else {
s->pulse_desire[i] = 0.0; s->pulse_desire[DESIRE_LEN*(HISTORY_BUFFER_LEN-1)+i] = 0.0;
} }
s->prev_desire[i] = desire_in[i]; s->prev_desire[i] = desire_in[i];
} }
} }
LOGT("Desire enqueued");
#endif #endif
int rhd_idx = is_rhd; int rhd_idx = is_rhd;
@ -92,6 +94,12 @@ ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf,
s->m->execute(); s->m->execute();
LOGT("Execution finished"); LOGT("Execution finished");
#ifdef TEMPORAL
std::memmove(&s->feature_buffer[0], &s->feature_buffer[FEATURE_LEN], sizeof(float) * FEATURE_LEN*(HISTORY_BUFFER_LEN-1));
std::memcpy(&s->feature_buffer[FEATURE_LEN*(HISTORY_BUFFER_LEN-1)], &s->output[OUTPUT_SIZE], sizeof(float) * FEATURE_LEN);
LOGT("Features enqueued");
#endif
return (ModelOutput*)&s->output; return (ModelOutput*)&s->output;
} }

@ -16,6 +16,8 @@
#include "selfdrive/modeld/models/commonmodel.h" #include "selfdrive/modeld/models/commonmodel.h"
#include "selfdrive/modeld/runners/run.h" #include "selfdrive/modeld/runners/run.h"
constexpr int FEATURE_LEN = 2048;
constexpr int HISTORY_BUFFER_LEN = 99;
constexpr int DESIRE_LEN = 8; constexpr int DESIRE_LEN = 8;
constexpr int DESIRE_PRED_LEN = 4; constexpr int DESIRE_PRED_LEN = 4;
constexpr int TRAFFIC_CONVENTION_LEN = 2; constexpr int TRAFFIC_CONVENTION_LEN = 2;
@ -233,6 +235,11 @@ struct ModelOutputMeta {
}; };
static_assert(sizeof(ModelOutputMeta) == sizeof(ModelOutputDesireProb) + sizeof(float) + (sizeof(ModelOutputDisengageProb)*DISENGAGE_LEN) + (sizeof(ModelOutputBlinkerProb)*BLINKER_LEN) + (sizeof(ModelOutputDesireProb)*DESIRE_PRED_LEN)); static_assert(sizeof(ModelOutputMeta) == sizeof(ModelOutputDesireProb) + sizeof(float) + (sizeof(ModelOutputDisengageProb)*DISENGAGE_LEN) + (sizeof(ModelOutputBlinkerProb)*BLINKER_LEN) + (sizeof(ModelOutputDesireProb)*DESIRE_PRED_LEN));
struct ModelOutputFeatures {
std::array<float, FEATURE_LEN> feature;
};
static_assert(sizeof(ModelOutputFeatures) == (sizeof(float)*FEATURE_LEN));
struct ModelOutput { struct ModelOutput {
const ModelOutputPlans plans; const ModelOutputPlans plans;
const ModelOutputLaneLines lane_lines; const ModelOutputLaneLines lane_lines;
@ -244,22 +251,24 @@ struct ModelOutput {
}; };
constexpr int OUTPUT_SIZE = sizeof(ModelOutput) / sizeof(float); constexpr int OUTPUT_SIZE = sizeof(ModelOutput) / sizeof(float);
#ifdef TEMPORAL #ifdef TEMPORAL
constexpr int TEMPORAL_SIZE = 512; constexpr int TEMPORAL_SIZE = HISTORY_BUFFER_LEN * FEATURE_LEN;
#else #else
constexpr int TEMPORAL_SIZE = 0; constexpr int TEMPORAL_SIZE = 0;
#endif #endif
constexpr int NET_OUTPUT_SIZE = OUTPUT_SIZE + TEMPORAL_SIZE; constexpr int NET_OUTPUT_SIZE = OUTPUT_SIZE + FEATURE_LEN;
// TODO: convert remaining arrays to std::array and update model runners // TODO: convert remaining arrays to std::array and update model runners
struct ModelState { struct ModelState {
ModelFrame *frame = nullptr; ModelFrame *frame = nullptr;
ModelFrame *wide_frame = nullptr; ModelFrame *wide_frame = nullptr;
std::array<float, HISTORY_BUFFER_LEN * FEATURE_LEN> feature_buffer = {};
std::array<float, NET_OUTPUT_SIZE> output = {}; std::array<float, NET_OUTPUT_SIZE> output = {};
std::unique_ptr<RunModel> m; std::unique_ptr<RunModel> m;
#ifdef DESIRE #ifdef DESIRE
float prev_desire[DESIRE_LEN] = {}; float prev_desire[DESIRE_LEN] = {};
float pulse_desire[DESIRE_LEN] = {}; float pulse_desire[DESIRE_LEN*(HISTORY_BUFFER_LEN+1)] = {};
#endif #endif
#ifdef TRAFFIC_CONVENTION #ifdef TRAFFIC_CONVENTION
float traffic_convention[TRAFFIC_CONVENTION_LEN] = {}; float traffic_convention[TRAFFIC_CONVENTION_LEN] = {};

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1 version https://git-lfs.github.com/spec/v1
oid sha256:50c7fc8565ac69a4b9a0de122e961326820e78bf13659255a89d0ed04be030d5 oid sha256:30f30bc1251c03db135564ecbf7dc0bc96cbb07be0ebd3691edd8d555dc087fa
size 95167481 size 58539693

@ -9,6 +9,8 @@ os.environ["OMP_WAIT_POLICY"] = "PASSIVE"
import onnxruntime as ort # pylint: disable=import-error import onnxruntime as ort # pylint: disable=import-error
ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}
def read(sz, tf8=False): def read(sz, tf8=False):
dd = [] dd = []
gt = 0 gt = 0
@ -18,7 +20,7 @@ def read(sz, tf8=False):
assert(len(st) > 0) assert(len(st) > 0)
dd.append(st) dd.append(st)
gt += len(st) gt += len(st)
r = np.frombuffer(b''.join(dd), dtype=np.uint8 if tf8 else np.float32).astype(np.float32) r = np.frombuffer(b''.join(dd), dtype=np.uint8 if tf8 else np.float32)
if tf8: if tf8:
r = r / 255. r = r / 255.
return r return r
@ -29,22 +31,23 @@ def write(d):
def run_loop(m, tf8_input=False): def run_loop(m, tf8_input=False):
ishapes = [[1]+ii.shape[1:] for ii in m.get_inputs()] ishapes = [[1]+ii.shape[1:] for ii in m.get_inputs()]
keys = [x.name for x in m.get_inputs()] keys = [x.name for x in m.get_inputs()]
itypes = [ORT_TYPES_TO_NP_TYPES[x.type] for x in m.get_inputs()]
# run once to initialize CUDA provider # run once to initialize CUDA provider
if "CUDAExecutionProvider" in m.get_providers(): if "CUDAExecutionProvider" in m.get_providers():
m.run(None, dict(zip(keys, [np.zeros(shp, dtype=np.float32) for shp in ishapes]))) m.run(None, dict(zip(keys, [np.zeros(shp, dtype=itp) for shp, itp in zip(ishapes, itypes)])))
print("ready to run onnx model", keys, ishapes, file=sys.stderr) print("ready to run onnx model", keys, ishapes, file=sys.stderr)
while 1: while 1:
inputs = [] inputs = []
for k, shp in zip(keys, ishapes): for k, shp, itp in zip(keys, ishapes, itypes):
ts = np.product(shp) ts = np.product(shp)
#print("reshaping %s with offset %d" % (str(shp), offset), file=sys.stderr) #print("reshaping %s with offset %d" % (str(shp), offset), file=sys.stderr)
inputs.append(read(ts, (k=='input_img' and tf8_input)).reshape(shp)) inputs.append(read(ts, (k=='input_img' and tf8_input)).reshape(shp).astype(itp))
ret = m.run(None, dict(zip(keys, inputs))) ret = m.run(None, dict(zip(keys, inputs)))
#print(ret, file=sys.stderr) #print(ret, file=sys.stderr)
for r in ret: for r in ret:
write(r) write(r.astype(np.float32))
if __name__ == "__main__": if __name__ == "__main__":

@ -12,7 +12,7 @@ map<pair<cl_kernel, int>, int> g_args_size;
map<cl_program, string> g_program_source; map<cl_program, string> g_program_source;
void Thneed::stop() { void Thneed::stop() {
printf("Thneed::stop: recorded %lu commands\n", cmds.size()); //printf("Thneed::stop: recorded %lu commands\n", cmds.size());
record = false; record = false;
} }

@ -41,7 +41,7 @@ def remove_ignored_fields(msg, ignore):
elif isinstance(v, numbers.Number): elif isinstance(v, numbers.Number):
val = 0 val = 0
else: else:
raise NotImplementedError raise NotImplementedError('Error ignoring field')
setattr(attr, keys[-1], val) setattr(attr, keys[-1], val)
return msg.as_reader() return msg.as_reader()

@ -22,7 +22,7 @@ from tools.lib.logreader import LogReader
TEST_ROUTE = "4cf7a6ad03080c90|2021-09-29--13-46-36" TEST_ROUTE = "4cf7a6ad03080c90|2021-09-29--13-46-36"
SEGMENT = 0 SEGMENT = 0
MAX_FRAMES = 10 if PC else 1300 MAX_FRAMES = 100 if PC else 1300
SEND_EXTRA_INPUTS = bool(os.getenv("SEND_EXTRA_INPUTS", "0")) SEND_EXTRA_INPUTS = bool(os.getenv("SEND_EXTRA_INPUTS", "0"))
@ -174,7 +174,7 @@ if __name__ == "__main__":
'driverStateV2.dspExecutionTime' 'driverStateV2.dspExecutionTime'
] ]
# TODO this tolerance is absurdly large # TODO this tolerance is absurdly large
tolerance = 5e-1 if PC else None tolerance = 2.0 if PC else None
results: Any = {TEST_ROUTE: {}} results: Any = {TEST_ROUTE: {}}
log_paths: Any = {TEST_ROUTE: {"models": {'ref': BASE_URL + log_fn, 'new': log_fn}}} log_paths: Any = {TEST_ROUTE: {"models": {'ref': BASE_URL + log_fn, 'new': log_fn}}}
results[TEST_ROUTE]["models"] = compare_logs(cmp_log, log_msgs, tolerance=tolerance, ignore_fields=ignore) results[TEST_ROUTE]["models"] = compare_logs(cmp_log, log_msgs, tolerance=tolerance, ignore_fields=ignore)

@ -1 +1 @@
c40319a454840d8a2196ec1227d27b536ee14375 008c0a622b7471c6234690d668f76bcb5dc8d999

@ -1 +1 @@
1989d8c2a5de94faa3756b7d10fc94e6c063afa5 e0ffcae8def2fd9c82c547d1f257d4f06a48a3c3

@ -1 +1 @@
Subproject commit 2e9b7637b3c3c8895fda9f964215db3a35fe3441 Subproject commit 82ca9c66664b5f3739e1881f62cb94b72cf0b1fa
Loading…
Cancel
Save