External GPU support for big models (#35172)

* usb gpu

* cleanup

---------

Co-authored-by: Comma Device <device@comma.ai>
pull/35214/head
Adeeb Shihadeh 7 days ago committed by GitHub
parent 478015e788
commit d0bf2be6f0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 19
      selfdrive/modeld/SConscript
  2. 12
      selfdrive/modeld/modeld.py

@ -1,3 +1,4 @@
import os
import glob
Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'gpucommon', 'visionipc', 'transformations')
@ -13,7 +14,6 @@ common_src = [
"transforms/transform.cc",
]
# OpenCL is a framework on Mac
if arch == "Darwin":
frameworks += ['OpenCL']
@ -40,15 +40,16 @@ for model_name in ['driving_vision', 'driving_policy']:
# Compile tinygrad model
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
if arch == 'larch64':
device_string = 'QCOM=1'
elif arch == 'Darwin':
device_string = 'CLANG=1 IMAGE=0 JIT=2'
else:
device_string = 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0 JIT=2'
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
if "USBGPU" in os.environ and not model_name.startswith("dmon"):
device_string = "AMD=1 AMD_LLVM=1 NOLOCALS=0 IMAGE=0"
elif arch == 'larch64':
device_string = 'QCOM=1'
elif arch == 'Darwin':
device_string = 'CLANG=1 IMAGE=0 JIT=2'
else:
device_string = 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0 JIT=2'
fn = File(f"models/{model_name}").abspath
cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd)

@ -1,7 +1,10 @@
#!/usr/bin/env python3
import os
from openpilot.system.hardware import TICI
if TICI:
USBGPU = "USBGPU" in os.environ
if USBGPU:
os.environ['AMD'] = '1'
elif TICI:
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
os.environ['QCOM'] = '1'
else:
@ -147,7 +150,7 @@ class ModelState:
imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
if TICI:
if TICI and not USBGPU:
# The imgs tensors are backed by opencl memory, only need init once
for key in imgs_cl:
if key not in self.vision_inputs:
@ -188,7 +191,10 @@ def main(demo=False):
sentry.set_tag("daemon", PROCESS_NAME)
cloudlog.bind(daemon=PROCESS_NAME)
setproctitle(PROCESS_NAME)
config_realtime_process(7, 54)
if not USBGPU:
# The USB GPU currently saturates a core, so we can't do this yet;
# we also need to move the aux USB interrupts to get good timings.
config_realtime_process(7, 54)
cloudlog.warning("setting up CL context")
cl_context = CLContext()

Loading…
Cancel
Save