From ce92fd1a0ff88fc9510054c8af6c157d9c4dff78 Mon Sep 17 00:00:00 2001
From: Andrei Radulescu
Date: Sat, 12 Jul 2025 05:48:35 +0300
Subject: [PATCH] modeld: autodetect tinygrad backend (#35405)

* modeld: autodetect tinygrad backend

* modeld: autodetect tinygrad CUDA backend

* Revert "modeld: autodetect tinygrad CUDA backend"

This reverts commit 0e9755fb3c5c2021de27f4d230bd0a162883bc37.

* comment why llvm@19

Co-authored-by: Adeeb Shihadeh

* backend from jit

* fix static analysis

* simplify

* compile flags log

---------

Co-authored-by: Adeeb Shihadeh
---
 selfdrive/modeld/SConscript                  | 29 ++++++++++++++------
 selfdrive/modeld/dmonitoringmodeld.py        |  7 ++++-
 selfdrive/modeld/modeld.py                   |  8 ++++--
 selfdrive/modeld/runners/tinygrad_helpers.py |  5 +++-
 tools/mac_setup.sh                           |  2 +-
 5 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript
index 0da3c24e19..c36f1dae90 100644
--- a/selfdrive/modeld/SConscript
+++ b/selfdrive/modeld/SConscript
@@ -14,9 +14,11 @@ common_src = [
   "transforms/transform.cc",
 ]
 
-# OpenCL is a framework on Mac
 if arch == "Darwin":
+  # OpenCL is a framework on Mac
   frameworks += ['OpenCL']
+  # Fix for METAL Error: $HOME must be set to run brew
+  lenv['ENV']['HOME'] = os.environ['HOME']
 else:
   libs += ['OpenCL']
 
@@ -47,19 +49,30 @@ def tg_compile(flags, model_name):
     f'{pythonpath_string} {flags} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
   )
 
+# because tg doesn't support multi-process
+import subprocess
+devs = subprocess.check_output('python3 -c "from tinygrad import Device; print(list(Device.get_available_devices()))"', shell=True, cwd=env.Dir('#').abspath)
+print("Available tinygrad devices:", devs)
+
+if b"QCOM" in devs:
+  flags = 'QCOM=1'
+elif b"METAL" in devs:
+  flags = 'METAL=1 IMAGE=0 NOLOCALS=0'
+elif b"GPU" in devs:
+  flags = 'GPU=1'
+elif b"LLVM" in devs:
+  flags = 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0 JIT=2'
+else:
+  flags = 'CPU=1 IMAGE=0 JIT=2'
+
+print(f"Compiling models with flags: '{flags}'")
+
 # Compile small models
 for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
-  flags = {
-    'larch64': 'QCOM=1',
-    'Darwin': 'CPU=1 IMAGE=0 JIT=2',
-  }.get(arch, 'LLVM=1 LLVMOPT=1 BEAM=0 IMAGE=0 JIT=2')
   tg_compile(flags, model_name)
 
 # Compile BIG model if USB GPU is available
 if "USBGPU" in os.environ:
-  import subprocess
-  # because tg doesn't support multi-process
-  devs = subprocess.check_output('python3 -c "from tinygrad import Device; print(list(Device.get_available_devices()))"', shell=True, cwd=env.Dir('#').abspath)
   if b"AMD" in devs:
     print("USB GPU detected... building")
     flags = "AMD=1 AMD_IFACE=USB AMD_LLVM=1 NOLOCALS=0 IMAGE=0"
diff --git a/selfdrive/modeld/dmonitoringmodeld.py b/selfdrive/modeld/dmonitoringmodeld.py
index 7ab31ed2b4..ce5e77ce8c 100755
--- a/selfdrive/modeld/dmonitoringmodeld.py
+++ b/selfdrive/modeld/dmonitoringmodeld.py
@@ -7,7 +7,7 @@ if TICI:
   from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
   os.environ['QCOM'] = '1'
 else:
-  os.environ['LLVM'] = '1'
+  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import backend_from_jit
 import math
 import time
 import pickle
@@ -79,6 +79,11 @@ class ModelState:
     with open(MODEL_PKL_PATH, "rb") as f:
       self.model_run = pickle.load(f)
 
+    if not TICI:
+      backend = backend_from_jit(self.model_run)
+      os.environ[backend] = '1'
+      cloudlog.warning(f"dmonitoringmodeld backend set to {backend}")
+
   def run(self, buf: VisionBuf, calib: np.ndarray, transform: np.ndarray) -> tuple[np.ndarray, float]:
     self.numpy_inputs['calib'][0,:] = calib
 
diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
index 298abcd38a..3baea09597 100755
--- a/selfdrive/modeld/modeld.py
+++ b/selfdrive/modeld/modeld.py
@@ -9,8 +9,7 @@ elif TICI:
   from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
   os.environ['QCOM'] = '1'
 else:
-  os.environ['LLVM'] = '1'
-  os.environ['JIT'] = '2'
+  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import backend_from_jit
 from tinygrad.tensor import Tensor
 from tinygrad.dtype import dtypes
 import time
@@ -131,6 +130,11 @@ class ModelState:
     with open(POLICY_PKL_PATH, "rb") as f:
       self.policy_run = pickle.load(f)
 
+    if not TICI and not USBGPU:
+      backend = backend_from_jit(self.vision_run)
+      os.environ[backend] = '1'
+      cloudlog.warning(f"modeld backend set to {backend}")
+
   def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]:
     parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()}
     return parsed_model_outputs
diff --git a/selfdrive/modeld/runners/tinygrad_helpers.py b/selfdrive/modeld/runners/tinygrad_helpers.py
index 776381341c..edaa0c83c4 100644
--- a/selfdrive/modeld/runners/tinygrad_helpers.py
+++ b/selfdrive/modeld/runners/tinygrad_helpers.py
@@ -1,4 +1,3 @@
-
 from tinygrad.tensor import Tensor
 from tinygrad.helpers import to_mv
 
@@ -6,3 +5,7 @@ def qcom_tensor_from_opencl_address(opencl_address, shape, dtype):
   cl_buf_desc_ptr = to_mv(opencl_address, 8).cast('Q')[0]
   rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20]  # offset 0xA0 is a raw gpu pointer.
   return Tensor.from_blob(rawbuf_ptr, shape, dtype=dtype, device='QCOM')
+
+def backend_from_jit(jit):
+  for cached_item in jit.captured.jit_cache:
+    return cached_item.prg.device
diff --git a/tools/mac_setup.sh b/tools/mac_setup.sh
index d23052d0f0..7420d783a7 100755
--- a/tools/mac_setup.sh
+++ b/tools/mac_setup.sh
@@ -43,7 +43,7 @@ brew "glfw"
 brew "libarchive"
 brew "libusb"
 brew "libtool"
-brew "llvm"
+brew "llvm@19" # pinned for tinygrad
 brew "openssl@3.0"
 brew "qt@5"
 brew "zeromq"