openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
3.7 KiB

1 month ago
from __future__ import annotations
import ctypes, functools
from tinygrad.helpers import init_c_var, from_mv, init_c_struct_t, getenv
from tinygrad.device import Compiled, LRUAllocator, BufferSpec
from tinygrad.runtime.autogen import hip
from tinygrad.runtime.support.compiler_hip import AMDCompiler
from tinygrad.renderer.cstyle import HIPRenderer
if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import
def check(status):
  """Raise ``RuntimeError`` when a HIP call reports a non-zero status.

  A status of 0 is treated as success and the function returns ``None``. Any other
  value becomes an exception whose message carries the numeric code together with
  the text returned by ``hip.hipGetErrorString`` for that code.
  """
  if status == 0: return
  message = ctypes.string_at(hip.hipGetErrorString(status)).decode()
  raise RuntimeError(f"HIP Error {status}, {message}")
class HIPProgram:
  """A HIP kernel: a module loaded from a binary plus one function looked up in it."""

  def __init__(self, dev:HIPDevice, name:str, lib:bytes):
    """Load ``lib`` as a HIP module on ``dev`` and resolve the function called ``name``.

    Every HIP call goes through ``check``, so a non-zero status raises RuntimeError.
    """
    self.dev = dev
    self.name = name
    self.lib = lib
    check(hip.hipSetDevice(self.dev.device_id))

    def load_module(mod): check(hip.hipModuleLoadData(ctypes.byref(mod), lib))
    self.module = init_c_var(hip.hipModule_t(), load_module)

    def lookup_function(fxn): check(hip.hipModuleGetFunction(ctypes.byref(fxn), self.module, name.encode("utf-8")))
    self.prg = init_c_var(hip.hipFunction_t(), lookup_function)

  def __del__(self):
    """Unload the module, if one was ever stored on this instance."""
    # 'module' is absent when __init__ raised before the assignment happened.
    if not hasattr(self, 'module'): return
    check(hip.hipModuleUnload(self.module))

  def __call__(self, *args, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int, ...]=(), wait=False):
    """Launch the kernel.

    ``args`` fill the ``hipDeviceptr_t`` fields and ``vals`` fill the ``c_int`` fields of the
    argument struct. ``global_size`` and ``local_size`` are forwarded, in that order, as the
    six dimension arguments of ``hipModuleLaunchKernel``.

    Returns ``None`` unless ``wait`` is truthy; in that case the launch is bracketed by the
    device's two timing events and the elapsed event time scaled by 1e-3 is returned.
    """
    check(hip.hipSetDevice(self.dev.device_id))

    # The struct layout and the launch-config array are built once, on the first call, and
    # reused afterwards; later calls only overwrite the field values.
    if not hasattr(self, "vargs"):
      pointer_fields = [(f'f{i}', hip.hipDeviceptr_t) for i in range(len(args))]
      int_fields = [(f'v{i}', ctypes.c_int) for i in range(len(vals))]
      args_struct_t = init_c_struct_t(tuple(pointer_fields + int_fields))
      self.c_args = args_struct_t(*args, *vals)
      struct_addr = ctypes.cast(ctypes.byref(self.c_args), ctypes.c_void_p)
      size_addr = ctypes.cast(ctypes.pointer(ctypes.c_size_t(ctypes.sizeof(self.c_args))), ctypes.c_void_p)
      # NOTE(review): 1, 2, 3 look like HIP's HIP_LAUNCH_PARAM_BUFFER_POINTER / _BUFFER_SIZE / _END
      # markers for the `extra` launch argument -- confirm against the HIP headers.
      self.vargs = (ctypes.c_void_p * 5)(1, struct_addr, 2, size_addr, 3)

    for idx, buf in enumerate(args): setattr(self.c_args, f'f{idx}', buf)
    for idx, val in enumerate(vals): setattr(self.c_args, f'v{idx}', val)

    if wait: check(hip.hipEventRecord(self.dev.time_event_st, None))
    check(hip.hipModuleLaunchKernel(self.prg, *global_size, *local_size, 0, None, None, self.vargs))
    if not wait: return None

    check(hip.hipEventRecord(self.dev.time_event_en, None))
    check(hip.hipEventSynchronize(self.dev.time_event_en))
    elapsed = ctypes.c_float()
    check(hip.hipEventElapsedTime(ctypes.byref(elapsed), self.dev.time_event_st, self.dev.time_event_en))
    # presumably hipEventElapsedTime reports milliseconds, making this seconds -- see HIP event docs
    return elapsed.value * 1e-3
class HIPAllocator(LRUAllocator):
  """``LRUAllocator`` subclass that allocates, frees and copies memory through the HIP runtime."""

  def __init__(self, dev:HIPDevice):
    """Remember the owning device, then run the base-class initializer."""
    self.dev = dev
    super().__init__()

  def _alloc(self, size:int, options:BufferSpec):
    """Make ``dev`` current and return a ``hipDeviceptr_t`` filled in by ``hipMalloc(size)``."""
    check(hip.hipSetDevice(self.dev.device_id))
    def malloc_into(ptr): check(hip.hipMalloc(ctypes.byref(ptr), size))
    return init_c_var(hip.hipDeviceptr_t(), malloc_into)

  def _free(self, opaque, options:BufferSpec):
    """Release ``opaque`` with ``hipFree``; ``options`` is not used here."""
    check(hip.hipFree(opaque))

  def _copyin(self, dest, src: memoryview):
    """Copy from the host memoryview ``src`` to ``dest`` with a host-to-device ``hipMemcpy``."""
    check(hip.hipSetDevice(self.dev.device_id))
    host_src = from_mv(src)
    # len(src) is passed as the copy size -- assumes src is a flat byte view; TODO confirm
    check(hip.hipMemcpy(dest, host_src, len(src), hip.hipMemcpyHostToDevice))

  def _copyout(self, dest:memoryview, src):
    """Synchronize the device, then copy from ``src`` into the host memoryview ``dest``."""
    self.dev.synchronize()
    host_dest = from_mv(dest)
    check(hip.hipMemcpy(host_dest, src, len(dest), hip.hipMemcpyDeviceToHost))
class HIPDevice(Compiled):
  """``Compiled`` device wired to the HIP runtime (HIPAllocator, HIPRenderer, AMDCompiler, HIPProgram)."""

  def __init__(self, device:str=""):
    """Set up the device named by ``device``.

    The integer found after the first ':' in ``device`` selects the HIP device; without a ':'
    device 0 is used. The device's ``gcnArchName`` is read and handed to ``AMDCompiler``, and
    two HIP events are created for kernel timing.

    Raises RuntimeError (through ``check``) if any HIP call returns a non-zero status, and
    ValueError if the text after ':' is not an integer.
    """
    self.device_id = int(device.split(":")[1]) if ":" in device else 0

    props = init_c_var(hip.hipDeviceProp_t(), lambda x: check(hip.hipGetDeviceProperties(x, self.device_id)))
    self.arch = props.gcnArchName.decode()

    # The status of hipEventCreate used to be dropped; route it through check() like every
    # other HIP call so a failed creation is reported here rather than at the first timed launch.
    # NOTE(review): the trailing 0 is kept from the original call -- confirm it matches the binding.
    self.time_event_st, self.time_event_en = [
      init_c_var(hip.hipEvent_t(), lambda x: check(hip.hipEventCreate(ctypes.byref(x), 0))) for _ in range(2)]

    super().__init__(device, HIPAllocator(self), HIPRenderer(), AMDCompiler(self.arch), functools.partial(HIPProgram, self))

  def synchronize(self):
    """Make this device current and block in ``hipDeviceSynchronize``."""
    check(hip.hipSetDevice(self.device_id))
    check(hip.hipDeviceSynchronize())