You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
68 lines
3.7 KiB
68 lines
3.7 KiB
1 month ago
|
from __future__ import annotations
|
||
|
import ctypes, functools
|
||
|
from tinygrad.helpers import init_c_var, from_mv, init_c_struct_t, getenv
|
||
|
from tinygrad.device import Compiled, LRUAllocator, BufferSpec
|
||
|
from tinygrad.runtime.autogen import hip
|
||
|
from tinygrad.runtime.support.compiler_hip import AMDCompiler
|
||
|
from tinygrad.renderer.cstyle import HIPRenderer
|
||
|
if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import
|
||
|
|
||
|
def check(status):
|
||
|
if status != 0: raise RuntimeError(f"HIP Error {status}, {ctypes.string_at(hip.hipGetErrorString(status)).decode()}")
|
||
|
|
||
|
class HIPProgram:
|
||
|
def __init__(self, dev:HIPDevice, name:str, lib:bytes):
|
||
|
self.dev, self.name, self.lib = dev, name, lib
|
||
|
check(hip.hipSetDevice(self.dev.device_id))
|
||
|
self.module = init_c_var(hip.hipModule_t(), lambda x: check(hip.hipModuleLoadData(ctypes.byref(x), lib)))
|
||
|
self.prg = init_c_var(hip.hipFunction_t(), lambda x: check(hip.hipModuleGetFunction(ctypes.byref(x), self.module, name.encode("utf-8"))))
|
||
|
|
||
|
def __del__(self):
|
||
|
if hasattr(self, 'module'): check(hip.hipModuleUnload(self.module))
|
||
|
|
||
|
def __call__(self, *args, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int, ...]=(), wait=False):
|
||
|
check(hip.hipSetDevice(self.dev.device_id))
|
||
|
if not hasattr(self, "vargs"):
|
||
|
self.c_args = init_c_struct_t(tuple([(f'f{i}', hip.hipDeviceptr_t) for i in range(len(args))] +
|
||
|
[(f'v{i}', ctypes.c_int) for i in range(len(vals))]))(*args, *vals)
|
||
|
self.vargs = (ctypes.c_void_p * 5)(1, ctypes.cast(ctypes.byref(self.c_args), ctypes.c_void_p), 2,
|
||
|
ctypes.cast(ctypes.pointer(ctypes.c_size_t(ctypes.sizeof(self.c_args))), ctypes.c_void_p), 3)
|
||
|
|
||
|
for i in range(len(args)): self.c_args.__setattr__(f'f{i}', args[i])
|
||
|
for i in range(len(vals)): self.c_args.__setattr__(f'v{i}', vals[i])
|
||
|
|
||
|
if wait: check(hip.hipEventRecord(self.dev.time_event_st, None))
|
||
|
|
||
|
check(hip.hipModuleLaunchKernel(self.prg, *global_size, *local_size, 0, None, None, self.vargs))
|
||
|
|
||
|
if wait:
|
||
|
check(hip.hipEventRecord(self.dev.time_event_en, None))
|
||
|
check(hip.hipEventSynchronize(self.dev.time_event_en))
|
||
|
check(hip.hipEventElapsedTime(ctypes.byref(ret := ctypes.c_float()), self.dev.time_event_st, self.dev.time_event_en))
|
||
|
return ret.value * 1e-3
|
||
|
|
||
|
class HIPAllocator(LRUAllocator):
|
||
|
def __init__(self, dev:HIPDevice):
|
||
|
self.dev = dev
|
||
|
super().__init__()
|
||
|
def _alloc(self, size:int, options:BufferSpec):
|
||
|
check(hip.hipSetDevice(self.dev.device_id))
|
||
|
return init_c_var(hip.hipDeviceptr_t(), lambda x: check(hip.hipMalloc(ctypes.byref(x), size)))
|
||
|
def _free(self, opaque, options:BufferSpec): check(hip.hipFree(opaque))
|
||
|
def _copyin(self, dest, src: memoryview):
|
||
|
check(hip.hipSetDevice(self.dev.device_id))
|
||
|
check(hip.hipMemcpy(dest, from_mv(src), len(src), hip.hipMemcpyHostToDevice))
|
||
|
def _copyout(self, dest:memoryview, src):
|
||
|
self.dev.synchronize()
|
||
|
check(hip.hipMemcpy(from_mv(dest), src, len(dest), hip.hipMemcpyDeviceToHost))
|
||
|
|
||
|
class HIPDevice(Compiled):
|
||
|
def __init__(self, device:str=""):
|
||
|
self.device_id = int(device.split(":")[1]) if ":" in device else 0
|
||
|
self.arch = init_c_var(hip.hipDeviceProp_t(), lambda x: check(hip.hipGetDeviceProperties(x, self.device_id))).gcnArchName.decode()
|
||
|
self.time_event_st, self.time_event_en = [init_c_var(hip.hipEvent_t(), lambda x: hip.hipEventCreate(ctypes.byref(x), 0)) for _ in range(2)]
|
||
|
super().__init__(device, HIPAllocator(self), HIPRenderer(), AMDCompiler(self.arch), functools.partial(HIPProgram, self))
|
||
|
def synchronize(self):
|
||
|
check(hip.hipSetDevice(self.device_id))
|
||
|
check(hip.hipDeviceSynchronize())
|