import ctypes, subprocess, pathlib, tempfile
from tinygrad.device import Compiled, Compiler, MallocAllocator
from tinygrad.helpers import cpu_time_execution, cpu_objdump
from tinygrad.renderer.cstyle import ClangRenderer


class ClangCompiler(Compiler):
    """Compiler that invokes the ``clang`` executable to build C source into a shared library."""

    def __init__(self, cachekey="compile_clang", args:list[str]|None=None, objdump_tool='objdump'):
        """Store the extra clang flags and the objdump tool name, then initialise the base Compiler.

        Args:
            cachekey: Passed through unchanged to ``Compiler.__init__``.
            args: Extra flags spliced into the clang command line; ``['-march=native']`` when None.
            objdump_tool: Tool name handed to ``cpu_objdump`` by ``disassemble``.
        """
        self.args = args if args is not None else ['-march=native']
        self.objdump_tool = objdump_tool
        super().__init__(cachekey)

    def compile(self, src:str) -> bytes:
        """Compile C source text with clang and return the bytes of the resulting shared object.

        The source is fed to clang on stdin (UTF-8 encoded); the output is written to a
        temporary file that is read back before the file is removed.

        Raises:
            subprocess.CalledProcessError: if clang exits with a non-zero status.
        """
        # TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here
        with tempfile.NamedTemporaryFile(delete=True) as so_file:
            clang_cmd = [
                'clang', '-shared', *self.args, '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC',
                '-ffreestanding', '-nostdlib', '-', '-o', str(so_file.name),
            ]
            subprocess.check_output(clang_cmd, input=src.encode('utf-8'))
            # Read back through the path rather than the open handle: clang wrote via the file name.
            compiled = pathlib.Path(so_file.name).read_bytes()
            return compiled

    def disassemble(self, lib:bytes):
        """Hand the compiled library bytes to ``cpu_objdump`` together with the configured tool name."""
        return cpu_objdump(lib, self.objdump_tool)


class ClangProgram:
    """A function looked up by name in a compiled shared library and callable through ctypes."""

    def __init__(self, name:str, lib:bytes):
        """Load ``lib`` as a shared library and bind the symbol called ``name``.

        Args:
            name: Symbol to look up in the loaded library.
            lib: Raw bytes of the shared object.
        """
        self.name = name
        self.lib = lib
        # write to disk so we can load it
        with tempfile.NamedTemporaryFile(delete=True) as lib_file:
            pathlib.Path(lib_file.name).write_bytes(lib)
            loaded = ctypes.CDLL(str(lib_file.name))
            self.fxn = loaded[name]

    def __call__(self, *bufs, vals=(), wait=False):
        """Call the bound function with ``bufs`` followed by ``vals``, via ``cpu_time_execution``.

        ``wait`` is forwarded as ``enable``.
        NOTE(review): presumably this returns the elapsed time when ``wait`` is true — confirm
        against ``tinygrad.helpers.cpu_time_execution``.
        """
        def launch():
            return self.fxn(*bufs, *vals)

        return cpu_time_execution(launch, enable=wait)


class ClangDevice(Compiled):
    """Compiled device wired up with the malloc allocator, Clang renderer, compiler and program runner."""

    def __init__(self, device:str):
        """Initialise the base ``Compiled`` device with the Clang toolchain components."""
        super().__init__(
            device,
            MallocAllocator,
            ClangRenderer(),
            ClangCompiler(),
            ClangProgram,
        )