You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
63 lines
3.6 KiB
63 lines
3.6 KiB
import ctypes, platform
|
|
from tinygrad.device import Compiled, Compiler, MallocAllocator, CPUProgram
|
|
from tinygrad.helpers import OSX, getenv, capstone_flatdump, DEBUG
|
|
from tinygrad.renderer.llvmir import LLVMRenderer
|
|
import tinygrad.runtime.autogen.llvm as llvm
|
|
from tinygrad.runtime.support.elf import jit_loader
|
|
|
|
def cerr():
  """Return a new pointer to a pointer to a fresh c_char.

  The callers in this file pass the result to LLVM functions as the error-message argument and then hand it to expect().
  """
  inner = ctypes.pointer(ctypes.c_char())
  return ctypes.pointer(inner)
|
|
|
|
def expect(x, err, ret=None):
  """Return ret when x is falsy, otherwise raise RuntimeError.

  x: status value of a call; any truthy value is treated as failure.
  err: either a ready-made str message, or an object whose .contents is converted with llvm.string_cast.
  ret: value handed back on success (defaults to None).
  """
  if not x: return ret
  message = err if isinstance(err, str) else llvm.string_cast(err.contents)
  raise RuntimeError(message)
|
|
|
|
class LLVMCompiler(Compiler):
  """Compiles LLVM IR text to machine code for a single target through the LLVM C API bindings.

  Class attributes:
    jit: when true, compile() returns jit_loader(obj) instead of the raw emitted object file.
    target_arch: LLVM backend name ('AArch64' or 'X86') looked up from platform.machine() when the class body runs;
      a machine name that is not listed raises KeyError at that point.
  """
  jit = True
  target_arch = {'arm64': 'AArch64', 'aarch64': 'AArch64', 'x86_64': 'X86', 'AMD64': 'X86'}[platform.machine()]

  def __init__(self, processor:str, feats:str):
    """Initialise the backend for target_arch, create the target machine and select the pass pipeline.

    processor and feats are encoded and passed to LLVMCreateTargetMachine as the CPU name and feature string.
    Raises RuntimeError (through expect) when LLVMGetTargetFromTriple reports failure.
    """
    for component in ['Target', 'TargetInfo', 'TargetMC', 'AsmParser', 'AsmPrinter']: getattr(llvm, f'LLVMInitialize{self.target_arch}{component}')()

    triple = {'AArch64': b'aarch64-none-unknown-elf', 'X86': b'x86_64-none-unknown-elf', 'AMDGPU': b'amdgcn-amd-amdhsa'}[self.target_arch]
    target = expect(llvm.LLVMGetTargetFromTriple(triple, ctypes.pointer(tgt:=llvm.LLVMTargetRef()), err:=cerr()), err, tgt)
    if DEBUG >= 2: print(f"LLVM init for {processor!r} with {feats!r}")
    self.target_machine = llvm.LLVMCreateTargetMachine(target, triple, processor.encode(), feats.encode(),
                                                       llvm.LLVMCodeGenLevelDefault, llvm.LLVMRelocPIC, llvm.LLVMCodeModelDefault)

    self.pbo = llvm.LLVMCreatePassBuilderOptions()
    # bool() of any non-empty string is True, so bool("0") would leave optimisation on; compare the text instead.
    # Empty and "0" mean off, every other value keeps optimisation on.
    # NOTE(review): assumes getenv returns the variable's value as a str or an int -- confirm against tinygrad.helpers.getenv
    opt = str(getenv("LLVMOPT", "1")).strip() not in ("", "0")
    if opt:
      self.passes = b'default<O2>'
      llvm.LLVMPassBuilderOptionsSetLoopUnrolling(self.pbo, True)
      llvm.LLVMPassBuilderOptionsSetLoopVectorization(self.pbo, True)
      llvm.LLVMPassBuilderOptionsSetSLPVectorization(self.pbo, True)
      llvm.LLVMPassBuilderOptionsSetVerifyEach(self.pbo, True)
    else:
      self.passes = b'default<O0>'

    super().__init__(f"compile_llvm_{self.target_arch}{'_jit' if self.jit else ''}{'_opt' if opt else ''}")

  def __del__(self):
    """Dispose the pass builder options if __init__ got far enough to create them."""
    # __init__ can raise before self.pbo is assigned; the finalizer must not raise a second error in that case
    if (pbo:=getattr(self, 'pbo', None)) is not None: llvm.LLVMDisposePassBuilderOptions(pbo)

  def compile(self, src:str) -> bytes:
    """Parse, verify, run self.passes on and emit an object file for the LLVM IR in src.

    Returns jit_loader(obj) when self.jit is true, otherwise the object file bytes.
    Raises RuntimeError (through expect) when parsing, verification, the passes or emission report failure.
    """
    src_bytes = src.encode()
    src_buf = llvm.LLVMCreateMemoryBufferWithMemoryRangeCopy(ctypes.create_string_buffer(src_bytes), len(src_bytes), b'src')
    mod = expect(llvm.LLVMParseIRInContext(llvm.LLVMGetGlobalContext(), src_buf, ctypes.pointer(m:=llvm.LLVMModuleRef()), err:=cerr()), err, m)
    try:
      expect(llvm.LLVMVerifyModule(mod, llvm.LLVMReturnStatusAction, err:=cerr()), err)
      expect(llvm.LLVMRunPasses(mod, self.passes, self.target_machine, self.pbo), 'failed to run passes')
      if DEBUG >= 7: print(ctypes.string_at(llvm.LLVMPrintModuleToString(mod)).decode())
      obj_buf = expect(llvm.LLVMTargetMachineEmitToMemoryBuffer(self.target_machine, mod, llvm.LLVMObjectFile, err:=cerr(),
                                                                ctypes.pointer(buf:=llvm.LLVMMemoryBufferRef())), err, buf)
    finally:
      # dispose the module on every path, including when one of the steps above raises
      llvm.LLVMDisposeModule(mod)
    # copy the object file into Python bytes before releasing the LLVM-owned buffer
    obj = ctypes.string_at(llvm.LLVMGetBufferStart(obj_buf), llvm.LLVMGetBufferSize(obj_buf))
    llvm.LLVMDisposeMemoryBuffer(obj_buf)
    return jit_loader(obj) if self.jit else obj

  def disassemble(self, lib:bytes):
    """Pass lib to capstone_flatdump; returns None."""
    capstone_flatdump(lib)
|
|
|
|
class HostLLVMCompiler(LLVMCompiler):
  """LLVMCompiler set up with the CPU name and feature string that LLVM reports for the host."""
  def __init__(self):
    """Query LLVM for the host CPU name and features and forward them, decoded, to LLVMCompiler.__init__."""
    cpu = ctypes.string_at(llvm.LLVMGetHostCPUName())
    feats = ctypes.string_at(llvm.LLVMGetHostCPUFeatures())
    # +reserve-x18 here does the same thing as -ffixed-x18 in ops_cpu.py, see comments there for why it's needed on arm osx
    # x18 is an AArch64 register, so the flag is only added when targeting AArch64 and not on x86_64 macOS hosts
    if OSX and self.target_arch == 'AArch64': feats = b'+reserve-x18,' + feats
    super().__init__(cpu.decode(), feats.decode())
|
|
|
|
class LLVMDevice(Compiled):
  """Compiled device wired up with MallocAllocator, an LLVMRenderer, a HostLLVMCompiler and CPUProgram."""
  def __init__(self, device:str):
    """Create the renderer and host compiler, then hand everything to Compiled.__init__ under the given device name."""
    renderer = LLVMRenderer()
    compiler = HostLLVMCompiler()
    super().__init__(device, MallocAllocator, renderer, compiler, CPUProgram)
|
|
|