#!/bin/bash -e
#
# Regenerates the Python ctypes stubs under tinygrad/runtime/autogen/ by
# running clang2py over vendor headers and then hot-patching the generated
# files with sed.
#
# Usage: <this script> <type>
#   <type> is one of the targets handled by the case statement at the bottom
#   of this file, or "all" for the default set.
#
# Every generate_* function writes into $BASE and, where the original did so,
# finishes by importing the generated module with python3 as a smoke test.
#
# The sed expressions that start with "s\" use a backslash as the s-command
# delimiter; they are kept byte-for-byte.

# setup instructions for clang2py
if [[ ! $(clang2py -V) ]]; then
  pushd .
  cd /tmp
  sudo apt-get install -y --no-install-recommends clang
  pip install --upgrade pip setuptools
  pip install clang==14.0.6
  git clone https://github.com/nimlgen/ctypeslib.git
  cd ctypeslib
  pip install --user .
  clang2py -V
  popd
fi

BASE=tinygrad/runtime/autogen/

# Post-processes one generated file in place.
# Arguments: $1 - path of the generated .py file
# Prepends a "# mypy: ignore-errors" line, strips trailing spaces, and prints
# any remaining FIXME_STUB lines (a file without any is not an error).
fixup() {
  sed -i '1s/^/# mypy: ignore-errors\n/' "$1"
  sed -i 's/ *$//' "$1"
  grep FIXME_STUB "$1" || true
}

# Inserts a `_try_dlopen_<name>()` helper into a generated file.
# Arguments: $1 - path of the generated .py file
#            $2 - library name; also the suffix of the generated function
#            $3.. - Python expressions evaluating to candidate library paths
# NOTE(review): the heredoc body of this function was stripped from the copy
# of the script this rewrite was made from. What follows is reconstructed
# from the call in generate_comgr (which expects `_try_dlopen_amd_comgr()` to
# exist afterwards) - verify against version control before relying on it.
patch_dlopen() {
  local path=$1; shift
  local name=$1; shift
  cat <<EOF | sed -i "/import ctypes.*/r /dev/stdin" "$path"
PATHS_TO_TRY = [
$(for p in "$@"; do echo "  $p,"; done)
]
def _try_dlopen_$name():
  library = ctypes.util.find_library("$name")
  if library: return ctypes.CDLL(library)
  for candidate in PATHS_TO_TRY:
    try: return ctypes.CDLL(candidate)
    except OSError: pass
  return None
EOF
}

# NOTE(review): generate_opencl was entirely missing from the copy of the
# script this rewrite was made from (only its name survives, in the dispatch
# below). The body is reconstructed by analogy with generate_cuda - verify
# the header path, library path and flags against version control.
generate_opencl() {
  clang2py /usr/include/CL/cl.h -o $BASE/opencl.py -l /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 -k cdefstum
  fixup $BASE/opencl.py
  # hot patches
  sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/opencl.py
  sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libOpenCL.so.1')\ctypes.CDLL(ctypes.util.find_library('OpenCL'))\g" $BASE/opencl.py
  python3 -c "import tinygrad.runtime.autogen.opencl"
}

# Generates hip.py and makes the ROCm install prefix overridable via ROCM_PATH.
generate_hip() {
  # NOTE(review): the clang2py invocation and the first echo below were
  # stripped from the copy of the script this rewrite was made from and are
  # reconstructed - verify against version control. Everything from the
  # hipGetDeviceProperties line onwards is original.
  clang2py /opt/rocm/include/hip/hip_ext.h /opt/rocm/include/hip/hiprtc.h \
    /opt/rocm/include/hip/hip_runtime_api.h /opt/rocm/include/hip/driver_types.h \
    --clang-args="-D__HIP_PLATFORM_AMD__ -I/opt/rocm/include -x c++" -o $BASE/hip.py -l /opt/rocm/lib/libamdhip64.so
  echo "hipDeviceProp_t = hipDeviceProp_tR0600" >> $BASE/hip.py
  echo "hipGetDeviceProperties = hipGetDevicePropertiesR0600" >> $BASE/hip.py
  fixup $BASE/hip.py
  # we can trust HIP is always at /opt/rocm/lib
  #sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/hip.py
  #sed -i "s\ctypes.CDLL('/opt/rocm/lib/libhiprtc.so')\ctypes.CDLL(ctypes.util.find_library('hiprtc'))\g" $BASE/hip.py
  #sed -i "s\ctypes.CDLL('/opt/rocm/lib/libamdhip64.so')\ctypes.CDLL(ctypes.util.find_library('amdhip64'))\g" $BASE/hip.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/hip.py
  sed -i "s\'/opt/rocm/\os.getenv('ROCM_PATH', '/opt/rocm/')+'/\g" $BASE/hip.py
  python3 -c "import tinygrad.runtime.autogen.hip"
}

# Generates comgr.py and replaces the hard-coded CDLL call with the
# _try_dlopen_amd_comgr() helper that patch_dlopen inserts.
generate_comgr() {
  clang2py /opt/rocm/include/amd_comgr/amd_comgr.h \
    --clang-args="-D__HIP_PLATFORM_AMD__ -I/opt/rocm/include -x c++" -o $BASE/comgr.py -l /opt/rocm/lib/libamd_comgr.so
  fixup $BASE/comgr.py
  sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/comgr.py
  patch_dlopen $BASE/comgr.py amd_comgr "'/opt/rocm/lib/libamd_comgr.so'" "os.getenv('ROCM_PATH', '')+'/lib/libamd_comgr.so'" "'/usr/local/lib/libamd_comgr.dylib'" "'/opt/homebrew/lib/libamd_comgr.dylib'"
  sed -i "s\ctypes.CDLL('/opt/rocm/lib/libamd_comgr.so')\_try_dlopen_amd_comgr()\g" $BASE/comgr.py
  python3 -c "import tinygrad.runtime.autogen.comgr"
}

# Generates kfd.py and redirects its ioctl call from fcntl.ioctl(__fd, ...)
# to __fd.ioctl(...).
generate_kfd() {
  clang2py /usr/include/linux/kfd_ioctl.h -o $BASE/kfd.py -k cdefstum

  fixup $BASE/kfd.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/kfd.py
  # The closing delimiter was missing here, which makes sed reject the
  # expression ("unterminated s command") and aborts the script under -e.
  sed -i "s\import fcntl, functools\import functools\g" $BASE/kfd.py
  # NOTE(review): the line above this one in the pipeline writes
  # "import ctypes, os" (with a space), so the pattern "import ctypes,os"
  # appears to match nothing and no HWInterface import is added. The
  # replacement text looks like it was meant to be a sed append command.
  # Left unchanged; confirm the intended module path before fixing.
  sed -i "s\import ctypes,os\a from tinygrad.runtime.support import HWInterface\g" $BASE/kfd.py
  # NOTE(review): "**" in the pattern is unescaped, so it is read as a
  # repetition operator rather than two literal asterisks and this
  # substitution appears never to match. Left unchanged because enabling it
  # without the import above would make the generated module fail to import.
  sed -i "s\def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, **kwargs):\def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, **kwargs):\g" $BASE/kfd.py
  sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\__fd.ioctl((__idir<<30)\g" $BASE/kfd.py
  python3 -c "import tinygrad.runtime.autogen.kfd"
}

# Generates cuda.py and loads libcuda through ctypes.util.find_library.
generate_cuda() {
  clang2py /usr/include/cuda.h -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so
  sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py
  sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so')\ctypes.CDLL(ctypes.util.find_library('cuda'))\g" $BASE/cuda.py
  fixup $BASE/cuda.py
  python3 -c "import tinygrad.runtime.autogen.cuda"
}

# Generates nvrtc.py (nvrtc + nvJitLink) and loads both libraries through
# ctypes.util.find_library.
generate_nvrtc() {
  clang2py /usr/local/cuda/include/nvrtc.h /usr/local/cuda/include/nvJitLink.h -o $BASE/nvrtc.py -l /usr/local/cuda/lib64/libnvrtc.so -l /usr/local/cuda/lib64/libnvJitLink.so
  sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/nvrtc.py
  sed -i "s\ctypes.CDLL('/usr/local/cuda/lib64/libnvrtc.so')\ctypes.CDLL(ctypes.util.find_library('nvrtc'))\g" $BASE/nvrtc.py
  sed -i "s\ctypes.CDLL('/usr/local/cuda/lib64/libnvJitLink.so')\ctypes.CDLL(ctypes.util.find_library('nvJitLink'))\g" $BASE/nvrtc.py
  fixup $BASE/nvrtc.py
  python3 -c "import tinygrad.runtime.autogen.nvrtc"
}

# Generates nv_gpu.py from a pinned checkout of open-gpu-kernel-modules
# (cloned into /tmp on first use) and appends the NV status code table.
generate_nv() {
  local NVKERN_COMMIT_HASH=d6b75a34094b0f56c2ccadf14e5d0bd515ed1ab6
  local NVKERN_SRC=/tmp/open-gpu-kernel-modules-$NVKERN_COMMIT_HASH
  if [ ! -d "$NVKERN_SRC" ]; then
    git clone https://github.com/tinygrad/open-gpu-kernel-modules $NVKERN_SRC
    pushd .
    cd $NVKERN_SRC
    git reset --hard $NVKERN_COMMIT_HASH
    popd
  fi

  # NOTE(review): clc56f.h is listed three times; kept exactly as found.
  clang2py -k cdefstum \
    extra/nv_gpu_driver/clc6c0qmd.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl0080.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl2080_notification.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc56f.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc56f.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc56f.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl83de.h \
    $NVKERN_SRC/src/nvidia/generated/g_allclasses.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc6c0.h \
    $NVKERN_SRC/kernel-open/nvidia-uvm/clc6b5.h \
    $NVKERN_SRC/kernel-open/nvidia-uvm/uvm_ioctl.h \
    $NVKERN_SRC/kernel-open/nvidia-uvm/uvm_linux_ioctl.h \
    $NVKERN_SRC/kernel-open/nvidia-uvm/hwref/ampere/ga100/dev_fault.h \
    $NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv_escape.h \
    $NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-ioctl.h \
    $NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-ioctl-numbers.h \
    $NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-ioctl-numa.h \
    $NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-unix-nvos-params-wrappers.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/alloc/alloc_channel.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/nvos.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl0000/*.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl0080/*.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl2080/*.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl83de/*.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrlc36f.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrlcb33.h \
    $NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrla06c.h \
    --clang-args="-include $NVKERN_SRC/src/common/sdk/nvidia/inc/nvtypes.h -I$NVKERN_SRC/src/common/inc -I$NVKERN_SRC/kernel-open/nvidia-uvm -I$NVKERN_SRC/kernel-open/common/inc -I$NVKERN_SRC/src/common/sdk/nvidia/inc -I$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include -I$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl" \
    -o $BASE/nv_gpu.py
  fixup $BASE/nv_gpu.py
  sed -i "s\(0000000001)\1\g" $BASE/nv_gpu.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/nv_gpu.py
  # name = MW ( hi : lo ) -> name = (hi , lo), dropping a leading "#" if present
  sed -i 's/#\?\s\([A-Za-z0-9_]\+\) = MW ( \([0-9]\+\) : \([0-9]\+\) )/\1 = (\2 , \3)/' $BASE/nv_gpu.py # NVC6C0_QMDV03_00 processing
  # un-comment the NVC6C0_QMD* function definitions and their MW(...) returns
  sed -i 's/#\sdef NVC6C0_QMD\([A-Za-z0-9_()]\+\):/def NVC6C0_QMD\1:/' $BASE/nv_gpu.py
  sed -i 's/#\s*return MW(\([0-9i()*+]\+\):\([0-9i()*+]\+\))/ return (\1 , \2)/' $BASE/nv_gpu.py
  sed -i 's/#\?\s*\(.*\)\s*=\s*\(NV\)\?BIT\(32\)\?\s*(\s*\([0-9]\+\)\s*)/\1 = (1 << \4)/' $BASE/nv_gpu.py # name = BIT(x) -> name = (1 << x)
  sed -i "s/UVM_\([A-Za-z0-9_]\+\) = \['i', '(', '\([0-9]\+\)', ')'\]/UVM_\1 = \2/" $BASE/nv_gpu.py # UVM_name = ['i', '(', '<num>', ')'] -> UVM_name = <num>

  # Parse status codes: emit "nv_status_codes = {}" first, then for every
  # NV_STATUS_CODE(name, value, "text") line emit
  #   name = value
  #   nv_status_codes[name] = "text"
  sed -n '1i\
nv_status_codes = {}
/^NV_STATUS_CODE/ {
  s/^NV_STATUS_CODE(\([^,]*\), *\([^,]*\), *"\([^"]*\)") *.*$/\1 = \2\nnv_status_codes[\1] = "\3"/;
  p
}' $NVKERN_SRC/src/common/sdk/nvidia/inc/nvstatuscodes.h >> $BASE/nv_gpu.py

  python3 -c "import tinygrad.runtime.autogen.nv_gpu"
}

# Generates amd_gpu.py from the headers vendored under extra/hip_gpu_driver.
generate_amd() {
  # clang2py broken when pass -x c++ to prev headers
  clang2py -k cdefstum \
    extra/hip_gpu_driver/sdma_registers.h \
    extra/hip_gpu_driver/nvd.h \
    extra/hip_gpu_driver/kfd_pm4_headers_ai.h \
    extra/hip_gpu_driver/soc21_enum.h \
    extra/hip_gpu_driver/sdma_v6_0_0_pkt_open.h \
    extra/hip_gpu_driver/gc_11_0_0_offset.h \
    extra/hip_gpu_driver/gc_10_3_0_offset.h \
    extra/hip_gpu_driver/sienna_cichlid_ip_offset.h \
    --clang-args="-I/opt/rocm/include -x c++" \
    -o $BASE/amd_gpu.py

  fixup $BASE/amd_gpu.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/amd_gpu.py
  python3 -c "import tinygrad.runtime.autogen.amd_gpu"
}

# Generates hsa.py; the library is loaded from $ROCM_PATH/lib when ROCM_PATH
# is set and through ctypes.util.find_library otherwise.
generate_hsa() {
  clang2py \
    /opt/rocm/include/hsa/hsa.h \
    /opt/rocm/include/hsa/hsa_ext_amd.h \
    /opt/rocm/include/hsa/amd_hsa_signal.h \
    /opt/rocm/include/hsa/amd_hsa_queue.h \
    /opt/rocm/include/hsa/amd_hsa_kernel_code.h \
    /opt/rocm/include/hsa/hsa_ext_finalize.h /opt/rocm/include/hsa/hsa_ext_image.h \
    /opt/rocm/include/hsa/hsa_ven_amd_aqlprofile.h \
    --clang-args="-I/opt/rocm/include" \
    -o $BASE/hsa.py -l /opt/rocm/lib/libhsa-runtime64.so

  fixup $BASE/hsa.py
  sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/hsa.py
  sed -i "s\ctypes.CDLL('/opt/rocm/lib/libhsa-runtime64.so')\ctypes.CDLL(os.getenv('ROCM_PATH')+'/lib/libhsa-runtime64.so' if os.getenv('ROCM_PATH') else ctypes.util.find_library('hsa-runtime64'))\g" $BASE/hsa.py
  python3 -c "import tinygrad.runtime.autogen.hsa"
}

# Generates io_uring.py and appends the NR_io_uring* syscall numbers taken
# from asm-generic/unistd.h.
generate_io_uring() {
  clang2py -k cdefstum \
    /usr/include/liburing.h \
    /usr/include/linux/io_uring.h \
    -o $BASE/io_uring.py

  # "#define __NR_io_uring_x N" -> "NR_io_uring_x = N"; all other lines dropped
  sed -r '/^#define __NR_io_uring/ s/^#define __(NR_io_uring[^ ]+) (.*)$/\1 = \2/; t; d' /usr/include/asm-generic/unistd.h >> $BASE/io_uring.py # io_uring syscalls numbers
  fixup $BASE/io_uring.py
}

# Generates libc.py; the stub loader is replaced with a CDLL of whatever
# ctypes.util.find_library('c') returns (None when it finds nothing).
generate_libc() {
  # The $(dpkg ...) substitutions are intentionally unquoted, as in the
  # original, so their output is passed as separate header arguments.
  clang2py -k cdefstum \
    $(dpkg -L libc6-dev | grep sys/mman.h) \
    $(dpkg -L libc6-dev | grep sys/syscall.h) \
    /usr/include/string.h \
    /usr/include/elf.h \
    /usr/include/unistd.h \
    /usr/include/asm-generic/mman-common.h \
    -o $BASE/libc.py

  sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/libc.py
  sed -i "s\FIXME_STUB\libc\g" $BASE/libc.py
  sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path)\g" $BASE/libc.py

  fixup $BASE/libc.py
}

# Generates llvm.py from the llvm-c headers reported by llvm-config-14.
generate_llvm() {
  local INC
  INC="$(llvm-config-14 --includedir)"
  # The $(find ...) substitution is intentionally unquoted so every header
  # becomes its own argument.
  clang2py -k cdefstum \
    $(find "$INC/llvm-c/" -type f -name '*.h' | sort) \
    "$INC/llvm/Config/Targets.def" \
    "$INC/llvm/Config/AsmPrinters.def" \
    "$INC/llvm/Config/AsmParsers.def" \
    "$INC/llvm/Config/Disassemblers.def" \
    --clang-args="$(llvm-config-14 --cflags)" \
    -o "$BASE/llvm.py"

  sed -i "s\import ctypes\import ctypes, tinygrad.runtime.support.llvm as llvm_support\g" "$BASE/llvm.py"
  sed -i "s\FIXME_STUB\llvm\g" "$BASE/llvm.py"
  sed -i "s\FunctionFactoryStub()\ctypes.CDLL(llvm_support.LLVM_PATH)\g" "$BASE/llvm.py"

  fixup "$BASE/llvm.py"
}

# Generates kgsl.py and appends one accessor per *_SHIFT define:
#   def NAME(val): return (val << NAME_SHIFT) & NAME_MASK
generate_kgsl() {
  clang2py extra/qcom_gpu_driver/msm_kgsl.h -o $BASE/kgsl.py -k cdefstum
  fixup $BASE/kgsl.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/kgsl.py
  sed -nE 's/#define ([A-Za-z0-9_]+)_SHIFT\s*[^\S\r\n]*[0-9]*$/def \1(val): return (val << \1_SHIFT) \& \1_MASK/p' extra/qcom_gpu_driver/msm_kgsl.h >> $BASE/kgsl.py
  sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\__fd.ioctl((__idir<<30)\g" $BASE/kgsl.py
  python3 -c "import tinygrad.runtime.autogen.kgsl"
}

# Generates adreno.py and appends one accessor per *__SHIFT define:
#   def NAME(val): return (val << NAME__SHIFT) & NAME__MASK
generate_adreno() {
  clang2py extra/qcom_gpu_driver/a6xx.xml.h -o $BASE/adreno.py -k cestum
  sed -nE 's/#define ([A-Za-z0-9_]+)__SHIFT\s*[^\S\r\n]*[0-9]*$/def \1(val): return (val << \1__SHIFT) \& \1__MASK/p' extra/qcom_gpu_driver/a6xx.xml.h >> $BASE/adreno.py
  fixup $BASE/adreno.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/adreno.py
  python3 -c "import tinygrad.runtime.autogen.adreno"
}

# Generates qcom_dsp.py from the headers vendored under extra/dsp/include.
generate_qcom() {
  clang2py -k cdefstum \
    extra/dsp/include/ion.h \
    extra/dsp/include/msm_ion.h \
    extra/dsp/include/adsprpc_shared.h \
    extra/dsp/include/remote_default.h \
    extra/dsp/include/apps_std.h \
    -o $BASE/qcom_dsp.py

  fixup $BASE/qcom_dsp.py
  python3 -c "import tinygrad.runtime.autogen.qcom_dsp"
}

# Generates pci.py from linux/pci_regs.h.
generate_pci() {
  clang2py -k cdefstum \
    /usr/include/linux/pci_regs.h \
    -o $BASE/pci.py
  fixup $BASE/pci.py
}

# Generates vfio.py and redirects its ioctl call to __fd.ioctl(...).
generate_vfio() {
  clang2py -k cdefstum \
    /usr/include/linux/vfio.h \
    -o $BASE/vfio.py
  fixup $BASE/vfio.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/vfio.py
  # The closing delimiter was missing here, which makes sed reject the
  # expression ("unterminated s command") and aborts the script under -e.
  sed -i "s\import fcntl, functools\import functools\g" $BASE/vfio.py
  # NOTE(review): the pattern "import ctypes,os" (no space) appears to match
  # nothing, since the step above writes "import ctypes, os". Left unchanged;
  # confirm the intended module path before turning this into an append.
  sed -i "s\import ctypes,os\a from tinygrad.runtime.support import HWInterface\g" $BASE/vfio.py
  sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\return __fd.ioctl((__idir<<30)\g" $BASE/vfio.py
}

# Runs `clang2py -k cdefstum <args...> -o <out>` followed by `fixup <out>`.
# Arguments: $1 - output .py path
#            $2.. - headers and any extra clang2py options, in the order they
#                   should appear before -o
_clang2py_cdefstum() {
  local out=$1; shift
  clang2py -k cdefstum "$@" -o "$out"
  fixup "$out"
}

# Generates everything under $BASE/am/ from a ROCK-Kernel-Driver checkout
# (cloned into /tmp on first use) plus headers vendored under extra/.
generate_am() {
  local AMKERN_COMMIT_HASH=ceb12c04e2b5b53ec0779362831f5ee40c4921e4
  local AMKERN_SRC=/tmp/ROCK-Kernel-Driver-$AMKERN_COMMIT_HASH
  if [ ! -d "$AMKERN_SRC" ]; then
    # NOTE(review): this is a depth-1 clone of the default branch; nothing
    # here checks out $AMKERN_COMMIT_HASH, it only names the directory.
    git clone https://github.com/ROCm/ROCK-Kernel-Driver $AMKERN_SRC --depth 1
  fi
  local AMKERN_AMD=$AMKERN_SRC/drivers/gpu/drm/amd/
  local AMKERN_INC=$AMKERN_AMD/include/

  _clang2py_cdefstum $BASE/am/am.py \
    extra/amdpci/headers/v11_structs.h \
    extra/amdpci/headers/v12_structs.h \
    extra/amdpci/headers/amdgpu_vm.h \
    extra/amdpci/headers/discovery.h \
    extra/amdpci/headers/amdgpu_ucode.h \
    extra/amdpci/headers/psp_gfx_if.h \
    extra/amdpci/headers/amdgpu_psp.h \
    extra/amdpci/headers/amdgpu_irq.h \
    extra/amdpci/headers/amdgpu_doorbell.h \
    $AMKERN_INC/soc15_ih_clientid.h \
    --clang-args="-include stdint.h"
  sed -i "s\(int64_t)\ \g" $BASE/am/am.py
  sed -i "s\AMDGPU_PTE_MTYPE_VG10(2)\AMDGPU_PTE_MTYPE_VG10(0, 2)\g" $BASE/am/am.py # incorrect parsing (TODO: remove when clang2py is gone).

  _clang2py_cdefstum $BASE/am/pm4_soc15.py \
    $AMKERN_AMD/amdkfd/kfd_pm4_headers_ai.h \
    $AMKERN_AMD/amdgpu/soc15d.h

  _clang2py_cdefstum $BASE/am/pm4_nv.py \
    $AMKERN_AMD/amdkfd/kfd_pm4_headers_ai.h \
    $AMKERN_AMD/amdgpu/nvd.h

  _clang2py_cdefstum $BASE/am/vega10.py \
    $AMKERN_INC/vega10_enum.h

  _clang2py_cdefstum $BASE/am/navi10.py \
    $AMKERN_INC/navi10_enum.h

  _clang2py_cdefstum $BASE/am/soc21.py \
    $AMKERN_INC/soc21_enum.h

  _clang2py_cdefstum $BASE/am/soc24.py \
    $AMKERN_INC/soc24_enum.h

  _clang2py_cdefstum $BASE/am/mp_13_0_0.py \
    $AMKERN_INC/asic_reg/mp/mp_13_0_0_offset.h \
    $AMKERN_INC/asic_reg/mp/mp_13_0_0_sh_mask.h

  # 14_0_3 reuses 14_0_2
  _clang2py_cdefstum $BASE/am/mp_14_0_3.py \
    $AMKERN_INC/asic_reg/mp/mp_14_0_2_offset.h \
    $AMKERN_INC/asic_reg/mp/mp_14_0_2_sh_mask.h

  _clang2py_cdefstum $BASE/am/mp_11_0.py \
    $AMKERN_INC/asic_reg/mp/mp_11_0_offset.h \
    $AMKERN_INC/asic_reg/mp/mp_11_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/gc_9_4_3.py \
    $AMKERN_INC/asic_reg/gc/gc_9_4_3_offset.h \
    $AMKERN_INC/asic_reg/gc/gc_9_4_3_sh_mask.h \
    extra/amdpci/overlay/gc_9_4_3.h

  _clang2py_cdefstum $BASE/am/gc_10_3_0.py \
    $AMKERN_INC/asic_reg/gc/gc_10_3_0_offset.h \
    $AMKERN_INC/asic_reg/gc/gc_10_3_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/gc_11_0_0.py \
    $AMKERN_INC/asic_reg/gc/gc_11_0_0_offset.h \
    $AMKERN_INC/asic_reg/gc/gc_11_0_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/gc_12_0_0.py \
    $AMKERN_INC/asic_reg/gc/gc_12_0_0_offset.h \
    $AMKERN_INC/asic_reg/gc/gc_12_0_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/sdma_4_0_0.py \
    extra/hip_gpu_driver/sdma_registers.h \
    $AMKERN_AMD/amdgpu/vega10_sdma_pkt_open.h \
    --clang-args="-I/opt/rocm/include -x c++"

  _clang2py_cdefstum $BASE/am/sdma_5_0_0.py \
    extra/hip_gpu_driver/sdma_registers.h \
    $AMKERN_AMD/amdgpu/navi10_sdma_pkt_open.h \
    --clang-args="-I/opt/rocm/include -x c++"

  _clang2py_cdefstum $BASE/am/sdma_6_0_0.py \
    extra/hip_gpu_driver/sdma_registers.h \
    $AMKERN_AMD/amdgpu/sdma_v6_0_0_pkt_open.h \
    --clang-args="-I/opt/rocm/include -x c++"

  _clang2py_cdefstum $BASE/am/mmhub_3_0_0.py \
    $AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_offset.h \
    $AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/mmhub_3_0_2.py \
    $AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_offset.h \
    $AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_sh_mask.h

  _clang2py_cdefstum $BASE/am/nbio_2_3_0.py \
    $AMKERN_INC/asic_reg/nbio/nbio_2_3_offset.h \
    $AMKERN_INC/asic_reg/nbio/nbio_2_3_sh_mask.h

  _clang2py_cdefstum $BASE/am/mmhub_4_1_0.py \
    $AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_offset.h \
    $AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/nbio_4_3_0.py \
    $AMKERN_INC/asic_reg/nbio/nbio_4_3_0_offset.h \
    $AMKERN_INC/asic_reg/nbio/nbio_4_3_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/nbif_6_3_1.py \
    $AMKERN_INC/asic_reg/nbif/nbif_6_3_1_offset.h \
    $AMKERN_INC/asic_reg/nbif/nbif_6_3_1_sh_mask.h

  _clang2py_cdefstum $BASE/am/nbio_7_9_0.py \
    $AMKERN_INC/asic_reg/nbio/nbio_7_9_0_offset.h \
    $AMKERN_INC/asic_reg/nbio/nbio_7_9_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/nbio_7_11_0.py \
    $AMKERN_INC/asic_reg/nbio/nbio_7_11_0_offset.h \
    $AMKERN_INC/asic_reg/nbio/nbio_7_11_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/osssys_6_0_0.py \
    $AMKERN_INC/asic_reg/oss/osssys_6_0_0_offset.h \
    $AMKERN_INC/asic_reg/oss/osssys_6_0_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/osssys_7_0_0.py \
    $AMKERN_INC/asic_reg/oss/osssys_7_0_0_offset.h \
    $AMKERN_INC/asic_reg/oss/osssys_7_0_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/smu_v13_0_0.py \
    $AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h \
    $AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h \
    extra/amdpci/headers/amdgpu_smu.h

  _clang2py_cdefstum $BASE/am/smu_v14_0_3.py \
    $AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h \
    $AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h \
    $AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h \
    $AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h \
    extra/amdpci/headers/amdgpu_smu.h \
    --clang-args="-include stdint.h"

  _clang2py_cdefstum $BASE/am/hdp_6_0_0.py \
    $AMKERN_INC/asic_reg/hdp/hdp_6_0_0_offset.h \
    $AMKERN_INC/asic_reg/hdp/hdp_6_0_0_sh_mask.h

  _clang2py_cdefstum $BASE/am/hdp_7_0_0.py \
    $AMKERN_INC/asic_reg/hdp/hdp_7_0_0_offset.h \
    $AMKERN_INC/asic_reg/hdp/hdp_7_0_0_sh_mask.h
}

# Generates sqtt.py from extra/sqtt/sqtt.h.
generate_sqtt() {
  clang2py -k cdefstum \
    extra/sqtt/sqtt.h \
    -o $BASE/sqtt.py
  fixup $BASE/sqtt.py
  sed -i "s\import ctypes\import ctypes, os\g" $BASE/sqtt.py
  python3 -c "import tinygrad.runtime.autogen.sqtt"
}

# Generates webgpu.py; the stub loader is replaced with a CDLL of
# webgpu_support.WEBGPU_PATH.
generate_webgpu() {
  clang2py extra/webgpu/webgpu.h -o $BASE/webgpu.py
  fixup $BASE/webgpu.py
  sed -i "s/FIXME_STUB/webgpu/g" "$BASE/webgpu.py"
  sed -i "s/FunctionFactoryStub()/ctypes.CDLL(webgpu_support.WEBGPU_PATH)/g" "$BASE/webgpu.py"
  sed -i "s/import ctypes/import ctypes, tinygrad.runtime.support.webgpu as webgpu_support/g" "$BASE/webgpu.py"
  python3 -c "import tinygrad.runtime.autogen.webgpu"
}

# Dispatch on the requested target. "all" runs the same subset, in the same
# order, as before; the targets it leaves out must be requested by name.
case "$1" in
  opencl)   generate_opencl ;;
  hip)      generate_hip ;;
  comgr)    generate_comgr ;;
  cuda)     generate_cuda ;;
  nvrtc)    generate_nvrtc ;;
  hsa)      generate_hsa ;;
  kfd)      generate_kfd ;;
  nv)       generate_nv ;;
  amd)      generate_amd ;;
  am)       generate_am ;;
  sqtt)     generate_sqtt ;;
  qcom)     generate_qcom ;;
  io_uring) generate_io_uring ;;
  libc)     generate_libc ;;
  llvm)     generate_llvm ;;
  kgsl)     generate_kgsl ;;
  adreno)   generate_adreno ;;
  pci)      generate_pci ;;
  vfio)     generate_vfio ;;
  webgpu)   generate_webgpu ;;
  all)
    generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_libc; generate_am; generate_webgpu
    ;;
  *)
    # An unknown or missing target is an error: report on stderr, exit 1.
    echo "usage: $0 <type>" >&2
    echo "  <type>: opencl hip comgr cuda nvrtc hsa kfd nv amd am sqtt qcom io_uring libc llvm kgsl adreno pci vfio webgpu all" >&2
    exit 1
    ;;
esac