You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					257 lines
				
				14 KiB
			
		
		
			
		
	
	
					257 lines
				
				14 KiB
			| 
											4 months ago
										 | import ctypes, mmap, collections, functools, os
 | ||
| 
											7 months ago
										 | import tinygrad.runtime.autogen.nv_gpu as nv_gpu
 | ||
| 
											4 months ago
										 | from typing import Any
 | ||
| 
											7 months ago
										 | from tinygrad.helpers import to_mv
 | ||
| 
											4 months ago
										 | from test.mockgpu.driver import VirtDriver, VirtFileDesc, VirtFile
 | ||
|  | from test.mockgpu.nv.nvgpu import NVGPU
 | ||
| 
											7 months ago
										 | 
 | ||
|  | MAP_FIXED = 0x10
 | ||
|  | libc = ctypes.CDLL(ctypes.util.find_library("c"))
 | ||
|  | libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
 | ||
|  | libc.mmap.restype = ctypes.c_void_p
 | ||
|  | libc.munmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
 | ||
|  | libc.munmap.restype = ctypes.c_int
 | ||
|  | 
 | ||
# Immutable records describing the kinds of objects the mock driver creates.
# Each one names the object it was created from (and any extra data it carries).
NVSubDevice = collections.namedtuple('NVSubDevice', 'device')
NVUserMode = collections.namedtuple('NVUserMode', 'subdevice')
NVVASpace = collections.namedtuple('NVVASpace', 'device')
NVAllocation = collections.namedtuple('NVAllocation', 'device size')
NVChannelGroup = collections.namedtuple('NVChannelGroup', 'device')
NVContextShare = collections.namedtuple('NVContextShare', 'channel_group')
NVGPFIFO = collections.namedtuple('NVGPFIFO', 'device token')
 | ||
|  | 
 | ||
class NVCtlFileDesc(VirtFileDesc):
  """Virtual file descriptor whose ioctls are answered by the driver's `ctl_ioctl`."""

  def __init__(self, fd, driver):
    super().__init__(fd)
    self.driver = driver

  def ioctl(self, fd, request, argp):
    """Hand the request to the owning driver; `fd` is not used."""
    return self.driver.ctl_ioctl(request, argp)

  def mmap(self, start, sz, prot, flags, fd, offset):
    """Create an anonymous mapping via libc; `fd` and `offset` are ignored."""
    anon_flags = flags | mmap.MAP_ANONYMOUS
    return libc.mmap(start, sz, prot, anon_flags, -1, 0)
 | ||
|  | 
 | ||
class NVUVMFileDesc(VirtFileDesc):
  """Virtual file descriptor whose ioctls are answered by the driver's `uvm_ioctl`."""

  def __init__(self, fd, driver):
    super().__init__(fd)
    self.driver = driver

  def ioctl(self, fd, request, argp):
    """Hand the request to the owning driver; `fd` is not used."""
    return self.driver.uvm_ioctl(request, argp)

  def mmap(self, start, sz, prot, flags, fd, offset):
    """Create an anonymous mapping via libc; `fd` and `offset` are ignored."""
    anon_flags = flags | mmap.MAP_ANONYMOUS
    return libc.mmap(start, sz, prot, anon_flags, -1, 0)
 | ||
|  | 
 | ||
class NVDevFileDesc(VirtFileDesc):
  """Virtual file descriptor bound to one emulated GPU."""

  def __init__(self, fd, driver, gpu):
    super().__init__(fd)
    self.driver = driver
    self.gpu = gpu
    # Set by the driver when a usermode object is mapped through this descriptor.
    self._mapping_userland = False

  def ioctl(self, fd, request, argp):
    """Hand the request to the driver's `dev_ioctl` together with this descriptor's GPU."""
    return self.driver.dev_ioctl(self.gpu, request, argp)

  def mmap(self, start, sz, prot, flags, fd, offset):
    """Create an anonymous mapping; if flagged as a usermode mapping, have the driver track it.

    Returns the address reported by libc. `fd` and `offset` are ignored.
    """
    addr = libc.mmap(start, sz, prot, flags | mmap.MAP_ANONYMOUS, -1, 0)
    if not self._mapping_userland: return addr

    # First callback does nothing; the second forwards to the driver's MMIO write handler.
    def on_read(mv, off): return None
    def on_write(mv, off): return self.driver._gpu_mmio_write(mv, off, self.gpu)

    self.driver.track_address(addr, addr + sz, on_read, on_write)
    return addr
 | ||
|  | 
 | ||
class NVDriver(VirtDriver):
  """Emulated NVIDIA driver.

  Serves ioctls for the virtual `/dev/nvidiactl`, `/dev/nvidia-uvm` and
  `/dev/nvidia<N>` files and keeps a table of the objects created through
  `rm_alloc`, keyed by handle.
  """

  def __init__(self, gpus=6):
    """Register the control/UVM files and create `gpus` emulated GPUs."""
    super().__init__()

    self.tracked_files += [VirtFile('/dev/nvidiactl', functools.partial(NVCtlFileDesc, driver=self)),
                           VirtFile('/dev/nvidia-uvm', functools.partial(NVUVMFileDesc, driver=self))]

    self.root_handle = None

    self.gpus = {}
    self.next_fd = (1 << 29)
    self.next_handle = 1

    self.object_by_handle = {}
    self.opened_fds = {}
    self.next_doorbell = collections.defaultdict(int)

    for i in range(gpus): self._prepare_gpu(i)

  def _alloc_fd(self):
    """Return the next unused virtual fd number."""
    my_fd = self.next_fd
    self.next_fd = self.next_fd + 1
    return my_fd

  def _alloc_handle(self):
    """Return the next unused object handle."""
    handle = self.next_handle
    self.next_handle += 1
    return handle

  def _prepare_gpu(self, gpu_id):
    """Create the emulated GPU `gpu_id` and register its `/dev/nvidia<gpu_id>` file."""
    self.gpus[gpu_id] = NVGPU(gpu_id)
    self.tracked_files += [VirtFile(f'/dev/nvidia{gpu_id}', functools.partial(NVDevFileDesc, driver=self, gpu=self.gpus[gpu_id]))]

  def open(self, name, flags, mode, virtfile):
    """Instantiate the descriptor class of `virtfile` on a fresh fd and remember it."""
    cl = virtfile.fdcls(self._alloc_fd())
    self.opened_fds[cl.fd] = cl
    return cl

  def _typed_object(self, handle, kind):
    """Return the object registered under `handle`, asserting that it is an instance of `kind`."""
    assert handle in self.object_by_handle and isinstance(self.object_by_handle[handle], kind)
    return self.object_by_handle[handle]

  def _gpu_by_uuid(self, uuid):
    """Return the emulated GPU whose first 16 UUID bytes equal those of `uuid`, or None."""
    for candidate in self.gpus.values():
      # NOTE(review): fetched once per GPU instead of once per byte; assumes gpu_uuid() is deterministic.
      candidate_uuid = candidate.gpu_uuid()
      if all(uuid[i] == candidate_uuid[i] for i in range(16)): return candidate
    return None

  def rm_alloc(self, argp):
    """Handle an allocation request: create the object for `hClass` and write its handle to `hObjectNew`.

    `argp` is the address of an `NVOS21_PARAMETERS` struct. Returns 0.
    Raises RuntimeError for a class this mock does not know.
    """
    struct = nv_gpu.NVOS21_PARAMETERS.from_address(argp)
    params_ptr = struct.pAllocParms
    if struct.hClass == nv_gpu.NV01_ROOT_CLIENT: self.root_handle = struct.hObjectNew = self._alloc_handle()
    elif struct.hClass == nv_gpu.NV01_DEVICE_0:
      params:Any = nv_gpu.NV0080_ALLOC_PARAMETERS.from_address(params_ptr)
      assert params.hClientShare == self.root_handle
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = self.gpus[params.deviceId]
    elif struct.hClass == nv_gpu.NV20_SUBDEVICE_0:
      parent = self._typed_object(struct.hObjectParent, NVGPU)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVSubDevice(parent)
    elif struct.hClass == nv_gpu.TURING_USERMODE_A:
      parent = self._typed_object(struct.hObjectParent, NVSubDevice)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVUserMode(parent)
    elif struct.hClass == nv_gpu.FERMI_VASPACE_A:
      parent = self._typed_object(struct.hObjectParent, NVGPU)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVVASpace(parent)
    elif struct.hClass == nv_gpu.NV1_MEMORY_SYSTEM or struct.hClass == nv_gpu.NV1_MEMORY_USER:
      parent = self._typed_object(struct.hObjectParent, NVGPU)
      params = nv_gpu.NV_MEMORY_ALLOCATION_PARAMS.from_address(params_ptr)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVAllocation(parent, params.size)
    elif struct.hClass == nv_gpu.KEPLER_CHANNEL_GROUP_A:
      parent = self._typed_object(struct.hObjectParent, NVGPU)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVChannelGroup(parent)
    elif struct.hClass == nv_gpu.FERMI_CONTEXT_SHARE_A:
      parent = self._typed_object(struct.hObjectParent, NVChannelGroup)
      struct.hObjectNew = self._alloc_handle()
      self.object_by_handle[struct.hObjectNew] = NVContextShare(parent)
    elif struct.hClass == nv_gpu.AMPERE_CHANNEL_GPFIFO_A:
      parent = self._typed_object(struct.hObjectParent, NVChannelGroup)
      struct.hObjectNew = self._alloc_handle()
      params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS.from_address(params_ptr)
      gpu = parent.device
      gpfifo_token = gpu.add_gpfifo(params.gpFifoOffset, params.gpFifoEntries)
      self.object_by_handle[struct.hObjectNew] = NVGPFIFO(gpu, gpfifo_token)
    elif struct.hClass == nv_gpu.AMPERE_DMA_COPY_B or struct.hClass == nv_gpu.ADA_COMPUTE_A:
      # Only a handle is issued for these; nothing is stored in object_by_handle.
      self._typed_object(struct.hObjectParent, NVGPFIFO)
      struct.hObjectNew = self._alloc_handle()
    elif struct.hClass == nv_gpu.GT200_DEBUGGER:
      # Handle only, no tracked object.
      struct.hObjectNew = self._alloc_handle()
    else: raise RuntimeError(f"Unknown {struct.hClass} to rm_alloc")
    return 0

  def rm_control(self, argp):
    """Handle a control request by filling the command-specific parameter struct in place.

    `argp` is the address of an `NVOS54_PARAMETERS` struct. Returns 0.
    Raises RuntimeError for an unknown command or an unsupported GID format.
    """
    struct = nv_gpu.NVOS54_PARAMETERS.from_address(argp)
    params_ptr = struct.params
    if struct.cmd == nv_gpu.NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2:
      params:Any = nv_gpu.NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS.from_address(params_ptr)
      params.deviceInstance = params.gpuId # emulate them to be the same
    elif struct.cmd == nv_gpu.NV0080_CTRL_CMD_GPU_GET_CLASSLIST_V2 or struct.cmd == nv_gpu.NV0080_CTRL_CMD_GPU_GET_CLASSLIST:
      if struct.cmd == nv_gpu.NV0080_CTRL_CMD_GPU_GET_CLASSLIST:
        params = nv_gpu.NV0080_CTRL_GPU_GET_CLASSLIST_PARAMS.from_address(params_ptr)
      else:
        params = nv_gpu.NV0080_CTRL_GPU_GET_CLASSLIST_V2_PARAMS.from_address(params_ptr)

      classes = [50021, 51607, 51648, 50543, 51125, 51125, 51125, 51125, 50529, 36967, 36909, 37105, 33868, 36978, 37095, 37094, 36980, 37014, 49270,
                 41068, 41088, 41280, 50025, 96, 112, 115, 125, 20608, 20640, 20539, 20540, 41089, 41092, 50034, 50810, 50811, 50814, 51056, 51057,
                 51059, 51069, 51071, 51632, 51639, 51639, 51706, 52019, 222, 50287, 50273, 50031, 50017] # from ada102
      params.numClasses = len(classes)
      if struct.cmd == nv_gpu.NV0080_CTRL_CMD_GPU_GET_CLASSLIST:
        # Non-V2 variant: classList is an address, written through a view of 4-byte unsigned ints.
        clslist = to_mv(params.classList, params.numClasses * 4).cast('I')
        for i,c in enumerate(classes): clslist[i] = c
      else:
        # V2 variant: classList is indexed directly on the struct.
        for i,c in enumerate(classes): params.classList[i] = c
    elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_GR_GET_INFO:
      info = {nv_gpu.NV2080_CTRL_GR_INFO_INDEX_SM_VERSION: nv_gpu.NV2080_CTRL_GR_INFO_SM_VERSION_3_5,
        nv_gpu.NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_GPCS: 1,
        nv_gpu.NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_TPC_PER_GPC: 1,
        nv_gpu.NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_SM_PER_TPC: 1,
        nv_gpu.NV2080_CTRL_GR_INFO_INDEX_MAX_WARPS_PER_SM: 1,
      }

      params = nv_gpu.NV2080_CTRL_GR_GET_INFO_PARAMS.from_address(params_ptr)
      reqlist = (nv_gpu.NV2080_CTRL_GR_INFO * params.grInfoListSize).from_address(params.grInfoList)
      # An index missing from `info` raises KeyError.
      for i in range(params.grInfoListSize): reqlist[i].data = info[reqlist[i].index]
    elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_GPU_GET_GID_INFO:
      gpu = self._typed_object(struct.hObject, NVSubDevice).device
      params = nv_gpu.NV2080_CTRL_GPU_GET_GID_INFO_PARAMS.from_address(params_ptr)
      if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError("Unknown format")
      bts = gpu.gpu_uuid(sz=params.length)
      for i in range(params.length): params.data[i] = bts[i]
    elif struct.cmd == nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN:
      gpu_fifo = self._typed_object(struct.hObject, NVGPFIFO)
      params = nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN_PARAMS.from_address(params_ptr)
      params.workSubmitToken = gpu_fifo.token
    elif struct.cmd == nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE: pass
    elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_PERF_BOOST: pass
    elif struct.cmd == nv_gpu.NV2080_CTRL_CMD_FB_FLUSH_GPU_CACHE: pass
    elif struct.cmd == nv_gpu.NV83DE_CTRL_CMD_DEBUG_READ_ALL_SM_ERROR_STATES:
      params = nv_gpu.NV83DE_CTRL_DEBUG_READ_ALL_SM_ERROR_STATES_PARAMS.from_address(params_ptr)
      # A fault is reported only when MOCKGPU_EMU_FAULTADDR is set in the environment.
      params.mmuFault.valid = "MOCKGPU_EMU_FAULTADDR" in os.environ
    elif struct.cmd == nv_gpu.NV83DE_CTRL_CMD_DEBUG_READ_MMU_FAULT_INFO:
      params = nv_gpu.struct_NV83DE_CTRL_DEBUG_READ_MMU_FAULT_INFO_PARAMS.from_address(params_ptr)
      params.count = 1
      # MOCKGPU_EMU_FAULTADDR is parsed as a hexadecimal address.
      params.mmuFaultInfoList[0].faultAddress = int(os.environ['MOCKGPU_EMU_FAULTADDR'], base=16)
      params.mmuFaultInfoList[0].faultType = 1
      params.mmuFaultInfoList[0].accessType = 1
    else: raise RuntimeError(f"Unknown {struct.cmd} to rm_control")
    return 0

  def ctl_ioctl(self, req, argp):
    """Dispatch an ioctl issued on `/dev/nvidiactl`.

    The command number is taken from the low byte of `req`. Returns the
    handler's result (0 on success). Raises RuntimeError for an unknown command.
    """
    nr = req & 0xff
    if nr == nv_gpu.NV_ESC_RM_ALLOC: return self.rm_alloc(argp)
    elif nr == nv_gpu.NV_ESC_RM_ALLOC_MEMORY: pass
    elif nr == nv_gpu.NV_ESC_RM_CONTROL: return self.rm_control(argp)
    elif nr == nv_gpu.NV_ESC_RM_MAP_MEMORY:
      st:Any = nv_gpu.nv_ioctl_nvos33_parameters_with_fd.from_address(argp)
      obj = self.object_by_handle[st.params.hMemory]
      if isinstance(obj, NVUserMode):
        # Flag the descriptor so its next mmap is tracked as a usermode (MMIO) mapping.
        file = self.opened_fds[st.fd]
        assert isinstance(file, NVDevFileDesc)
        file._mapping_userland = True
    elif nr == nv_gpu.NV_ESC_RM_FREE:
      st = nv_gpu.NVOS00_PARAMETERS.from_address(argp)
      # rm_alloc issues some handles without registering an object (root client,
      # copy/compute engines, debugger), so freeing an unknown handle must not raise.
      self.object_by_handle.pop(st.hObjectOld, None)
    elif nr == nv_gpu.NV_ESC_CARD_INFO:
      # argp points at an array of card-info structs; fill one entry per emulated GPU.
      for i,gpu in enumerate(self.gpus.values()):
        st = nv_gpu.nv_ioctl_card_info_t.from_address(argp + i * ctypes.sizeof(nv_gpu.nv_ioctl_card_info_t))
        st.gpu_id = gpu.gpuid
        st.pci_info.device_id = 0x2684
        st.valid = True
    else: raise RuntimeError(f"Unknown {nr} to nvidiactl")
    return 0

  def uvm_ioctl(self, nr, argp):
    """Dispatch an ioctl issued on `/dev/nvidia-uvm`.

    Returns 0 on success, or -1 when a mapping request names a GPU UUID that
    matches no emulated GPU. Raises RuntimeError for an unknown command.
    """
    if nr == nv_gpu.UVM_INITIALIZE: pass
    elif nr == nv_gpu.UVM_MM_INITIALIZE: pass
    elif nr == nv_gpu.UVM_REGISTER_GPU:
      st:Any = nv_gpu.UVM_REGISTER_GPU_PARAMS.from_address(argp)
      assert self._gpu_by_uuid(st.gpu_uuid.uuid) is not None
    elif nr == nv_gpu.UVM_REGISTER_GPU_VASPACE: pass
    elif nr == nv_gpu.UVM_ENABLE_PEER_ACCESS: pass # uvm and shared spaced are setup already, no emulation for now
    elif nr == nv_gpu.UVM_CREATE_EXTERNAL_RANGE:
      st = nv_gpu.UVM_CREATE_EXTERNAL_RANGE_PARAMS.from_address(argp)
      # Back the requested range with anonymous shared memory at exactly st.base.
      libc.mmap(st.base, st.length, mmap.PROT_READ|mmap.PROT_WRITE, MAP_FIXED|mmap.MAP_SHARED|mmap.MAP_ANONYMOUS, -1, 0)
    elif nr == nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION:
      st = nv_gpu.UVM_MAP_EXTERNAL_ALLOCATION_PARAMS.from_address(argp)
      for gpu_attr_id in range(st.gpuAttributesCount):
        gpu = self._gpu_by_uuid(st.perGpuAttributes[gpu_attr_id].gpuUuid.uuid)
        if gpu is None: return -1
        gpu.map_range(st.base, st.length)
    elif nr == nv_gpu.UVM_REGISTER_CHANNEL: pass
    elif nr == nv_gpu.UVM_FREE:
      st = nv_gpu.UVM_FREE_PARAMS.from_address(argp)
      libc.munmap(st.base, st.length)
    else: raise RuntimeError(f"Unknown {nr} to nvidia-uvm")
    return 0

  def dev_ioctl(self, dev, req, argp):
    """Accept any ioctl on a per-GPU device file and report success."""
    return 0

  def _gpu_mmio_write(self, mv, off, gpu):
    """Run queued work after a write to a tracked usermode mapping.

    Every queue of every emulated GPU whose get and put pointers differ is
    executed, repeating until a full pass makes no progress. `mv`, `off` and
    `gpu` are accepted for the callback signature but not used.
    """
    any_progress = True
    while any_progress:
      any_progress = False
      # Loop variable deliberately not named `gpu`, to avoid shadowing the parameter.
      for dev in self.gpus.values():
        for q in dev.queues:
          if q.ctrl.GPGet != q.ctrl.GPPut:
            any_progress |= q.execute()
 |