openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

485 lines
29 KiB

from __future__ import annotations
import ctypes, time, array, struct, itertools, dataclasses
from tinygrad.runtime.autogen.nv import nv
from tinygrad.helpers import to_mv, lo32, hi32, DEBUG, round_up, round_down, mv_address, fetch
from tinygrad.runtime.support.system import System
from tinygrad.runtime.support.elf import elf_loader
from tinygrad.runtime.autogen import nv_gpu
@dataclasses.dataclass(frozen=True)
class GRBufDesc: size:int; v:int; p:int; lc:int=0 # noqa: E702
class NV_IP:
def __init__(self, nvdev): self.nvdev = nvdev
def init_sw(self): pass # Prepare sw/allocations for this IP
def init_hw(self): pass # Initialize hw for this IP
class NVRpcQueue:
def __init__(self, gsp:NV_GSP, va:int, completion_q_va:int|None=None):
self.tx = nv.msgqTxHeader.from_address(va)
while self.tx.entryOff != 0x1000: pass # wait for the tx header to be initialized
if completion_q_va is not None: self.rx = nv.msgqRxHeader.from_address(completion_q_va + nv.msgqTxHeader.from_address(completion_q_va).rxHdrOff)
self.gsp, self.va, self.queue_va, self.seq = gsp, va, va + self.tx.entryOff, 0
self.queue_mv = to_mv(self.queue_va, self.tx.msgSize * self.tx.msgCount)
def _checksum(self, data):
if (pad_len:=(-len(data)) % 8): data += b'\x00' * pad_len
checksum = 0
for offset in range(0, len(data), 8): checksum ^= struct.unpack_from('Q', data, offset)[0]
return ((checksum >> 32) & 0xFFFFFFFF) ^ (checksum & 0xFFFFFFFF)
def send_rpc(self, func, msg, wait=False):
header = nv.rpc_message_header_v(signature=nv.NV_VGPU_MSG_SIGNATURE_VALID, rpc_result=nv.NV_VGPU_MSG_RESULT_RPC_PENDING,
rpc_result_private=nv.NV_VGPU_MSG_RESULT_RPC_PENDING, header_version=(3<<24), function=func, length=len(msg) + 0x20)
msg = bytes(header) + msg
phdr = nv.GSP_MSG_QUEUE_ELEMENT(elemCount=round_up(len(msg), self.tx.msgSize) // self.tx.msgSize, seqNum=self.seq)
phdr.checkSum = self._checksum(bytes(phdr) + msg)
msg = bytes(phdr) + msg
off = self.tx.writePtr * self.tx.msgSize
self.queue_mv[off:off+len(msg)] = msg
self.tx.writePtr = (self.tx.writePtr + round_up(len(msg), self.tx.msgSize) // self.tx.msgSize) % self.tx.msgCount
System.memory_barrier()
self.seq += 1
self.gsp.nvdev.NV_PGSP_QUEUE_HEAD[0].write(0x0)
def wait_resp(self, cmd) -> memoryview:
while True:
System.memory_barrier()
if self.rx.readPtr == self.tx.writePtr: continue
off = self.rx.readPtr * self.tx.msgSize
hdr = nv.rpc_message_header_v.from_address(self.queue_va + off + 0x30)
msg = self.queue_mv[off + 0x50 : off + 0x50 + hdr.length]
# Handling special functions
if hdr.function == nv.NV_VGPU_MSG_EVENT_GSP_RUN_CPU_SEQUENCER: self.gsp.run_cpu_seq(msg)
elif hdr.function == nv.NV_VGPU_MSG_EVENT_OS_ERROR_LOG: print(f"GSP LOG: {msg[12:].tobytes().rstrip(bytes([0])).decode('utf-8')}")
# Update the read pointer
self.rx.readPtr = (self.rx.readPtr + round_up(hdr.length, self.tx.msgSize) // self.tx.msgSize) % self.tx.msgCount
System.memory_barrier()
if DEBUG >= 2:
rpc_names = {**nv.c__Ea_NV_VGPU_MSG_FUNCTION_NOP__enumvalues, **nv.c__Ea_NV_VGPU_MSG_EVENT_FIRST_EVENT__enumvalues}
print(f"nv {self.gsp.nvdev.devfmt}: in RPC: {rpc_names.get(hdr.function, f'ev:{hdr.function:x}')}, res:{hdr.rpc_result:#x}")
if hdr.rpc_result != 0: raise RuntimeError(f"RPC call {hdr.function} failed with result {hdr.rpc_result}")
if hdr.function == cmd: return msg
class NV_FLCN(NV_IP):
def init_sw(self):
self.nvdev.include("src/common/inc/swref/published/ampere/ga102/dev_gsp.h")
self.nvdev.include("src/common/inc/swref/published/ampere/ga102/dev_falcon_v4.h")
self.nvdev.include("src/common/inc/swref/published/ampere/ga102/dev_falcon_v4_addendum.h")
self.nvdev.include("src/common/inc/swref/published/ampere/ga102/dev_riscv_pri.h")
self.nvdev.include("src/common/inc/swref/published/ampere/ga102/dev_fbif_v4.h")
self.nvdev.include("src/common/inc/swref/published/ampere/ga102/dev_falcon_second_pri.h")
self.nvdev.include("src/common/inc/swref/published/ampere/ga102/dev_sec_pri.h")
self.nvdev.include("src/common/inc/swref/published/turing/tu102/dev_bus.h")
self.prep_ucode()
self.prep_booter()
def prep_ucode(self):
expansion_rom_off, bit_addr = 0x14e00, 0x1b0
vbios_bytes = bytes(array.array('I', self.nvdev.mmio[0x00300000//4:(0x00300000+0x98e00)//4]))
bit_header = nv.BIT_HEADER_V1_00.from_buffer_copy(vbios_bytes[bit_addr:bit_addr + ctypes.sizeof(nv.BIT_HEADER_V1_00)])
assert bit_header.Signature == 0x00544942, f"Invalid BIT header signature {hex(bit_header.Signature)}"
for i in range(bit_header.TokenEntries):
bit = nv.BIT_TOKEN_V1_00.from_buffer_copy(vbios_bytes[bit_addr + bit_header.HeaderSize + i * bit_header.TokenSize:])
if bit.TokenId != nv.BIT_TOKEN_FALCON_DATA or bit.DataVersion != 2 or bit.DataSize < nv.BIT_DATA_FALCON_DATA_V2_SIZE_4: continue
falcon_data = nv.BIT_DATA_FALCON_DATA_V2.from_buffer_copy(vbios_bytes[bit.DataPtr & 0xffff:])
ucode_hdr = nv.FALCON_UCODE_TABLE_HDR_V1.from_buffer_copy(vbios_bytes[(table_ptr:=expansion_rom_off + falcon_data.FalconUcodeTablePtr):])
for j in range(ucode_hdr.EntryCount):
ucode_entry = nv.FALCON_UCODE_TABLE_ENTRY_V1.from_buffer_copy(vbios_bytes[table_ptr + ucode_hdr.HeaderSize + j * ucode_hdr.EntrySize:])
if ucode_entry.ApplicationID != nv.FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: continue
ucode_desc_hdr = nv.FALCON_UCODE_DESC_HEADER.from_buffer_copy(vbios_bytes[expansion_rom_off + ucode_entry.DescPtr:])
ucode_desc_off = expansion_rom_off + ucode_entry.DescPtr
ucode_desc_size = ucode_desc_hdr.vDesc >> 16
self.desc_v3 = nv.FALCON_UCODE_DESC_V3.from_buffer_copy(vbios_bytes[ucode_desc_off:ucode_desc_off + ucode_desc_size])
sig_total_size = ucode_desc_size - nv.FALCON_UCODE_DESC_V3_SIZE_44
signature = vbios_bytes[ucode_desc_off + nv.FALCON_UCODE_DESC_V3_SIZE_44:][:sig_total_size]
image = vbios_bytes[ucode_desc_off + ucode_desc_size:][:round_up(self.desc_v3.StoredSize, 256)]
self.frts_offset = self.nvdev.vram_size - 0x100000 - 0x100000
read_vbios_desc = nv.FWSECLIC_READ_VBIOS_DESC(version=0x1, size=ctypes.sizeof(nv.FWSECLIC_READ_VBIOS_DESC), flags=2)
frst_reg_desc = nv.FWSECLIC_FRTS_REGION_DESC(version=0x1, size=ctypes.sizeof(nv.FWSECLIC_FRTS_REGION_DESC),
frtsRegionOffset4K=self.frts_offset >> 12, frtsRegionSize=0x100, frtsRegionMediaType=2)
frts_cmd = nv.FWSECLIC_FRTS_CMD(readVbiosDesc=read_vbios_desc, frtsRegionDesc=frst_reg_desc)
def __patch(cmd_id, cmd):
patched_image = bytearray(image)
# Patch image
dmem = nv.FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3.from_buffer_copy(image[(dmem_mapper_offset:=self.desc_v3.IMEMLoadSize+0xae0):])
dmem.init_cmd = cmd_id
patched_image[dmem_mapper_offset : dmem_mapper_offset+len(bytes(dmem))] = bytes(dmem)
patched_image[(cmd_off:=self.desc_v3.IMEMLoadSize+dmem.cmd_in_buffer_offset) : cmd_off+len(cmd)] = cmd
patched_image[(sig_off:=self.desc_v3.IMEMLoadSize+self.desc_v3.PKCDataOffset) : sig_off+0x180] = signature[0x180:]
return System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
self.frts_image_va, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))
def prep_booter(self):
image = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "image_prod_data")
sig = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "sig_prod_data")
header = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "header_prod_data")
patch_loc = int.from_bytes(self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "patch_loc_data"), 'little')
sig_len = len(sig) // int.from_bytes(self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "num_sigs_data"), 'little')
patched_image = bytearray(image)
patched_image[patch_loc:patch_loc+sig_len] = sig[:sig_len]
self.booter_image_va, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
_, _, self.booter_data_off, self.booter_data_sz, _, self.booter_code_off, self.booter_code_sz, _, _ = struct.unpack("9I", header)
def init_hw(self):
self.falcon, self.sec2 = 0x00110000, 0x00840000
self.reset(self.falcon)
self.execute_hs(self.falcon, self.frts_image_sysmem[0], code_off=0x0, data_off=self.desc_v3.IMEMLoadSize,
imemPa=self.desc_v3.IMEMPhysBase, imemVa=self.desc_v3.IMEMVirtBase, imemSz=self.desc_v3.IMEMLoadSize,
dmemPa=self.desc_v3.DMEMPhysBase, dmemVa=0x0, dmemSz=self.desc_v3.DMEMLoadSize,
pkc_off=self.desc_v3.PKCDataOffset, engid=self.desc_v3.EngineIdMask, ucodeid=self.desc_v3.UcodeId)
self.reset(self.falcon, riscv=True)
# set up the mailbox
self.nvdev.NV_PGSP_FALCON_MAILBOX0.write(lo32(self.nvdev.gsp.libos_args_sysmem[0]))
self.nvdev.NV_PGSP_FALCON_MAILBOX1.write(hi32(self.nvdev.gsp.libos_args_sysmem[0]))
# booter
self.reset(self.sec2)
mbx = self.execute_hs(self.sec2, self.booter_image_sysmem[0], code_off=self.booter_code_off, data_off=self.booter_data_off,
imemPa=0x0, imemVa=self.booter_code_off, imemSz=self.booter_code_sz, dmemPa=0x0, dmemVa=0x0, dmemSz=self.booter_data_sz,
pkc_off=0x10, engid=1, ucodeid=3, mailbox=self.nvdev.gsp.wpr_meta_sysmem)
assert mbx[0] == 0x0, f"Booster failed to execute, mailbox is {mbx[0]:08x}, {mbx[1]:08x}"
self.nvdev.NV_PFALCON_FALCON_OS.with_base(self.falcon).write(0x0)
assert self.nvdev.NV_PRISCV_RISCV_CPUCTL.with_base(self.falcon).read_bitfields()['active_stat'] == 1, "GSP Core is not active"
def execute_dma(self, base, cmd, dest, mem_off, sysmem, size):
while self.nvdev.NV_PFALCON_FALCON_DMATRFCMD.with_base(base).read_bitfields()['full'] != 0: pass
self.nvdev.NV_PFALCON_FALCON_DMATRFBASE.with_base(base).write(lo32(sysmem >> 8))
self.nvdev.NV_PFALCON_FALCON_DMATRFBASE1.with_base(base).write(hi32(sysmem >> 8) & 0x1ff)
xfered = 0
while xfered < size:
while self.nvdev.NV_PFALCON_FALCON_DMATRFCMD.with_base(base).read_bitfields()['full'] != 0: pass
self.nvdev.NV_PFALCON_FALCON_DMATRFMOFFS.with_base(base).write(dest + xfered)
self.nvdev.NV_PFALCON_FALCON_DMATRFFBOFFS.with_base(base).write(mem_off + xfered)
self.nvdev.NV_PFALCON_FALCON_DMATRFCMD.with_base(base).write(cmd)
xfered += 256
while self.nvdev.NV_PFALCON_FALCON_DMATRFCMD.with_base(base).read_bitfields()['idle'] != 1: pass
def start_cpu(self, base):
if self.nvdev.NV_PFALCON_FALCON_CPUCTL.with_base(base).read_bitfields()['alias_en'] == 1:
self.nvdev.wreg(base + self.nvdev.NV_PFALCON_FALCON_CPUCTL_ALIAS, 0x2)
else: self.nvdev.NV_PFALCON_FALCON_CPUCTL.with_base(base).write(startcpu=1)
def wait_cpu_halted(self, base):
while self.nvdev.NV_PFALCON_FALCON_CPUCTL.with_base(base).read_bitfields()['halted'] == 0: pass
def execute_hs(self, base, img_sysmem, code_off, data_off, imemPa, imemVa, imemSz, dmemPa, dmemVa, dmemSz, pkc_off, engid, ucodeid, mailbox=None):
self.disable_ctx_req(base)
self.nvdev.NV_PFALCON_FBIF_TRANSCFG.with_base(base)[ctx_dma:=0].update(target=self.nvdev.NV_PFALCON_FBIF_TRANSCFG_TARGET_COHERENT_SYSMEM,
mem_type=self.nvdev.NV_PFALCON_FBIF_TRANSCFG_MEM_TYPE_PHYSICAL)
cmd = self.nvdev.NV_PFALCON_FALCON_DMATRFCMD.with_base(base).encode(write=0, size=self.nvdev.NV_PFALCON_FALCON_DMATRFCMD_SIZE_256B,
ctxdma=ctx_dma, imem=1, sec=1)
self.execute_dma(base, cmd, dest=imemPa, mem_off=imemVa, sysmem=img_sysmem+code_off-imemVa, size=imemSz)
cmd = self.nvdev.NV_PFALCON_FALCON_DMATRFCMD.with_base(base).encode(write=0, size=self.nvdev.NV_PFALCON_FALCON_DMATRFCMD_SIZE_256B,
ctxdma=ctx_dma, imem=0, sec=0)
self.execute_dma(base, cmd, dest=dmemPa, mem_off=dmemVa, sysmem=img_sysmem+data_off-dmemVa, size=dmemSz)
self.nvdev.NV_PFALCON2_FALCON_BROM_PARAADDR.with_base(base)[0].write(pkc_off)
self.nvdev.NV_PFALCON2_FALCON_BROM_ENGIDMASK.with_base(base).write(engid)
self.nvdev.NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID.with_base(base).write(val=ucodeid)
self.nvdev.NV_PFALCON2_FALCON_MOD_SEL.with_base(base).write(algo=self.nvdev.NV_PFALCON2_FALCON_MOD_SEL_ALGO_RSA3K)
self.nvdev.NV_PFALCON_FALCON_BOOTVEC.with_base(base).write(imemVa)
if mailbox is not None:
self.nvdev.NV_PFALCON_FALCON_MAILBOX0.with_base(base).write(lo32(mailbox))
self.nvdev.NV_PFALCON_FALCON_MAILBOX1.with_base(base).write(hi32(mailbox))
self.start_cpu(base)
self.wait_cpu_halted(base)
if mailbox is not None:
return self.nvdev.NV_PFALCON_FALCON_MAILBOX0.with_base(base).read(), self.nvdev.NV_PFALCON_FALCON_MAILBOX1.with_base(base).read()
def disable_ctx_req(self, base):
self.nvdev.NV_PFALCON_FBIF_CTL.with_base(base).update(allow_phys_no_ctx=1)
self.nvdev.NV_PFALCON_FALCON_DMACTL.with_base(base).write(0x0)
def reset(self, base, riscv=False):
engine_reg = self.nvdev.NV_PGSP_FALCON_ENGINE if base == self.falcon else self.nvdev.NV_PSEC_FALCON_ENGINE
engine_reg.write(reset=1)
time.sleep(0.1)
engine_reg.write(reset=0)
while self.nvdev.NV_PFALCON_FALCON_HWCFG2.with_base(base).read_bitfields()['mem_scrubbing'] != 0: time.sleep(0.1)
if riscv: self.nvdev.NV_PRISCV_RISCV_BCR_CTRL.with_base(base).write(core_select=1, valid=0, brfetch=1)
elif self.nvdev.NV_PFALCON_FALCON_HWCFG2.with_base(base).read_bitfields()['riscv'] == 1:
self.nvdev.NV_PRISCV_RISCV_BCR_CTRL.with_base(base).write(core_select=0)
while self.nvdev.NV_PRISCV_RISCV_BCR_CTRL.with_base(base).read_bitfields()['valid'] != 1: pass
self.nvdev.NV_PFALCON_FALCON_RM.with_base(base).write(self.nvdev.chip_id)
class NV_GSP(NV_IP):
def init_sw(self):
self.handle_gen = itertools.count(0xcf000000)
self.init_rm_args()
self.init_libos_args()
self.init_wpr_meta()
# Prefill cmd queue with info for gsp to start.
self.rpc_set_gsp_system_info()
self.rpc_set_registry_table()
def init_rm_args(self, queue_size=0x40000):
# Alloc queues
pte_cnt = ((queue_pte_cnt:=(queue_size * 2) // 0x1000)) + round_up(queue_pte_cnt * 8, 0x1000) // 0x1000
pt_size = round_up(pte_cnt * 8, 0x1000)
queues_va, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
# Fill up ptes
for i, sysmem in enumerate(queues_sysmem): to_mv(queues_va + i * 0x8, 0x8).cast('Q')[0] = sysmem
# Fill up arguments
queue_args = nv.MESSAGE_QUEUE_INIT_ARGUMENTS(sharedMemPhysAddr=queues_sysmem[0], pageTableEntryCount=pte_cnt, cmdQueueOffset=pt_size,
statQueueOffset=pt_size + queue_size)
rm_args, self.rm_args_sysmem = self.nvdev._alloc_boot_struct(nv.GSP_ARGUMENTS_CACHED(bDmemStack=True, messageQueueInitArguments=queue_args))
# Build command queue header
self.cmd_q_va, self.stat_q_va = queues_va + pt_size, queues_va + pt_size + queue_size
cmd_q_tx = nv.msgqTxHeader(version=0, size=queue_size, entryOff=0x1000, msgSize=0x1000, msgCount=(queue_size - 0x1000) // 0x1000,
writePtr=0, flags=1, rxHdrOff=ctypes.sizeof(nv.msgqTxHeader))
to_mv(self.cmd_q_va, ctypes.sizeof(nv.msgqTxHeader))[:] = bytes(cmd_q_tx)
self.cmd_q = NVRpcQueue(self, self.cmd_q_va, None)
def init_libos_args(self):
_, logbuf_sysmem = System.alloc_sysmem((2 << 20), contiguous=True)
libos_args_va, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)
libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_va)
for i, name in enumerate(["INIT", "INTR", "RM", "MNOC", "KRNL"]):
libos_structs[i] = nv.LibosMemoryRegionInitArgument(kind=nv.LIBOS_MEMORY_REGION_CONTIGUOUS, loc=nv.LIBOS_MEMORY_REGION_LOC_SYSMEM, size=0x10000,
id8=int.from_bytes(bytes(f"LOG{name}", 'utf-8'), 'big'), pa=logbuf_sysmem[0] + 0x10000 * i)
libos_structs[5] = nv.LibosMemoryRegionInitArgument(kind=nv.LIBOS_MEMORY_REGION_CONTIGUOUS, loc=nv.LIBOS_MEMORY_REGION_LOC_SYSMEM, size=0x1000,
id8=int.from_bytes(bytes("RMARGS", 'utf-8'), 'big'), pa=self.rm_args_sysmem)
def init_gsp_image(self):
fw = fetch("https://github.com/NVIDIA/linux-firmware/raw/refs/heads/nvidia-staging/nvidia/ga102/gsp/gsp-570.144.bin", subdir="fw").read_bytes()
_, sections, _ = elf_loader(fw)
self.gsp_image = next((sh.content for sh in sections if sh.name == ".fwimage"))
signature = next((sh.content for sh in sections if sh.name == (f".fwsignature_{self.nvdev.chip_name[:4].lower()}x")))
# Build radix3
npages = [0, 0, 0, round_up(len(self.gsp_image), 0x1000) // 0x1000]
for i in range(3, 0, -1): npages[i-1] = ((npages[i] - 1) >> (nv.LIBOS_MEMORY_REGION_RADIX_PAGE_LOG2 - 3)) + 1
offsets = [sum(npages[:i]) * 0x1000 for i in range(4)]
radix_va, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
# Copy image
to_mv(radix_va + offsets[-1], len(self.gsp_image))[:] = self.gsp_image
# Copy level and image pages.
for i in range(0, 3):
cur_offset = sum(npages[:i+1])
to_mv(radix_va + offsets[i], npages[i+1] * 8).cast('Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])
# Copy signature
self.gsp_signature_va, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)
def init_boot_binary_image(self):
self.booter_image = self.nvdev.extract_fw("kgspBinArchiveGspRmBoot", "ucode_image_prod_data")
self.booter_desc = nv.RM_RISCV_UCODE_DESC.from_buffer_copy(self.nvdev.extract_fw("kgspBinArchiveGspRmBoot", "ucode_desc_prod_data"))
_, self.booter_sysmem = System.alloc_sysmem(len(self.booter_image), contiguous=True, data=self.booter_image)
def init_wpr_meta(self):
self.init_gsp_image()
self.init_boot_binary_image()
m = nv.GspFwWprMeta(revision=nv.GSP_FW_WPR_META_REVISION, magic=nv.GSP_FW_WPR_META_MAGIC,
fbSize=self.nvdev.vram_size, vgaWorkspaceSize=(vga_sz:=0x100000), vgaWorkspaceOffset=(vga_off:=self.nvdev.vram_size-vga_sz),
sizeOfRadix3Elf=(radix3_sz:=len(self.gsp_image)), gspFwWprEnd=vga_off, frtsSize=(frts_sz:=0x100000), frtsOffset=(frts_off:=vga_off-frts_sz),
sizeOfBootloader=(boot_sz:=len(self.booter_image)), bootBinOffset=(boot_off:=frts_off-boot_sz),
gspFwOffset=(gsp_off:=round_down(boot_off-radix3_sz, 0x10000)), gspFwHeapSize=(gsp_heap_sz:=0x8100000),
gspFwHeapOffset=(gsp_heap_off:=round_down(gsp_off-gsp_heap_sz, 0x100000)), gspFwWprStart=(wpr_start:=round_down(gsp_heap_off-0x1000, 0x100000)),
nonWprHeapSize=(non_wpr_sz:=0x100000), nonWprHeapOffset=(non_wpr_off:=round_down(wpr_start-non_wpr_sz, 0x100000)), gspFwRsvdStart=non_wpr_off,
sysmemAddrOfRadix3Elf=self.gsp_radix3_sysmem[0],sysmemAddrOfBootloader=self.booter_sysmem[0],sysmemAddrOfSignature=self.gsp_signature_sysmem[0],
bootloaderCodeOffset=self.booter_desc.monitorCodeOffset, bootloaderDataOffset=self.booter_desc.monitorDataOffset,
bootloaderManifestOffset=self.booter_desc.manifestOffset, sizeOfSignature=0x1000)
self.wpr_meta, self.wpr_meta_sysmem = self.nvdev._alloc_boot_struct(m)
assert self.nvdev.flcn.frts_offset == self.wpr_meta.frtsOffset, f"FRTS mismatch: {self.nvdev.flcn.frts_offset} != {self.wpr_meta.frtsOffset}"
def promote_ctx(self, client, subdevice, obj, ctxbufs, bufs=None, virt=None, phys=None):
res, prom = {}, nv_gpu.NV2080_CTRL_GPU_PROMOTE_CTX_PARAMS(entryCount=len(ctxbufs), engineType=0x1, hChanClient=client, hObject=obj)
for i,(buf,desc) in enumerate(ctxbufs.items()):
use_v, use_p = (desc.v if virt is None else virt), (desc.p if phys is None else phys)
x = (bufs or {}).get(buf, self.nvdev.mm.valloc(desc.size, contiguous=True)) # allocate buffers
prom.promoteEntry[i] = nv_gpu.NV2080_CTRL_GPU_PROMOTE_CTX_BUFFER_ENTRY(bufferId=buf, gpuVirtAddr=x.va_addr if use_v else 0, bInitialize=use_p,
gpuPhysAddr=x.paddrs[0][0] if use_p else 0, size=desc.size if use_p else 0, physAttr=0x4 if use_p else 0, bNonmapped=(use_p and not use_v))
res[buf] = x
self.rpc_rm_control(hObject=subdevice, cmd=nv_gpu.NV2080_CTRL_CMD_GPU_PROMOTE_CTX, params=prom, client=client)
return res
def init_golden_image(self):
self.rpc_rm_alloc(hParent=0x0, hClass=0x0, params=nv_gpu.NV0000_ALLOC_PARAMETERS())
dev = self.rpc_rm_alloc(hParent=self.priv_root, hClass=nv_gpu.NV01_DEVICE_0, params=nv_gpu.NV0080_ALLOC_PARAMETERS(hClientShare=self.priv_root))
subdev = self.rpc_rm_alloc(hParent=dev, hClass=nv_gpu.NV20_SUBDEVICE_0, params=nv_gpu.NV2080_ALLOC_PARAMETERS())
vaspace = self.rpc_rm_alloc(hParent=dev, hClass=nv_gpu.FERMI_VASPACE_A, params=nv_gpu.NV_VASPACE_ALLOCATION_PARAMETERS())
# reserve 512MB for the reserved PDES
res_va = self.nvdev.mm.alloc_vaddr(512 << 20)
bufs_p = nv_gpu.struct_NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS(pageSize=(512<<20), numLevelsToCopy=3,
virtAddrLo=res_va, virtAddrHi=res_va+(512<<20)-1)
for i,pt in enumerate(self.nvdev.mm.page_tables(res_va, size=512<<20)[:3]):
bufs_p.levels[i] = nv_gpu.struct_NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS_0(physAddress=pt.paddr, size=0x20 if i == 0 else 0x1000,
pageShift=47 - (9 * i), aperture=1)
self.rpc_rm_control(hObject=vaspace, cmd=0x90f10106, params=bufs_p)
gpfifo_area = self.nvdev.mm.valloc(2<<20, contiguous=True)
userd = nv_gpu.NV_MEMORY_DESC_PARAMS(base=gpfifo_area.paddrs[0][0] + 0x400 * 8, size=0x400, addressSpace=2, cacheAttrib=0)
gg_params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(gpFifoOffset=gpfifo_area.va_addr, gpFifoEntries=32, engineType=0x1, cid=3,
hVASpace=vaspace, userdOffset=(ctypes.c_uint64*8)(0x400 * 8), userdMem=userd, internalFlags=0x1a, flags=0x200320)
ch_gpfifo = self.rpc_rm_alloc(hParent=dev, hClass=nv_gpu.AMPERE_CHANNEL_GPFIFO_A, params=gg_params)
self.grctx_bufs = {0: GRBufDesc(0x237000, p=1, v=1), 1: GRBufDesc(0x6000, p=1, v=1, lc=1), 2: GRBufDesc(0x6000, p=1, v=1),
3: GRBufDesc(0x3000, p=0, v=1), 4: GRBufDesc(0x20000, p=0, v=1), 5: GRBufDesc(0x2600000, p=0, v=1), 6: GRBufDesc(0x80000, p=0, v=1),
9: GRBufDesc(0x10000, p=1, v=1), 10: GRBufDesc(0x80000, p=1, v=0), 11: GRBufDesc(0x80000, p=1, v=1)}
self.promote_ctx(self.priv_root, subdev, ch_gpfifo, {k:v for k, v in self.grctx_bufs.items() if v.lc == 0})
self.rpc_rm_alloc(hParent=ch_gpfifo, hClass=nv_gpu.ADA_COMPUTE_A, params=None)
self.rpc_rm_alloc(hParent=ch_gpfifo, hClass=nv_gpu.AMPERE_DMA_COPY_B, params=None)
def init_hw(self):
self.stat_q = NVRpcQueue(self, self.stat_q_va, self.cmd_q_va)
self.cmd_q.rx = nv.msgqRxHeader.from_address(self.stat_q.va + self.stat_q.tx.rxHdrOff)
self.stat_q.wait_resp(nv.NV_VGPU_MSG_EVENT_GSP_INIT_DONE)
self.nvdev.NV_PBUS_BAR1_BLOCK.write(mode=0, target=0, ptr=0)
self.priv_root = 0xc1e00004
self.init_golden_image()
### RPCs
def rpc_rm_alloc(self, hParent, hClass, params, client=None) -> int:
if hClass == nv_gpu.AMPERE_CHANNEL_GPFIFO_A:
ramfc_alloc = self.nvdev.mm.valloc(0x1000, contiguous=True)
params.ramfcMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=ramfc_alloc.paddrs[0][0], size=0x200, addressSpace=2, cacheAttrib=0)
params.instanceMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=ramfc_alloc.paddrs[0][0], size=0x1000, addressSpace=2, cacheAttrib=0)
method_va, method_sysmem = System.alloc_sysmem(0x5000, contiguous=True)
params.mthdbufMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=method_sysmem[0], size=0x5000, addressSpace=1, cacheAttrib=0)
if client is not None and client != self.priv_root and params.hObjectError != 0:
params.errorNotifierMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=0, size=0xecc, addressSpace=0, cacheAttrib=0)
params.userdMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=params.hUserdMemory[0] + params.userdOffset[0], size=0x400, addressSpace=2, cacheAttrib=0)
alloc_args = nv.rpc_gsp_rm_alloc_v(hClient=(client:=client or self.priv_root), hParent=hParent, hObject=(obj:=next(self.handle_gen)),
hClass=hClass, flags=0x0, paramsSize=ctypes.sizeof(params) if params is not None else 0x0)
self.cmd_q.send_rpc(nv.NV_VGPU_MSG_FUNCTION_GSP_RM_ALLOC, bytes(alloc_args) + (bytes(params) if params is not None else b''))
self.stat_q.wait_resp(nv.NV_VGPU_MSG_FUNCTION_GSP_RM_ALLOC)
if hClass == nv_gpu.FERMI_VASPACE_A and client != self.priv_root:
self.rpc_set_page_directory(device=hParent, hVASpace=obj, pdir_paddr=self.nvdev.mm.root_page_table.paddr, client=client)
if hClass == nv_gpu.NV20_SUBDEVICE_0: self.subdevice = obj # save subdevice handle
if hClass == nv_gpu.ADA_COMPUTE_A and client != self.priv_root:
phys_gr_ctx = self.promote_ctx(client, self.subdevice, hParent, {k:v for k,v in self.grctx_bufs.items() if k in [0, 1, 2]}, virt=False)
self.promote_ctx(client, self.subdevice, hParent, {k:v for k,v in self.grctx_bufs.items() if k in [0, 1, 2]}, phys_gr_ctx, phys=False)
return obj if hClass != nv_gpu.NV1_ROOT else client
def rpc_rm_control(self, hObject, cmd, params, client=None):
control_args = nv.rpc_gsp_rm_control_v(hClient=(client:=client or self.priv_root), hObject=hObject, cmd=cmd, flags=0x0,
paramsSize=ctypes.sizeof(params) if params is not None else 0x0)
self.cmd_q.send_rpc(nv.NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL, bytes(control_args) + (bytes(params) if params is not None else b''))
return self.stat_q.wait_resp(nv.NV_VGPU_MSG_FUNCTION_GSP_RM_CONTROL)[len(bytes(control_args)):]
def rpc_set_page_directory(self, device, hVASpace, pdir_paddr, client=None):
params = nv.struct_NV0080_CTRL_DMA_SET_PAGE_DIRECTORY_PARAMS_v1E_05(physAddress=pdir_paddr,
numEntries=self.nvdev.mm.pte_cnt[0], flags=0x8, hVASpace=hVASpace, pasid=0xffffffff, subDeviceId=1, chId=0) # flags field is all channels.
alloc_args = nv.rpc_set_page_directory_v(hClient=client or self.priv_root, hDevice=device, pasid=0xffffffff, params=params)
self.cmd_q.send_rpc(nv.NV_VGPU_MSG_FUNCTION_SET_PAGE_DIRECTORY, bytes(alloc_args))
self.stat_q.wait_resp(nv.NV_VGPU_MSG_FUNCTION_SET_PAGE_DIRECTORY)
def rpc_set_gsp_system_info(self):
def bdf_as_int(s): return (int(s[5:7],16)<<8) | (int(s[8:10],16)<<3) | int(s[-1],16)
data = nv.GspSystemInfo(gpuPhysAddr=self.nvdev.bars[0][0], gpuPhysFbAddr=self.nvdev.bars[1][0], gpuPhysInstAddr=self.nvdev.bars[3][0],
pciConfigMirrorBase=0x88000, pciConfigMirrorSize=0x1000, nvDomainBusDeviceFunc=bdf_as_int(self.nvdev.devfmt), bIsPassthru=1,
PCIDeviceID=self.nvdev.venid, PCISubDeviceID=self.nvdev.subvenid, PCIRevisionID=self.nvdev.rev, maxUserVa=0x7ffffffff000)
self.cmd_q.send_rpc(nv.NV_VGPU_MSG_FUNCTION_GSP_SET_SYSTEM_INFO, bytes(data))
def rpc_set_registry_table(self):
table = {'RMForcePcieConfigSave': 0x1, 'RMSecBusResetEnable': 0x1}
entries_bytes, data_bytes = bytes(), bytes()
hdr_size, entries_size = ctypes.sizeof(nv.PACKED_REGISTRY_TABLE), ctypes.sizeof(nv.PACKED_REGISTRY_ENTRY) * len(table)
for k,v in table.items():
entries_bytes += bytes(nv.PACKED_REGISTRY_ENTRY(nameOffset=hdr_size + entries_size + len(data_bytes),
type=nv.REGISTRY_TABLE_ENTRY_TYPE_DWORD, data=v, length=4))
data_bytes += k.encode('utf-8') + b'\x00'
header = nv.PACKED_REGISTRY_TABLE(size=hdr_size + len(entries_bytes) + len(data_bytes), numEntries=len(table))
self.cmd_q.send_rpc(nv.NV_VGPU_MSG_FUNCTION_SET_REGISTRY, bytes(header) + entries_bytes + data_bytes)
def run_cpu_seq(self, seq_buf):
hdr = nv.rpc_run_cpu_sequencer_v17_00.from_address(mv_address(seq_buf))
cmd_iter = iter(seq_buf[ctypes.sizeof(nv.rpc_run_cpu_sequencer_v17_00):].cast('I')[:hdr.cmdIndex])
for op in cmd_iter:
if op == 0x0: self.nvdev.wreg(next(cmd_iter), next(cmd_iter)) # reg write
elif op == 0x1: # reg modify
addr, val, mask = next(cmd_iter), next(cmd_iter), next(cmd_iter)
self.nvdev.wreg(addr, (self.nvdev.rreg(addr) & ~mask) | (val & mask))
elif op == 0x2: # reg poll
addr, mask, val, _, _ = next(cmd_iter), next(cmd_iter), next(cmd_iter), next(cmd_iter), next(cmd_iter)
while (self.nvdev.rreg(addr) & mask) != val: pass
elif op == 0x3: time.sleep(next(cmd_iter) / 1e6) # delay us
elif op == 0x4: # save reg
addr, index = next(cmd_iter), next(cmd_iter)
hdr.regSaveArea[index] = self.nvdev.rreg(addr)
elif op == 0x5: # core reset
self.nvdev.flcn.reset(self.nvdev.flcn.falcon)
self.nvdev.flcn.disable_ctx_req(self.nvdev.flcn.falcon)
elif op == 0x6: self.nvdev.flcn.start_cpu(self.nvdev.flcn.falcon)
elif op == 0x7: self.nvdev.flcn.wait_cpu_halted(self.nvdev.flcn.falcon)
elif op == 0x8: # core resume
self.nvdev.flcn.reset(self.nvdev.flcn.falcon, riscv=True)
self.nvdev.NV_PGSP_FALCON_MAILBOX0.write(lo32(self.libos_args_sysmem[0]))
self.nvdev.NV_PGSP_FALCON_MAILBOX1.write(hi32(self.libos_args_sysmem[0]))
self.nvdev.flcn.start_cpu(self.nvdev.flcn.sec2)
while self.nvdev.NV_PGC6_BSI_SECURE_SCRATCH_14.read_bitfields()['boot_stage_3_handoff'] == 0: pass
mailbox = self.nvdev.NV_PFALCON_FALCON_MAILBOX0.with_base(self.nvdev.flcn.sec2).read()
assert mailbox == 0x0, f"Falcon SEC2 failed to execute, mailbox is {mailbox:08x}"
else: raise ValueError(f"Unknown op code {op} in run_cpu_seq")