openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

506 lines
28 KiB

import ctypes, time, contextlib, importlib
from typing import Literal
from tinygrad.runtime.autogen.am import am
from tinygrad.helpers import to_mv, data64, lo32, hi32, DEBUG
class AM_IP:
def __init__(self, adev): self.adev = adev
def init_sw(self): pass # Prepare sw/allocations for this IP
def init_hw(self): pass # Initialize hw for this IP
def fini_hw(self): pass # Finalize hw for this IP
def set_clockgating_state(self): pass # Set clockgating state for this IP
class AM_SOC(AM_IP):
def init_sw(self):
self.soc_ver = 24 if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else 21
self.module = importlib.import_module(f"tinygrad.runtime.autogen.am.soc{self.soc_ver}")
def init_hw(self):
self.adev.regRCC_DEV0_EPF2_STRAP2.update(strap_no_soft_reset_dev0_f2=0x0)
self.adev.regRCC_DEV0_EPF0_RCC_DOORBELL_APER_EN.write(0x1)
def set_clockgating_state(self): self.adev.regHDP_MEM_POWER_CTRL.update(atomic_mem_power_ctrl_en=1, atomic_mem_power_ds_en=1)
def doorbell_enable(self, port, awid=0, awaddr_31_28_value=0, offset=0, size=0):
self.adev.reg(f"{'regGDC_S2A0_S2A' if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else 'regS2A'}_DOORBELL_ENTRY_{port}_CTRL").update(
**{f"s2a_doorbell_port{port}_enable":1, f"s2a_doorbell_port{port}_awid":awid, f"s2a_doorbell_port{port}_awaddr_31_28_value":awaddr_31_28_value,
f"s2a_doorbell_port{port}_range_offset":offset, f"s2a_doorbell_port{port}_range_size":size})
class AM_GMC(AM_IP):
def init_sw(self):
# Memory controller aperture
self.mc_base = (self.adev.regMMMC_VM_FB_LOCATION_BASE.read() & 0xFFFFFF) << 24
self.mc_end = self.mc_base + self.adev.mm.vram_size - 1
# VM aperture
self.vm_base = self.adev.mm.va_allocator.base
self.vm_end = self.vm_base + self.adev.mm.va_allocator.size - 1
# GFX11 has 44-bit address space
self.address_space_mask = (1 << 44) - 1
self.memscratch_paddr = self.adev.mm.palloc(0x1000, zero=not self.adev.partial_boot, boot=True)
self.dummy_page_paddr = self.adev.mm.palloc(0x1000, zero=not self.adev.partial_boot, boot=True)
self.hub_initted = {"MM": False, "GC": False}
def init_hw(self): self.init_hub("MM")
def flush_hdp(self): self.adev.wreg(self.adev.reg("regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL").read() // 4, 0x0)
def flush_tlb(self, ip:Literal["MM", "GC"], vmid, flush_type=0):
self.flush_hdp()
# Can't issue TLB invalidation if the hub isn't initialized.
if not self.hub_initted[ip]: return
if ip == "MM": self.adev.wait_reg(self.adev.regMMVM_INVALIDATE_ENG17_SEM, mask=0x1, value=0x1)
self.adev.reg(f"reg{ip}VM_INVALIDATE_ENG17_REQ").write(flush_type=flush_type, per_vmid_invalidate_req=(1 << vmid), invalidate_l2_ptes=1,
invalidate_l2_pde0=1, invalidate_l2_pde1=1, invalidate_l2_pde2=1, invalidate_l1_ptes=1, clear_protection_fault_status_addr=0)
self.adev.wait_reg(self.adev.reg(f"reg{ip}VM_INVALIDATE_ENG17_ACK"), mask=(1 << vmid), value=(1 << vmid))
if ip == "MM":
self.adev.regMMVM_INVALIDATE_ENG17_SEM.write(0x0)
self.adev.regMMVM_L2_BANK_SELECT_RESERVED_CID2.update(reserved_cache_private_invalidation=1)
# Read back the register to ensure the invalidation is complete
self.adev.regMMVM_L2_BANK_SELECT_RESERVED_CID2.read()
def enable_vm_addressing(self, page_table, ip:Literal["MM", "GC"], vmid):
self.adev.wreg_pair(f"reg{ip}VM_CONTEXT{vmid}_PAGE_TABLE_START_ADDR", "_LO32", "_HI32", self.vm_base >> 12)
self.adev.wreg_pair(f"reg{ip}VM_CONTEXT{vmid}_PAGE_TABLE_END_ADDR", "_LO32", "_HI32", self.vm_end >> 12)
self.adev.wreg_pair(f"reg{ip}VM_CONTEXT{vmid}_PAGE_TABLE_BASE_ADDR", "_LO32", "_HI32", page_table.paddr | 1)
self.adev.reg(f"reg{ip}VM_CONTEXT{vmid}_CNTL").write(0x1800000, pde0_protection_fault_enable_interrupt=1, pde0_protection_fault_enable_default=1,
dummy_page_protection_fault_enable_interrupt=1, dummy_page_protection_fault_enable_default=1,
range_protection_fault_enable_interrupt=1, range_protection_fault_enable_default=1,
valid_protection_fault_enable_interrupt=1, valid_protection_fault_enable_default=1,
read_protection_fault_enable_interrupt=1, read_protection_fault_enable_default=1,
write_protection_fault_enable_interrupt=1, write_protection_fault_enable_default=1,
execute_protection_fault_enable_interrupt=1, execute_protection_fault_enable_default=1,
enable_context=1, page_table_depth=(3 - page_table.lv))
def init_hub(self, ip:Literal["MM", "GC"]):
# Init system apertures
self.adev.reg(f"reg{ip}MC_VM_AGP_BASE").write(0)
self.adev.reg(f"reg{ip}MC_VM_AGP_BOT").write(0xffffffffffff >> 24) # disable AGP
self.adev.reg(f"reg{ip}MC_VM_AGP_TOP").write(0)
self.adev.reg(f"reg{ip}MC_VM_SYSTEM_APERTURE_LOW_ADDR").write(self.mc_base >> 18)
self.adev.reg(f"reg{ip}MC_VM_SYSTEM_APERTURE_HIGH_ADDR").write(self.mc_end >> 18)
self.adev.wreg_pair(f"reg{ip}MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR", "_LSB", "_MSB", self.memscratch_paddr >> 12)
self.adev.wreg_pair(f"reg{ip}VM_L2_PROTECTION_FAULT_DEFAULT_ADDR", "_LO32", "_HI32", self.dummy_page_paddr >> 12)
self.adev.reg(f"reg{ip}VM_L2_PROTECTION_FAULT_CNTL2").update(active_page_migration_pte_read_retry=1)
# Init TLB and cache
self.adev.reg(f"reg{ip}MC_VM_MX_L1_TLB_CNTL").update(enable_l1_tlb=1, system_access_mode=3, enable_advanced_driver_model=1,
system_aperture_unmapped_access=0, eco_bits=0, mtype=self.adev.soc.module.MTYPE_UC)
self.adev.reg(f"reg{ip}VM_L2_CNTL").update(enable_l2_cache=1, enable_l2_fragment_processing=0, enable_default_page_out_to_system_memory=1,
l2_pde0_cache_tag_generation_mode=0, pde_fault_classification=0, context1_identity_access_mode=1, identity_mode_fragment_size=0)
self.adev.reg(f"reg{ip}VM_L2_CNTL2").update(invalidate_all_l1_tlbs=1, invalidate_l2_cache=1)
self.adev.reg(f"reg{ip}VM_L2_CNTL3").write(bank_select=9, l2_cache_bigk_fragment_size=6,l2_cache_4k_associativity=1,l2_cache_bigk_associativity=1)
self.adev.reg(f"reg{ip}VM_L2_CNTL4").write(l2_cache_4k_partition_count=1)
self.adev.reg(f"reg{ip}VM_L2_CNTL5").write(walker_priority_client_id=0x1ff)
self.enable_vm_addressing(self.adev.mm.root_page_table, ip, vmid=0)
# Disable identity aperture
self.adev.wreg_pair(f"reg{ip}VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR", "_LO32", "_HI32", 0xfffffffff)
self.adev.wreg_pair(f"reg{ip}VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR", "_LO32", "_HI32", 0x0)
self.adev.wreg_pair(f"reg{ip}VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET", "_LO32", "_HI32", 0x0)
for eng_i in range(18): self.adev.wreg_pair(f"reg{ip}VM_INVALIDATE_ENG{eng_i}_ADDR_RANGE", "_LO32", "_HI32", 0x1fffffffff)
self.hub_initted[ip] = True
def get_pte_flags(self, pte_lv, is_table, frag, uncached, system, snooped, valid, extra=0):
extra |= (am.AMDGPU_PTE_SYSTEM * system) | (am.AMDGPU_PTE_SNOOPED * snooped) | (am.AMDGPU_PTE_VALID * valid) | am.AMDGPU_PTE_FRAG(frag)
if not is_table: extra |= (am.AMDGPU_PTE_WRITEABLE | am.AMDGPU_PTE_READABLE | am.AMDGPU_PTE_EXECUTABLE)
if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0):
extra |= am.AMDGPU_PTE_MTYPE_GFX12(0, self.adev.soc.module.MTYPE_UC if uncached else 0)
extra |= (am.AMDGPU_PDE_PTE_GFX12 if not is_table and pte_lv != am.AMDGPU_VM_PTB else (am.AMDGPU_PTE_IS_PTE if not is_table else 0))
else:
extra |= am.AMDGPU_PTE_MTYPE_NV10(0, self.adev.soc.module.MTYPE_UC if uncached else 0)
extra |= (am.AMDGPU_PDE_PTE if not is_table and pte_lv != am.AMDGPU_VM_PTB else 0)
return extra
def is_pte_huge_page(self, pte): return pte & (am.AMDGPU_PDE_PTE_GFX12 if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else am.AMDGPU_PDE_PTE)
def on_interrupt(self):
for ip in ["MM", "GC"]:
st = self.adev.reg(f"reg{ip}VM_L2_PROTECTION_FAULT_STATUS{'_LO32' if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else ''}").read()
va = (self.adev.reg(f'reg{ip}VM_L2_PROTECTION_FAULT_ADDR_LO32').read()
| (self.adev.reg(f'reg{ip}VM_L2_PROTECTION_FAULT_ADDR_HI32').read()) << 32) << 12
if st: raise RuntimeError(f"{ip}VM_L2_PROTECTION_FAULT_STATUS: {st:#x} {va:#x}")
class AM_SMU(AM_IP):
def init_sw(self):
self.smu_mod = self.adev._ip_module("smu", am.MP1_HWIP, prever_prefix='v')
self.driver_table_paddr = self.adev.mm.palloc(0x4000, zero=not self.adev.partial_boot, boot=True)
def init_hw(self):
self._send_msg(self.smu_mod.PPSMC_MSG_SetDriverDramAddrHigh, hi32(self.adev.paddr2mc(self.driver_table_paddr)))
self._send_msg(self.smu_mod.PPSMC_MSG_SetDriverDramAddrLow, lo32(self.adev.paddr2mc(self.driver_table_paddr)))
self._send_msg(self.smu_mod.PPSMC_MSG_EnableAllSmuFeatures, 0)
def is_smu_alive(self):
with contextlib.suppress(RuntimeError): self._send_msg(self.smu_mod.PPSMC_MSG_GetSmuVersion, 0, timeout=100)
return self.adev.mmMP1_SMN_C2PMSG_90.read() != 0
def mode1_reset(self):
if DEBUG >= 2: print(f"am {self.adev.devfmt}: mode1 reset")
if self.adev.ip_ver[am.MP0_HWIP] >= (14,0,0): self._send_msg(__DEBUGSMC_MSG_Mode1Reset:=2, 0, debug=True)
else: self._send_msg(self.smu_mod.PPSMC_MSG_Mode1Reset, 0)
time.sleep(0.5) # 500ms
def read_table(self, table_t, cmd):
self._send_msg(self.smu_mod.PPSMC_MSG_TransferTableSmu2Dram, cmd)
return table_t.from_buffer(to_mv(self.adev.paddr2cpu(self.driver_table_paddr), ctypes.sizeof(table_t)))
def read_metrics(self): return self.read_table(self.smu_mod.SmuMetricsExternal_t, self.smu_mod.TABLE_SMU_METRICS)
def set_clocks(self, level):
if not hasattr(self, 'clcks'):
self.clcks = {}
for clck in [self.smu_mod.PPCLK_GFXCLK, self.smu_mod.PPCLK_UCLK, self.smu_mod.PPCLK_FCLK, self.smu_mod.PPCLK_SOCCLK]:
cnt = self._send_msg(self.smu_mod.PPSMC_MSG_GetDpmFreqByIndex, (clck<<16)|0xff, read_back_arg=True)&0x7fffffff
self.clcks[clck] = [self._send_msg(self.smu_mod.PPSMC_MSG_GetDpmFreqByIndex, (clck<<16)|i, read_back_arg=True)&0x7fffffff for i in range(cnt)]
for clck, vals in self.clcks.items():
self._send_msg(self.smu_mod.PPSMC_MSG_SetSoftMinByFreq, clck << 16 | (vals[level]))
self._send_msg(self.smu_mod.PPSMC_MSG_SetSoftMaxByFreq, clck << 16 | (vals[level]))
def _smu_cmn_send_msg(self, msg, param=0, debug=False):
(self.adev.mmMP1_SMN_C2PMSG_90 if not debug else self.adev.mmMP1_SMN_C2PMSG_54).write(0) # resp reg
(self.adev.mmMP1_SMN_C2PMSG_82 if not debug else self.adev.mmMP1_SMN_C2PMSG_53).write(param)
(self.adev.mmMP1_SMN_C2PMSG_66 if not debug else self.adev.mmMP1_SMN_C2PMSG_75).write(msg)
def _send_msg(self, msg, param, read_back_arg=False, timeout=10000, debug=False): # 10s
self._smu_cmn_send_msg(msg, param, debug=debug)
self.adev.wait_reg(self.adev.mmMP1_SMN_C2PMSG_90 if not debug else self.adev.mmMP1_SMN_C2PMSG_54, mask=0xFFFFFFFF, value=1, timeout=timeout)
return (self.adev.mmMP1_SMN_C2PMSG_82 if not debug else self.adev.mmMP1_SMN_C2PMSG_53).read() if read_back_arg else None
class AM_GFX(AM_IP):
def init_hw(self):
# Wait for RLC autoload to complete
while self.adev.regCP_STAT.read() != 0 and self.adev.regRLC_RLCS_BOOTLOAD_STATUS.read_bitfields()['bootload_complete'] != 0: pass
self._config_gfx_rs64()
self.adev.gmc.init_hub("GC")
# NOTE: Golden reg for gfx11. No values for this reg provided. The kernel just ors 0x20000000 to this reg.
self.adev.regTCP_CNTL.write(self.adev.regTCP_CNTL.read() | 0x20000000)
self.adev.regRLC_SRM_CNTL.update(srm_enable=1, auto_incr_addr=1)
self.adev.soc.doorbell_enable(port=0, awid=0x3, awaddr_31_28_value=0x3)
self.adev.soc.doorbell_enable(port=3, awid=0x6, awaddr_31_28_value=0x3)
self.adev.regGRBM_CNTL.update(read_timeout=0xff)
for i in range(0, 16):
self._grbm_select(vmid=i)
self.adev.regSH_MEM_CONFIG.write(address_mode=self.adev.soc.module.SH_MEM_ADDRESS_MODE_64,
alignment_mode=self.adev.soc.module.SH_MEM_ALIGNMENT_MODE_UNALIGNED, initial_inst_prefetch=3)
# Configure apertures:
# LDS: 0x10000000'00000000 - 0x10000001'00000000 (4GB)
# Scratch: 0x20000000'00000000 - 0x20000001'00000000 (4GB)
self.adev.regSH_MEM_BASES.write(shared_base=0x1, private_base=0x2)
self._grbm_select()
# Configure MEC doorbell range
self.adev.regCP_MEC_DOORBELL_RANGE_LOWER.write(0x0)
self.adev.regCP_MEC_DOORBELL_RANGE_UPPER.write(0x450)
# Enable MEC
self.adev.regCP_MEC_RS64_CNTL.update(mec_invalidate_icache=0, mec_pipe0_reset=0, mec_pipe1_reset=0, mec_pipe2_reset=0, mec_pipe3_reset=0,
mec_pipe0_active=1, mec_pipe1_active=1, mec_pipe2_active=1, mec_pipe3_active=1, mec_halt=0)
# NOTE: Wait for MEC to be ready. The kernel does udelay here as well.
time.sleep(0.05)
def fini_hw(self):
self._grbm_select(me=1, pipe=0, queue=0)
self.adev.regCP_HQD_DEQUEUE_REQUEST.write(0x2) # 1 - DRAIN_PIPE; 2 - RESET_WAVES
self.adev.regSPI_COMPUTE_QUEUE_RESET.write(1)
self._grbm_select()
self.adev.regGCVM_CONTEXT0_CNTL.write(0)
def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, eop_addr:int, eop_size:int, doorbell:int, pipe:int, queue:int):
mqd = self.adev.mm.valloc(0x1000, uncached=True, contigous=True)
struct_t = getattr(am, f"struct_v{self.adev.ip_ver[am.GC_HWIP][0]}_compute_mqd")
mqd_struct = struct_t(header=0xC0310800, cp_mqd_base_addr_lo=lo32(mqd.va_addr), cp_mqd_base_addr_hi=hi32(mqd.va_addr),
cp_hqd_persistent_state=self.adev.regCP_HQD_PERSISTENT_STATE.encode(preload_size=0x55, preload_req=1),
cp_hqd_pipe_priority=0x2, cp_hqd_queue_priority=0xf, cp_hqd_quantum=0x111,
cp_hqd_pq_base_lo=lo32(ring_addr>>8), cp_hqd_pq_base_hi=hi32(ring_addr>>8),
cp_hqd_pq_rptr_report_addr_lo=lo32(rptr_addr), cp_hqd_pq_rptr_report_addr_hi=hi32(rptr_addr),
cp_hqd_pq_wptr_poll_addr_lo=lo32(wptr_addr), cp_hqd_pq_wptr_poll_addr_hi=hi32(wptr_addr),
cp_hqd_pq_doorbell_control=self.adev.regCP_HQD_PQ_DOORBELL_CONTROL.encode(doorbell_offset=doorbell*2, doorbell_en=1),
cp_hqd_pq_control=self.adev.regCP_HQD_PQ_CONTROL.encode(rptr_block_size=5, unord_dispatch=0, queue_size=(ring_size//4).bit_length()-2),
cp_hqd_ib_control=self.adev.regCP_HQD_IB_CONTROL.encode(min_ib_avail_size=0x3), cp_hqd_hq_status0=0x20004000,
cp_mqd_control=self.adev.regCP_MQD_CONTROL.encode(priv_state=1), cp_hqd_vmid=0,
cp_hqd_eop_base_addr_lo=lo32(eop_addr>>8), cp_hqd_eop_base_addr_hi=hi32(eop_addr>>8),
cp_hqd_eop_control=self.adev.regCP_HQD_EOP_CONTROL.encode(eop_size=(eop_size//4).bit_length()-2))
# Copy mqd into memory
ctypes.memmove(self.adev.paddr2cpu(mqd.paddrs[0][0]), ctypes.addressof(mqd_struct), ctypes.sizeof(mqd_struct))
self.adev.gmc.flush_hdp()
self._grbm_select(me=1, pipe=pipe, queue=queue)
mqd_st_mv = to_mv(ctypes.addressof(mqd_struct), ctypes.sizeof(mqd_struct)).cast('I')
for i, reg in enumerate(range(self.adev.regCP_MQD_BASE_ADDR.addr, self.adev.regCP_HQD_PQ_WPTR_HI.addr + 1)):
self.adev.wreg(reg, mqd_st_mv[0x80 + i])
self.adev.regCP_HQD_ACTIVE.write(0x1)
self._grbm_select()
self.adev.reg(f"regCP_ME1_PIPE{pipe}_INT_CNTL").update(time_stamp_int_enable=1, generic0_int_enable=1)
def set_clockgating_state(self):
if hasattr(self.adev, 'regMM_ATC_L2_MISC_CG'): self.adev.regMM_ATC_L2_MISC_CG.write(enable=1, mem_ls_enable=1)
self.adev.regRLC_SAFE_MODE.write(message=1, cmd=1)
self.adev.wait_reg(self.adev.regRLC_SAFE_MODE, mask=0x1, value=0x0)
self.adev.regRLC_CGCG_CGLS_CTRL.update(cgcg_gfx_idle_threshold=0x36, cgcg_en=1, cgls_rep_compansat_delay=0xf, cgls_en=1)
self.adev.regCP_RB_WPTR_POLL_CNTL.update(poll_frequency=0x100, idle_poll_count=0x90)
self.adev.regCP_INT_CNTL.update(cntx_busy_int_enable=1, cntx_empty_int_enable=1, cmp_busy_int_enable=1, gfx_idle_int_enable=1)
self.adev.regSDMA0_RLC_CGCG_CTRL.update(cgcg_int_enable=1)
self.adev.regSDMA1_RLC_CGCG_CTRL.update(cgcg_int_enable=1)
self.adev.regRLC_CGTT_MGCG_OVERRIDE.update(perfmon_clock_state=0, gfxip_fgcg_override=0, gfxip_repeater_fgcg_override=0,
grbm_cgtt_sclk_override=0, rlc_cgtt_sclk_override=0, gfxip_mgcg_override=0, gfxip_cgls_override=0, gfxip_cgcg_override=0)
self.adev.regRLC_SAFE_MODE.write(message=0, cmd=1)
def _grbm_select(self, me=0, pipe=0, queue=0, vmid=0): self.adev.regGRBM_GFX_CNTL.write(meid=me, pipeid=pipe, vmid=vmid, queueid=queue)
def _config_gfx_rs64(self):
def _config_helper(eng_name, cntl_reg, eng_reg, pipe_cnt, me=0):
for pipe in range(pipe_cnt):
self._grbm_select(me=me, pipe=pipe)
self.adev.wreg_pair(f"regCP_{eng_reg}_PRGRM_CNTR_START", "", "_HI", self.adev.fw.ucode_start[eng_name] >> 2)
self._grbm_select()
self.adev.reg(f"regCP_{cntl_reg}_CNTL").update(**{f"{eng_name.lower()}_pipe{pipe}_reset": 1 for pipe in range(pipe_cnt)})
self.adev.reg(f"regCP_{cntl_reg}_CNTL").update(**{f"{eng_name.lower()}_pipe{pipe}_reset": 0 for pipe in range(pipe_cnt)})
_config_helper(eng_name="PFP", cntl_reg="ME", eng_reg="PFP", pipe_cnt=2)
_config_helper(eng_name="ME", cntl_reg="ME", eng_reg="ME", pipe_cnt=2)
_config_helper(eng_name="MEC", cntl_reg="MEC_RS64", eng_reg="MEC_RS64", pipe_cnt=4, me=1)
class AM_IH(AM_IP):
def init_sw(self):
self.ring_size = 512 << 10
def _alloc_ring(size): return (self.adev.mm.palloc(size, zero=not self.adev.partial_boot, boot=True),
self.adev.mm.palloc(0x1000, zero=not self.adev.partial_boot, boot=True))
self.rings = [(*_alloc_ring(self.ring_size), "", 0), (*_alloc_ring(self.ring_size), "_RING1", 1)]
def init_hw(self):
for ring_vm, rwptr_vm, suf, ring_id in self.rings:
self.adev.wreg_pair("regIH_RB_BASE", suf, f"_HI{suf}", self.adev.paddr2mc(ring_vm) >> 8)
self.adev.reg(f"regIH_RB_CNTL{suf}").write(mc_space=4, wptr_overflow_clear=1, rb_size=(self.ring_size//4).bit_length(),
mc_snoop=1, mc_ro=0, mc_vmid=0, **({'wptr_overflow_enable': 1, 'rptr_rearm': 1} if ring_id == 0 else {'rb_full_drain_enable': 1}))
if ring_id == 0: self.adev.wreg_pair("regIH_RB_WPTR_ADDR", "_LO", "_HI", self.adev.paddr2mc(rwptr_vm))
self.adev.reg(f"regIH_RB_WPTR{suf}").write(0)
self.adev.reg(f"regIH_RB_RPTR{suf}").write(0)
self.adev.reg(f"regIH_DOORBELL_RPTR{suf}").write(offset=(am.AMDGPU_NAVI10_DOORBELL_IH + ring_id) * 2, enable=1)
self.adev.regIH_STORM_CLIENT_LIST_CNTL.update(client18_is_storm_client=1)
self.adev.regIH_INT_FLOOD_CNTL.update(flood_cntl_enable=1)
self.adev.regIH_MSI_STORM_CTRL.update(delay=3)
# toggle interrupts
for _, rwptr_vm, suf, ring_id in self.rings:
self.adev.reg(f"regIH_RB_CNTL{suf}").update(rb_enable=1, **({'enable_intr': 1} if ring_id == 0 else {}))
self.adev.soc.doorbell_enable(port=1, awid=0x0, awaddr_31_28_value=0x0, offset=am.AMDGPU_NAVI10_DOORBELL_IH*2, size=2)
def interrupt_handler(self):
_, rwptr_vm, suf, _ = self.rings[0]
wptr = to_mv(self.adev.paddr2cpu(rwptr_vm), 8).cast('Q')[0]
if self.adev.reg(f"regIH_RB_WPTR{suf}").read_bitfields()['rb_overflow']:
self.adev.reg(f"regIH_RB_WPTR{suf}").update(rb_overflow=0)
self.adev.reg(f"regIH_RB_CNTL{suf}").update(wptr_overflow_clear=1)
self.adev.reg(f"regIH_RB_CNTL{suf}").update(wptr_overflow_clear=0)
self.adev.regIH_RB_RPTR.write(wptr % self.ring_size)
class AM_SDMA(AM_IP):
def init_sw(self): self.sdma_name = "F32" if self.adev.ip_ver[am.SDMA0_HWIP] < (7,0,0) else "MCU"
def init_hw(self):
for pipe in range(2):
self.adev.reg(f"regSDMA{pipe}_WATCHDOG_CNTL").update(queue_hang_count=100) # 10s, 100ms per unit
self.adev.reg(f"regSDMA{pipe}_UTCL1_CNTL").update(resp_mode=3, redo_delay=9)
# rd=noa, wr=bypass
self.adev.reg(f"regSDMA{pipe}_UTCL1_PAGE").update(rd_l2_policy=0x2, wr_l2_policy=0x3, **({'llc_noalloc':1} if self.sdma_name == "F32" else {}))
self.adev.reg(f"regSDMA{pipe}_{self.sdma_name}_CNTL").update(halt=0, **{f"{'th1_' if self.sdma_name == 'F32' else ''}reset":0})
self.adev.reg(f"regSDMA{pipe}_CNTL").update(ctxempty_int_enable=1, trap_enable=1)
self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x3, offset=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, size=4)
def fini_hw(self):
self.adev.regSDMA0_QUEUE0_RB_CNTL.update(rb_enable=0)
self.adev.regSDMA0_QUEUE0_IB_CNTL.update(ib_enable=0)
self.adev.regGRBM_SOFT_RESET.write(soft_reset_sdma0=1)
time.sleep(0.01)
self.adev.regGRBM_SOFT_RESET.write(0x0)
def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, doorbell:int, pipe:int, queue:int):
# Setup the ring
self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_MINOR_PTR_UPDATE").write(0x1)
self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_RPTR", "", "_HI", 0)
self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_WPTR", "", "_HI", 0)
self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_BASE", "", "_HI", ring_addr >> 8)
self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_RPTR_ADDR", "_LO", "_HI", rptr_addr)
self.adev.wreg_pair(f"regSDMA{pipe}_QUEUE{queue}_RB_WPTR_POLL_ADDR", "_LO", "_HI", wptr_addr)
self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_DOORBELL_OFFSET").update(offset=doorbell * 2)
self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_DOORBELL").update(enable=1)
self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_MINOR_PTR_UPDATE").write(0x0)
self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_RB_CNTL").write(rb_vmid=0, rptr_writeback_enable=1, rptr_writeback_timer=4,
**{f'{self.sdma_name.lower()}_wptr_poll_enable':1}, rb_size=(ring_size//4).bit_length()-1, rb_enable=1, rb_priv=1)
self.adev.reg(f"regSDMA{pipe}_QUEUE{queue}_IB_CNTL").update(ib_enable=1)
class AM_PSP(AM_IP):
def init_sw(self):
self.reg_pref = "regMP0_SMN_C2PMSG" if self.adev.ip_ver[am.MP0_HWIP] < (14,0,0) else "regMPASP_SMN_C2PMSG"
self.msg1_paddr = self.adev.mm.palloc(am.PSP_1_MEG, align=am.PSP_1_MEG, zero=not self.adev.partial_boot, boot=True)
self.cmd_paddr = self.adev.mm.palloc(am.PSP_CMD_BUFFER_SIZE, zero=not self.adev.partial_boot, boot=True)
self.fence_paddr = self.adev.mm.palloc(am.PSP_FENCE_BUFFER_SIZE, zero=not self.adev.partial_boot, boot=True)
self.ring_size = 0x10000
self.ring_paddr = self.adev.mm.palloc(self.ring_size, zero=not self.adev.partial_boot, boot=True)
self.max_tmr_size = 0x1300000
self.boot_time_tmr = self.adev.ip_ver[am.GC_HWIP] >= (12,0,0)
if not self.boot_time_tmr:
self.tmr_paddr = self.adev.mm.palloc(self.max_tmr_size, align=am.PSP_TMR_ALIGNMENT, zero=not self.adev.partial_boot, boot=True)
def init_hw(self):
spl_key = am.PSP_FW_TYPE_PSP_SPL if self.adev.ip_ver[am.MP0_HWIP] >= (14,0,0) else am.PSP_FW_TYPE_PSP_KDB
sos_components_load_order = [
(am.PSP_FW_TYPE_PSP_KDB, am.PSP_BL__LOAD_KEY_DATABASE), (spl_key, am.PSP_BL__LOAD_TOS_SPL_TABLE),
(am.PSP_FW_TYPE_PSP_SYS_DRV, am.PSP_BL__LOAD_SYSDRV), (am.PSP_FW_TYPE_PSP_SOC_DRV, am.PSP_BL__LOAD_SOCDRV),
(am.PSP_FW_TYPE_PSP_INTF_DRV, am.PSP_BL__LOAD_INTFDRV), (am.PSP_FW_TYPE_PSP_DBG_DRV, am.PSP_BL__LOAD_DBGDRV),
(am.PSP_FW_TYPE_PSP_RAS_DRV, am.PSP_BL__LOAD_RASDRV), (am.PSP_FW_TYPE_PSP_SOS, am.PSP_BL__LOAD_SOSDRV)]
if not self.is_sos_alive():
for fw, compid in sos_components_load_order: self._bootloader_load_component(fw, compid)
while not self.is_sos_alive(): time.sleep(0.01)
self._ring_create()
self._tmr_init()
# SMU fw should be loaded before TMR.
self._load_ip_fw_cmd(*self.adev.fw.smu_psp_desc)
if not self.boot_time_tmr: self._tmr_load_cmd()
for psp_desc in self.adev.fw.descs: self._load_ip_fw_cmd(*psp_desc)
self._rlc_autoload_cmd()
def is_sos_alive(self): return self.adev.reg(f"{self.reg_pref}_81").read() != 0x0
def _wait_for_bootloader(self): self.adev.wait_reg(self.adev.reg(f"{self.reg_pref}_35"), mask=0x80000000, value=0x80000000)
def _prep_msg1(self, data):
ctypes.memset(cpu_addr:=self.adev.paddr2cpu(self.msg1_paddr), 0, am.PSP_1_MEG)
to_mv(cpu_addr, len(data))[:] = data
self.adev.gmc.flush_hdp()
def _bootloader_load_component(self, fw, compid):
if fw not in self.adev.fw.sos_fw: return 0
self._wait_for_bootloader()
if DEBUG >= 2: print(f"am {self.adev.devfmt}: loading sos component: {am.psp_fw_type__enumvalues[fw]}")
self._prep_msg1(self.adev.fw.sos_fw[fw])
self.adev.reg(f"{self.reg_pref}_36").write(self.adev.paddr2mc(self.msg1_paddr) >> 20)
self.adev.reg(f"{self.reg_pref}_35").write(compid)
return self._wait_for_bootloader() if compid != am.PSP_BL__LOAD_SOSDRV else 0
def _tmr_init(self):
# Load TOC and calculate TMR size
self._prep_msg1(fwm:=self.adev.fw.sos_fw[am.PSP_FW_TYPE_PSP_TOC])
self.tmr_size = self._load_toc_cmd(len(fwm)).resp.tmr_size
assert self.tmr_size <= self.max_tmr_size
def _ring_create(self):
# If the ring is already created, destroy it
if self.adev.reg(f"{self.reg_pref}_71").read() != 0:
self.adev.reg(f"{self.reg_pref}_64").write(am.GFX_CTRL_CMD_ID_DESTROY_RINGS)
# There might be handshake issue with hardware which needs delay
time.sleep(0.02)
# Wait until the sOS is ready
self.adev.wait_reg(self.adev.reg(f"{self.reg_pref}_64"), mask=0x80000000, value=0x80000000)
self.adev.wreg_pair(self.reg_pref, "_69", "_70", self.adev.paddr2mc(self.ring_paddr))
self.adev.reg(f"{self.reg_pref}_71").write(self.ring_size)
self.adev.reg(f"{self.reg_pref}_64").write(am.PSP_RING_TYPE__KM << 16)
# There might be handshake issue with hardware which needs delay
time.sleep(0.02)
self.adev.wait_reg(self.adev.reg(f"{self.reg_pref}_64"), mask=0x8000FFFF, value=0x80000000)
def _ring_submit(self):
prev_wptr = self.adev.reg(f"{self.reg_pref}_67").read()
ring_entry_addr = self.adev.paddr2cpu(self.ring_paddr) + prev_wptr * 4
ctypes.memset(ring_entry_addr, 0, ctypes.sizeof(am.struct_psp_gfx_rb_frame))
write_loc = am.struct_psp_gfx_rb_frame.from_address(ring_entry_addr)
write_loc.cmd_buf_addr_hi, write_loc.cmd_buf_addr_lo = data64(self.adev.paddr2mc(self.cmd_paddr))
write_loc.fence_addr_hi, write_loc.fence_addr_lo = data64(self.adev.paddr2mc(self.fence_paddr))
write_loc.fence_value = prev_wptr
# Move the wptr
self.adev.reg(f"{self.reg_pref}_67").write(prev_wptr + ctypes.sizeof(am.struct_psp_gfx_rb_frame) // 4)
while to_mv(self.adev.paddr2cpu(self.fence_paddr), 4).cast('I')[0] != prev_wptr: pass
time.sleep(0.005)
resp = am.struct_psp_gfx_cmd_resp.from_address(self.adev.paddr2cpu(self.cmd_paddr))
if resp.resp.status != 0: raise RuntimeError(f"PSP command failed {resp.cmd_id} {resp.resp.status}")
return resp
def _prep_ring_cmd(self, hdr):
ctypes.memset(self.adev.paddr2cpu(self.cmd_paddr), 0, 0x1000)
cmd = am.struct_psp_gfx_cmd_resp.from_address(self.adev.paddr2cpu(self.cmd_paddr))
cmd.cmd_id = hdr
return cmd
def _load_ip_fw_cmd(self, fw_types, fw_bytes):
self._prep_msg1(fw_bytes)
for fw_type in fw_types:
if DEBUG >= 2: print(f"am {self.adev.devfmt}: loading fw: {am.psp_gfx_fw_type__enumvalues[fw_type]}")
cmd = self._prep_ring_cmd(am.GFX_CMD_ID_LOAD_IP_FW)
cmd.cmd.cmd_load_ip_fw.fw_phy_addr_hi, cmd.cmd.cmd_load_ip_fw.fw_phy_addr_lo = data64(self.adev.paddr2mc(self.msg1_paddr))
cmd.cmd.cmd_load_ip_fw.fw_size = len(fw_bytes)
cmd.cmd.cmd_load_ip_fw.fw_type = fw_type
self._ring_submit()
def _tmr_load_cmd(self):
cmd = self._prep_ring_cmd(am.GFX_CMD_ID_SETUP_TMR)
cmd.cmd.cmd_setup_tmr.buf_phy_addr_hi, cmd.cmd.cmd_setup_tmr.buf_phy_addr_lo = data64(self.adev.paddr2mc(self.tmr_paddr))
cmd.cmd.cmd_setup_tmr.system_phy_addr_hi, cmd.cmd.cmd_setup_tmr.system_phy_addr_lo = data64(self.tmr_paddr)
cmd.cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1
cmd.cmd.cmd_setup_tmr.buf_size = self.tmr_size
return self._ring_submit()
def _load_toc_cmd(self, toc_size):
cmd = self._prep_ring_cmd(am.GFX_CMD_ID_LOAD_TOC)
cmd.cmd.cmd_load_toc.toc_phy_addr_hi, cmd.cmd.cmd_load_toc.toc_phy_addr_lo = data64(self.adev.paddr2mc(self.msg1_paddr))
cmd.cmd.cmd_load_toc.toc_size = toc_size
return self._ring_submit()
def _rlc_autoload_cmd(self):
self._prep_ring_cmd(am.GFX_CMD_ID_AUTOLOAD_RLC)
return self._ring_submit()