openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

81 lines
2.4 KiB

.global _start
_start:
.rodata
.align 0x10
.global code.kd
.type code.kd,STT_OBJECT
# amd_kernel_code_t (must be at 0x440 for kernel_code_entry_byte_offset to be right)
code.kd:
# amd_kernel_..., amd_machine_...
.long 0,0,0,0
# kernel_code_entry_byte_offset, kernel_code_prefetch_byte_offset
.long 0x00000bc0,0x00000000,0x00000000,0x00000000
# kernel_code_prefetch_byte_size, max_scratch_backing_memory_byte_size
.long 0,0,0,0
# compute_pgm_rsrc1, compute_pgm_rsrc2, kernel_code_properties, workitem_private_segment_byte_size
.long 0x60af0000,0x0000009e,0x00000408,0x00000000
# compute_pgm_rsrc1 |= AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_32 | AMD_COMPUTE_PGM_RSRC_ONE_FLOAT_DENORM_MODE_16_64
# compute_pgm_rsrc1 |= AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_DX10_CLAMP | AMD_COMPUTE_PGM_RSRC_ONE_ENABLE_IEEE_MODE
# compute_pgm_rsrc2 |= AMD_COMPUTE_PGM_RSRC_TWO_USER_SGPR_COUNT = 0xF
# compute_pgm_rsrc2 |= AMD_COMPUTE_PGM_RSRC_TWO_ENABLE_SGPR_WORKGROUP_ID_X
# kernel_code_properties |= AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_KERNARG_SEGMENT_PTR = 1
# kernel_code_properties |= AMD_KERNEL_CODE_PROPERTIES_RESERVED1 = 1
.text
.global code
.type code,STT_FUNC
code:
# https://llvm.org/docs/AMDGPUUsage.html#initial-kernel-execution-state
# s[0:1] contains the kernarg_address
# TODO: can we use s[2:3] if this was really a wave since we only alloced 2 SGPRs?
s_load_b64 s[2:3], s[0:1], null
s_mov_b32 s8, 0
loop:
s_addk_i32 s8, 1
s_cmp_eq_u32 s8, 100000
// FLOPS
s_cbranch_scc0 loop
# wait for the s_load_b64
s_waitcnt lgkmcnt(0)
v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 2.0
global_store_b32 v0, v1, s[2:3]
# Deallocate all VGPRs for this wave. Use only when next instruction is S_ENDPGM.
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
s_endpgm
s_code_end
.amdgpu_metadata
amdhsa.kernels:
- .args:
- .address_space: global
.name: a
.offset: 0
.size: 8
.type_name: 'float*'
.value_kind: global_buffer
.group_segment_fixed_size: 0
.kernarg_segment_align: 8
.kernarg_segment_size: 8
.language: OpenCL C
.language_version:
- 1
- 2
.max_flat_workgroup_size: 256
.name: code
.private_segment_fixed_size: 0
.sgpr_count: 2
.sgpr_spill_count: 0
.symbol: code.kd
.uses_dynamic_stack: false
.vgpr_count: 256
.vgpr_spill_count: 0
.wavefront_size: 32
amdhsa.target: amdgcn-amd-amdhsa--gfx1100
amdhsa.version:
- 1
- 2
.end_amdgpu_metadata