You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
23 lines
602 B
23 lines
602 B
#!/usr/bin/env python3
|
|
import numpy as np
|
|
from tinygrad.runtime.ops_cuda import CUDAProgram, RawCUDABuffer
|
|
|
|
if __name__ == "__main__":
    # Hand-written PTX for a kernel named `test` that takes one 64-bit
    # parameter `x`, converts it to a global-space address, and stores the
    # 32-bit pattern 0x40000000 (IEEE-754 single-precision 2.0) at that
    # address. Only the first 4 bytes behind `x` are written.
    ptx_source = """
.version 7.8
.target sm_86
.address_size 64
.visible .entry test(.param .u64 x) {
.reg .b32 %r<2>;
.reg .b64 %rd<3>;

ld.param.u64 %rd1, [x];
cvta.to.global.u64 %rd2, %rd1;
mov.u32 %r1, 0x40000000; // 2.0 in float
st.global.u32 [%rd2], %r1;
ret;
}"""

    # Ten float32 zeros handed to RawCUDABuffer.fromCPU; presumably this
    # copies them into device memory -- confirm against ops_cuda.
    host_zeros = np.zeros(10, np.float32)
    device_buf = RawCUDABuffer.fromCPU(host_zeros)

    # NOTE(review): binary=True presumably tells CUDAProgram that the source
    # is already PTX and must not be compiled from CUDA C -- confirm.
    kernel = CUDAProgram("test", ptx_source, binary=True)

    # NOTE(review): the two [1] arguments look like the global and local
    # launch sizes (a single thread) -- verify against CUDAProgram.__call__.
    kernel([1], [1], device_buf)

    # If the launch succeeded, element 0 should read back as 2.0 and the
    # remaining nine elements should still be 0.0.
    print(device_buf.toCPU())
|
|
|
|
|