openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

230 lines
8.2 KiB

# Metal Shading Language source for the kernel `r_64_32_8_16_4_6_6_4`, held as a string so the
# script at the bottom of this file can pass it to MetalCompiler().compile().
# Error recorded for this source:
# AssertionError: Error Domain=AGXMetalG15X_B0 Code=3 "Compiler encountered an internal error"
# NOTE(review): presumably machine-generated and kept verbatim as a compiler-failure repro;
# the string is the exact compiler input, so do not reformat or simplify it -- confirm.
src = """
#include <metal_stdlib>
using namespace metal;
kernel void r_64_32_8_16_4_6_6_4(device float* data0, const device float* data1,
uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
int gidx0 = gid.x; /* 64 */
int lidx2 = lid.x; /* 8 */
int gidx1 = gid.y; /* 32 */
int lidx3 = lid.y; /* 16 */
int alu0 = ((gidx0*4096)+(gidx1*16)+(lidx2*512)+lidx3);
int alu1 = ((gidx0*147456)+(gidx1*576)+(lidx2*18432)+(lidx3*36));
float acc0 = 0.0f;
float acc1 = 0.0f;
float acc2 = 0.0f;
float acc3 = 0.0f;
float acc4 = 0.0f;
float acc5 = 0.0f;
float acc6 = 0.0f;
float acc7 = 0.0f;
float acc8 = 0.0f;
float acc9 = 0.0f;
float acc10 = 0.0f;
float acc11 = 0.0f;
float acc12 = 0.0f;
float acc13 = 0.0f;
float acc14 = 0.0f;
float acc15 = 0.0f;
float acc16 = 0.0f;
float acc17 = 0.0f;
float acc18 = 0.0f;
float acc19 = 0.0f;
float acc20 = 0.0f;
float acc21 = 0.0f;
float acc22 = 0.0f;
float acc23 = 0.0f;
float acc24 = 0.0f;
float acc25 = 0.0f;
float acc26 = 0.0f;
float acc27 = 0.0f;
float acc28 = 0.0f;
float acc29 = 0.0f;
float acc30 = 0.0f;
float acc31 = 0.0f;
float acc32 = 0.0f;
float acc33 = 0.0f;
float acc34 = 0.0f;
float acc35 = 0.0f;
for (int ridx0 = 0; ridx0 < 4; ridx0++) {
int alu2 = (ridx0*6);
int alu3 = (alu2+1);
int alu4 = (alu2+2);
int alu5 = (alu2+3);
int alu6 = (alu2+4);
int alu7 = (alu2+5);
int alu8 = (alu2%7);
int alu9 = ((alu8+1)%7);
int alu10 = ((alu8+2)%7);
int alu11 = ((alu8+3)%7);
int alu12 = ((alu8+4)%7);
int alu13 = ((alu8+5)%7);
int alu14 = ((((alu0+(alu3/21))%262144)*144)+(((alu3/7)%3)*3)+(alu9*36));
int alu15 = ((((alu0+(alu4/21))%262144)*144)+(((alu4/7)%3)*3)+(alu10*36));
int alu16 = ((((alu0+(alu5/21))%262144)*144)+(((alu5/7)%3)*3)+(alu11*36));
int alu17 = ((((alu0+(alu6/21))%262144)*144)+(((alu6/7)%3)*3)+(alu12*36));
int alu18 = ((((alu0+(alu7/21))%262144)*144)+(((alu7/7)%3)*3)+(alu13*36));
int alu19 = (alu8%7);
int alu20 = ((((alu0+(alu2/21))%262144)*144)+(((alu2/7)%3)*3)+(alu19*36));
bool alu21 = ((alu2<16)&(alu13<4));
bool alu22 = ((alu2<17)&(alu12<4));
bool alu23 = ((alu2<18)&(alu11<4));
bool alu24 = ((alu2<19)&(alu10<4));
bool alu25 = ((alu2<20)&(alu9<4));
bool alu26 = ((alu2<21)&(alu19<4));
float val0 = (alu25?*(data1+alu14+1):0.0f);
float val1 = (alu25?*(data1+alu14+2):0.0f);
float val2 = (alu25?*(data1+alu14+9):0.0f);
float val3 = (alu25?*(data1+alu14+10):0.0f);
float val4 = (alu25?*(data1+alu14+11):0.0f);
float val5 = (alu25?*(data1+alu14+18):0.0f);
float val6 = (alu25?*(data1+alu14+19):0.0f);
float val7 = (alu25?*(data1+alu14+20):0.0f);
float val8 = (alu25?*(data1+alu14+27):0.0f);
float val9 = (alu25?*(data1+alu14+28):0.0f);
float val10 = (alu25?*(data1+alu14+29):0.0f);
float val11 = (alu24?*(data1+alu15+1):0.0f);
float val12 = (alu24?*(data1+alu15+2):0.0f);
float val13 = (alu24?*(data1+alu15+9):0.0f);
float val14 = (alu24?*(data1+alu15+10):0.0f);
float val15 = (alu24?*(data1+alu15+11):0.0f);
float val16 = (alu24?*(data1+alu15+18):0.0f);
float val17 = (alu24?*(data1+alu15+19):0.0f);
float val18 = (alu24?*(data1+alu15+20):0.0f);
float val19 = (alu24?*(data1+alu15+27):0.0f);
float val20 = (alu24?*(data1+alu15+28):0.0f);
float val21 = (alu24?*(data1+alu15+29):0.0f);
float val22 = (alu23?*(data1+alu16+1):0.0f);
float val23 = (alu23?*(data1+alu16+2):0.0f);
float val24 = (alu23?*(data1+alu16+9):0.0f);
float val25 = (alu23?*(data1+alu16+10):0.0f);
float val26 = (alu23?*(data1+alu16+11):0.0f);
float val27 = (alu23?*(data1+alu16+18):0.0f);
float val28 = (alu23?*(data1+alu16+19):0.0f);
float val29 = (alu23?*(data1+alu16+20):0.0f);
float val30 = (alu23?*(data1+alu16+27):0.0f);
float val31 = (alu23?*(data1+alu16+28):0.0f);
float val32 = (alu23?*(data1+alu16+29):0.0f);
float val33 = (alu22?*(data1+alu17+1):0.0f);
float val34 = (alu22?*(data1+alu17+2):0.0f);
float val35 = (alu22?*(data1+alu17+9):0.0f);
float val36 = (alu22?*(data1+alu17+10):0.0f);
float val37 = (alu22?*(data1+alu17+11):0.0f);
float val38 = (alu22?*(data1+alu17+18):0.0f);
float val39 = (alu22?*(data1+alu17+19):0.0f);
float val40 = (alu22?*(data1+alu17+20):0.0f);
float val41 = (alu22?*(data1+alu17+27):0.0f);
float val42 = (alu22?*(data1+alu17+28):0.0f);
float val43 = (alu22?*(data1+alu17+29):0.0f);
float val44 = (alu21?*(data1+alu18+1):0.0f);
float val45 = (alu21?*(data1+alu18+2):0.0f);
float val46 = (alu21?*(data1+alu18+9):0.0f);
float val47 = (alu21?*(data1+alu18+10):0.0f);
float val48 = (alu21?*(data1+alu18+11):0.0f);
float val49 = (alu21?*(data1+alu18+18):0.0f);
float val50 = (alu21?*(data1+alu18+19):0.0f);
float val51 = (alu21?*(data1+alu18+20):0.0f);
float val52 = (alu21?*(data1+alu18+27):0.0f);
float val53 = (alu21?*(data1+alu18+28):0.0f);
float val54 = (alu21?*(data1+alu18+29):0.0f);
float val55 = (alu26?*(data1+alu20+1):0.0f);
float val56 = (alu26?*(data1+alu20+2):0.0f);
float val57 = (alu26?*(data1+alu20+9):0.0f);
float val58 = (alu26?*(data1+alu20+10):0.0f);
float val59 = (alu26?*(data1+alu20+11):0.0f);
float val60 = (alu26?*(data1+alu20+18):0.0f);
float val61 = (alu26?*(data1+alu20+19):0.0f);
float val62 = (alu26?*(data1+alu20+20):0.0f);
float val63 = (alu26?*(data1+alu20+27):0.0f);
float val64 = (alu26?*(data1+alu20+28):0.0f);
float val65 = (alu26?*(data1+alu20+29):0.0f);
float val66 = (alu25?*(data1+alu14):0.0f);
float val67 = (alu24?*(data1+alu15):0.0f);
float val68 = (alu23?*(data1+alu16):0.0f);
float val69 = (alu22?*(data1+alu17):0.0f);
float val70 = (alu21?*(data1+alu18):0.0f);
float val71 = (alu26?*(data1+alu20):0.0f);
acc0 = (acc0+val71);
acc1 = (acc1+val66);
acc2 = (acc2+val67);
acc3 = (acc3+val68);
acc4 = (acc4+val69);
acc5 = (acc5+val70);
acc6 = (acc6+val57+val55);
acc7 = (acc7+val2+val0);
acc8 = (acc8+val13+val11);
acc9 = (acc9+val24+val22);
acc10 = (acc10+val35+val33);
acc11 = (acc11+val46+val44);
acc12 = (acc12+val60+val58+val56);
acc13 = (acc13+val5+val3+val1);
acc14 = (acc14+val16+val14+val12);
acc15 = (acc15+val27+val25+val23);
acc16 = (acc16+val38+val36+val34);
acc17 = (acc17+val49+val47+val45);
acc18 = (acc18+val63+val61+val59);
acc19 = (acc19+val8+val6+val4);
acc20 = (acc20+val19+val17+val15);
acc21 = (acc21+val30+val28+val26);
acc22 = (acc22+val41+val39+val37);
acc23 = (acc23+val52+val50+val48);
acc24 = (acc24+val64+val62);
acc25 = (acc25+val9+val7);
acc26 = (acc26+val20+val18);
acc27 = (acc27+val31+val29);
acc28 = (acc28+val42+val40);
acc29 = (acc29+val53+val51);
acc30 = (acc30+val65);
acc31 = (acc31+val10);
acc32 = (acc32+val21);
acc33 = (acc33+val32);
acc34 = (acc34+val43);
acc35 = (acc35+val54);
}
*(data0+alu1+1) = acc6;
*(data0+alu1+2) = acc12;
*(data0+alu1+3) = acc18;
*(data0+alu1+4) = acc24;
*(data0+alu1+5) = acc30;
*(data0+alu1+6) = acc1;
*(data0+alu1+7) = acc7;
*(data0+alu1+8) = acc13;
*(data0+alu1+9) = acc19;
*(data0+alu1+10) = acc25;
*(data0+alu1+11) = acc31;
*(data0+alu1+12) = acc2;
*(data0+alu1+13) = acc8;
*(data0+alu1+14) = acc14;
*(data0+alu1+15) = acc20;
*(data0+alu1+16) = acc26;
*(data0+alu1+17) = acc32;
*(data0+alu1+18) = acc3;
*(data0+alu1+19) = acc9;
*(data0+alu1+20) = acc15;
*(data0+alu1+21) = acc21;
*(data0+alu1+22) = acc27;
*(data0+alu1+23) = acc33;
*(data0+alu1+24) = acc4;
*(data0+alu1+25) = acc10;
*(data0+alu1+26) = acc16;
*(data0+alu1+27) = acc22;
*(data0+alu1+28) = acc28;
*(data0+alu1+29) = acc34;
*(data0+alu1+30) = acc5;
*(data0+alu1+31) = acc11;
*(data0+alu1+32) = acc17;
*(data0+alu1+33) = acc23;
*(data0+alu1+34) = acc29;
*(data0+alu1+35) = acc35;
*(data0+alu1) = acc0;
}
"""
from tinygrad.runtime.ops_metal import MetalDevice, MetalCompiler, MetalProgram
if __name__ == "__main__":
  # Repro driver: runs only when the file is executed as a script, so importing it stays side-effect free.
  # The error quoted in the comment above `src` was recorded for this kernel source.
  # NOTE(review): which of the calls below raises is not visible from this file -- confirm on hardware.
  dev = MetalDevice("METAL")
  lib = MetalCompiler().compile(src)
  # The name passed here must match the `kernel void` entry point declared inside `src`.
  prg = MetalProgram(dev, "r_64_32_8_16_4_6_6_4", lib)