// clang++ -O2 repro.cc && ./a.out
//
// Stand-alone reproduction of a slow YUV -> float tensor conversion.
// The program keeps allocating a fresh pair of buffers, times inner() on
// them, and stops as soon as one pair averages more than 10 ms per call.

// clang++/g++ predefine _GNU_SOURCE on Linux; defining it here keeps
// clock_gettime()/CLOCK_BOOTTIME visible if the file is built in a strict
// ISO mode instead.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Milliseconds elapsed on CLOCK_BOOTTIME, as a double. */
static inline double millis_since_boot() {
  struct timespec t;
  clock_gettime(CLOCK_BOOTTIME, &t);
  return t.tv_sec * 1000.0 + t.tv_nsec * 1e-6;
}

#define MODEL_WIDTH 320
#define MODEL_HEIGHT 640

// null function still breaks it
#define input_lambda(x) (x)

// this is copied from models/dmonitoring.cc, and is the code that triggers the issue
//
// Reads a MODEL_WIDTH x MODEL_HEIGHT planar buffer (Y plane, then two
// quarter-size chroma planes) and writes six half-resolution float planes
// into net_input_buf, in the order Y_ul, Y_dl, Y_ur, Y_dr, U, V.
//
//   resized_buf    - at least MODEL_WIDTH*MODEL_HEIGHT*3/2 bytes
//   net_input_buf  - at least (MODEL_WIDTH/2)*(MODEL_HEIGHT/2)*6 floats
//
// The index expressions and the order of the six stores are kept exactly as
// in the original so that the memory access pattern being timed is unchanged.
void inner(uint8_t *resized_buf, float *net_input_buf) {
  int resized_width = MODEL_WIDTH;
  int resized_height = MODEL_HEIGHT;

  // one shot conversion, O(n) anyway
  // yuvframe2tensor, normalize
  for (int r = 0; r < MODEL_HEIGHT/2; r++) {
    // The destination index is c*(MODEL_HEIGHT/2) + r, so consecutive
    // iterations of this inner loop write MODEL_HEIGHT/2 floats apart.
    for (int c = 0; c < MODEL_WIDTH/2; c++) {
      // Y_ul
      net_input_buf[(c*MODEL_HEIGHT/2) + r] = input_lambda(resized_buf[(2*r*resized_width) + (2*c)]);
      // Y_ur
      net_input_buf[(c*MODEL_HEIGHT/2) + r + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width) + (2*c+1)]);
      // Y_dl
      // NOTE(review): (2*r*resized_width+1) + (2*c) is the same source byte
      // as Y_ur above; if the row below was intended this would be
      // (2*r+1)*resized_width. Left untouched because this is a verbatim
      // copy of the code under investigation -- confirm upstream.
      net_input_buf[(c*MODEL_HEIGHT/2) + r + ((MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c)]);
      // Y_dr
      net_input_buf[(c*MODEL_HEIGHT/2) + r + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c+1)]);
      // U
      net_input_buf[(c*MODEL_HEIGHT/2) + r + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + (r*resized_width/2) + c]);
      // V
      net_input_buf[(c*MODEL_HEIGHT/2) + r + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + ((resized_width/2)*(resized_height/2)) + (r*resized_width/2) + c]);
    }
  }
}

/* Wall-clock duration of a single inner() call, in milliseconds. */
static double time_inner_ms(uint8_t *resized_buf, float *net_input_buf) {
  double start = millis_since_boot();
  inner(resized_buf, net_input_buf);
  double stop = millis_since_boot();
  return stop - start;
}

// Allocates one fresh input/output buffer pair, runs inner() on it CNT times
// and returns the mean duration in milliseconds.
//
// If the mean exceeds 10 ms the pair is reported as bad, 200 further timings
// are printed, and the process exits with status 0. The buffers are
// deliberately never freed so that every call gets different addresses.
// The input bytes are never initialised; only the timing is of interest.
float trial() {
  const size_t resized_len = (size_t)MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
  // Y|u|v -> y|y|y|y|u|v
  const size_t net_input_len = (size_t)(MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6;
  const size_t net_input_bytes = net_input_len * sizeof(float);

  // allocate the buffers
  uint8_t *resized_buf = (uint8_t *)malloc(resized_len);
  float *net_input_buf = (float *)malloc(net_input_bytes);
  if (resized_buf == NULL || net_input_buf == NULL) {
    // Nothing is ever freed, so running out of memory is an expected way for
    // the search to end; report it instead of crashing inside inner().
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
  printf("allocate -- %p 0x%zx -- %p 0x%zx\n",
         (void *)resized_buf, resized_len,
         (void *)net_input_buf, net_input_bytes);

  // test for bad buffers
  static const int CNT = 20;
  float avg = 0.0;
  for (int i = 0; i < CNT; i++) {
    avg += time_inner_ms(resized_buf, net_input_buf);
  }
  avg /= CNT;

  // once it's bad, it's reliably bad
  if (avg > 10) {
    printf("HIT %f\n", avg);
    printf("BAD\n");

    for (int i = 0; i < 200; i++) {
      printf("%.2f ", time_inner_ms(resized_buf, net_input_buf));
    }
    printf("\n");

    exit(0);
  }

  // don't free so we get a different buffer each time
  //free(resized_buf);
  //free(net_input_buf);

  return avg;
}

// Runs trials forever; the only exits are the exit() calls inside trial().
int main() {
  for (;;) {
    float ret = trial();
    printf("got %f\n", ret);
  }
}