You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
3.0 KiB
101 lines
3.0 KiB
// clang++ -O2 repro.cc && ./a.out
|
|
|
|
#include <sched.h>
|
|
#include <sys/types.h>
|
|
#include <unistd.h>
|
|
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <ctime>
|
|
|
|
static inline double millis_since_boot() {
|
|
struct timespec t;
|
|
clock_gettime(CLOCK_BOOTTIME, &t);
|
|
return t.tv_sec * 1000.0 + t.tv_nsec * 1e-6;
|
|
}
|
|
|
|
#define MODEL_WIDTH 320
|
|
#define MODEL_HEIGHT 640
|
|
|
|
// null function still breaks it
|
|
#define input_lambda(x) x
|
|
|
|
// this is copied from models/dmonitoring.cc, and is the code that triggers the issue
|
|
void inner(uint8_t *resized_buf, float *net_input_buf) {
|
|
int resized_width = MODEL_WIDTH;
|
|
int resized_height = MODEL_HEIGHT;
|
|
|
|
// one shot conversion, O(n) anyway
|
|
// yuvframe2tensor, normalize
|
|
for (int r = 0; r < MODEL_HEIGHT/2; r++) {
|
|
for (int c = 0; c < MODEL_WIDTH/2; c++) {
|
|
// Y_ul
|
|
net_input_buf[(c*MODEL_HEIGHT/2) + r] = input_lambda(resized_buf[(2*r*resized_width) + (2*c)]);
|
|
// Y_ur
|
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width) + (2*c+1)]);
|
|
// Y_dl
|
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + ((MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c)]);
|
|
// Y_dr
|
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c+1)]);
|
|
// U
|
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + (r*resized_width/2) + c]);
|
|
// V
|
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + ((resized_width/2)*(resized_height/2)) + (r*resized_width/2) + c]);
|
|
}
|
|
}
|
|
}
|
|
|
|
float trial() {
|
|
int resized_width = MODEL_WIDTH;
|
|
int resized_height = MODEL_HEIGHT;
|
|
|
|
int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
|
|
|
|
// allocate the buffers
|
|
uint8_t *resized_buf = (uint8_t*)malloc(resized_width*resized_height*3/2);
|
|
float *net_input_buf = (float*)malloc(yuv_buf_len*sizeof(float));
|
|
printf("allocate -- %p 0x%x -- %p 0x%lx\n", resized_buf, resized_width*resized_height*3/2, net_input_buf, yuv_buf_len*sizeof(float));
|
|
|
|
// test for bad buffers
|
|
static int CNT = 20;
|
|
float avg = 0.0;
|
|
for (int i = 0; i < CNT; i++) {
|
|
double s4 = millis_since_boot();
|
|
inner(resized_buf, net_input_buf);
|
|
double s5 = millis_since_boot();
|
|
avg += s5-s4;
|
|
}
|
|
avg /= CNT;
|
|
|
|
// once it's bad, it's reliably bad
|
|
if (avg > 10) {
|
|
printf("HIT %f\n", avg);
|
|
printf("BAD\n");
|
|
|
|
for (int i = 0; i < 200; i++) {
|
|
double s4 = millis_since_boot();
|
|
inner(resized_buf, net_input_buf);
|
|
double s5 = millis_since_boot();
|
|
printf("%.2f ", s5-s4);
|
|
}
|
|
printf("\n");
|
|
|
|
exit(0);
|
|
}
|
|
|
|
// don't free so we get a different buffer each time
|
|
//free(resized_buf);
|
|
//free(net_input_buf);
|
|
|
|
return avg;
|
|
}
|
|
|
|
int main() {
|
|
while (true) {
|
|
float ret = trial();
|
|
printf("got %f\n", ret);
|
|
}
|
|
}
|
|
|
|
|