You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
111 lines
3.4 KiB
111 lines
3.4 KiB
5 years ago
|
// clang++ -mcpu=cortex-a57 -O2 repro.cc
|
||
|
|
||
|
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <vector>
|
||
|
|
||
|
// Reads CLOCK_BOOTTIME and returns the reading in milliseconds as a double
// (whole seconds scaled by 1000 plus nanoseconds scaled by 1e-6).
// The return value of clock_gettime() is not checked.
static inline double millis_since_boot() {
  struct timespec now;
  clock_gettime(CLOCK_BOOTTIME, &now);
  return now.tv_nsec * 1e-6 + now.tv_sec * 1000.0;
}
|
||
|
|
||
|
// Asks the kernel to schedule the id returned by getpid() with the SCHED_FIFO
// policy at priority `level`.
//
// Returns whatever sched_setscheduler() returns (0 on success, -1 on error
// with errno set, per its documentation).
int set_realtime_priority(int level) {
  // should match python using chrt
  struct sched_param params = {};  // all fields zero before the priority is set
  params.sched_priority = level;

  const pid_t pid = getpid();
  return sched_setscheduler(pid, SCHED_FIFO, &params);
}
|
||
|
|
||
|
// Width and height used for the resized frame and for the tensor layout.
#define MODEL_WIDTH 320
#define MODEL_HEIGHT 640
// Normalizes a sample: (x - 128) * 0.0078125, where 0.0078125 == 1/128.
// Both the argument and the whole expansion are parenthesized so the macro
// stays correct with compound arguments (e.g. a ?: expression) and when it is
// used as an operand of a larger expression (e.g. 1.f / input_lambda(x)).
#define input_lambda(x) (((x) - 128.f) * 0.0078125f)
|
||
|
|
||
|
// Ensures `buf` holds at least `size` elements and returns a pointer to its
// storage. The vector is only ever grown: when it already has `size` or more
// elements it is left untouched, so existing contents are kept.
template <class T>
static inline T *get_buffer(std::vector<T> &buf, const size_t size) {
  const bool too_small = buf.size() < size;
  if (too_small) {
    buf.resize(size);
  }
  T *const storage = buf.data();
  return storage;
}
|
||
|
|
||
|
void inner(uint8_t *resized_buf, float *net_input_buf) {
|
||
|
int resized_width = MODEL_WIDTH;
|
||
|
int resized_height = MODEL_HEIGHT;
|
||
|
|
||
|
// one shot conversion, O(n) anyway
|
||
|
// yuvframe2tensor, normalize
|
||
|
for (int r = 0; r < MODEL_HEIGHT/2; r++) {
|
||
|
for (int c = 0; c < MODEL_WIDTH/2; c++) {
|
||
|
// Y_ul
|
||
|
net_input_buf[(c*MODEL_HEIGHT/2) + r] = input_lambda(resized_buf[(2*r*resized_width) + (2*c)]);
|
||
|
// Y_ur
|
||
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width) + (2*c+1)]);
|
||
|
// Y_dl
|
||
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + ((MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c)]);
|
||
|
// Y_dr
|
||
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c+1)]);
|
||
|
// U
|
||
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + (r*resized_width/2) + c]);
|
||
|
// V
|
||
|
net_input_buf[(c*MODEL_HEIGHT/2) + r + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + ((resized_width/2)*(resized_height/2)) + (r*resized_width/2) + c]);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
float trial() {
|
||
|
std::vector<uint8_t> vec_resized_buf;
|
||
|
std::vector<float> vec_net_input_buf;
|
||
|
|
||
|
int resized_width = MODEL_WIDTH;
|
||
|
int resized_height = MODEL_HEIGHT;
|
||
|
uint8_t *resized_buf = get_buffer(vec_resized_buf, resized_width*resized_height*3/2);
|
||
|
|
||
|
int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
|
||
|
float *net_input_buf = get_buffer(vec_net_input_buf, yuv_buf_len);
|
||
|
|
||
|
float avg = 0.0;
|
||
|
for (int i = 0; i < 20; i++) {
|
||
|
//__builtin___clear_cache((char*)resized_buf, (char*)resized_buf + (resized_width*resized_height*3/2));
|
||
|
//__builtin___clear_cache((char*)net_input_buf, (char*)net_input_buf + yuv_buf_len);
|
||
|
|
||
|
double s4 = millis_since_boot();
|
||
|
inner(resized_buf, net_input_buf);
|
||
|
double s5 = millis_since_boot();
|
||
|
avg += s5-s4;
|
||
|
}
|
||
|
|
||
|
avg /= 20;
|
||
|
if (avg > 5) {
|
||
|
printf("HIT %f\n", avg);
|
||
|
printf("BAD\n");
|
||
|
|
||
|
for (int i = 0; i < 200; i++) {
|
||
|
//__builtin___clear_cache((char*)resized_buf, (char*)resized_buf + (resized_width*resized_height*3/2));
|
||
|
//__builtin___clear_cache((char*)net_input_buf, (char*)net_input_buf + yuv_buf_len);
|
||
|
|
||
|
double s4 = millis_since_boot();
|
||
|
inner(resized_buf, net_input_buf);
|
||
|
double s5 = millis_since_boot();
|
||
|
printf("%.2f ", s5-s4);
|
||
|
}
|
||
|
printf("\n");
|
||
|
|
||
|
exit(0);
|
||
|
}
|
||
|
return avg;
|
||
|
}
|
||
|
|
||
|
int main() {
|
||
|
set_realtime_priority(51);
|
||
|
|
||
|
while (1) {
|
||
|
float ret = trial();
|
||
|
printf("got %f\n", ret);
|
||
|
}
|
||
|
}
|
||
|
|