From b445e5003397ad28e89c1aae0e3c90f334e9f5eb Mon Sep 17 00:00:00 2001 From: Comma Device Date: Thu, 21 Apr 2022 16:46:46 -0700 Subject: [PATCH] less cpu more dsp --- models/dmonitoring_model_q.dlc | 4 +- selfdrive/modeld/SConscript | 2 +- selfdrive/modeld/models/dmonitoring.cc | 83 +++++++------------------- selfdrive/modeld/models/dmonitoring.h | 4 +- 4 files changed, 25 insertions(+), 68 deletions(-) diff --git a/models/dmonitoring_model_q.dlc b/models/dmonitoring_model_q.dlc index 5b1269265f..385d012436 100644 --- a/models/dmonitoring_model_q.dlc +++ b/models/dmonitoring_model_q.dlc @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6159d44f43b69f4e4884e8ff9b3a0fe8d0f5f076416b34704ed91e99a48e36a -size 3474928 +oid sha256:95a69358bc59b32a9b6c437c87c88d1142ef82828ce6811719193076da8a835c +size 3334269 diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index 20d3fb8acc..f7ad6d1192 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -32,7 +32,7 @@ thneed_src = [ use_thneed = not GetOption('no_thneed') if arch == "aarch64" or arch == "larch64": - libs += ['gsl', 'CB'] + libs += ['gsl', 'CB', 'jpeg'] libs += ['gnustl_shared'] if arch == "aarch64" else ['pthread', 'dl'] if use_thneed: diff --git a/selfdrive/modeld/models/dmonitoring.cc b/selfdrive/modeld/models/dmonitoring.cc index 1afe68ab4d..406a16cc92 100644 --- a/selfdrive/modeld/models/dmonitoring.cc +++ b/selfdrive/modeld/models/dmonitoring.cc @@ -10,8 +10,8 @@ #include "selfdrive/modeld/models/dmonitoring.h" -constexpr int MODEL_WIDTH = 704; -constexpr int MODEL_HEIGHT = 448; +constexpr int MODEL_WIDTH = 1440; +constexpr int MODEL_HEIGHT = 960; template static inline T *get_buffer(std::vector &buf, const size_t size) { @@ -21,9 +21,6 @@ static inline T *get_buffer(std::vector &buf, const size_t size) { void dmonitoring_init(DMonitoringModelState* s) { s->is_rhd = Params().getBool("IsRHD"); - for (int x = 0; x < std::size(s->tensor); ++x) { - s->tensor[x] = (x - 128.f) * 0.0078125f; - } #ifdef USE_ONNX_MODEL s->m = new ONNXModel("../../models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME); @@ -42,65 +39,27 @@ static inline auto get_yuv_buf(std::vector &buf, const int width, int h } DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, float *calib) { - uint8_t *raw_y = (uint8_t *) stream_buf; - uint8_t *raw_u = raw_y + (width * height); - uint8_t *raw_v = raw_u + ((width / 2) * (height / 2)); - - int resized_width = MODEL_WIDTH; - int resized_height = MODEL_HEIGHT; - - auto [resized_y, resized_u, resized_v] = get_yuv_buf(s->resized_buf, resized_width, resized_height); - libyuv::FilterMode mode = libyuv::FilterModeEnum::kFilterLinear; - if (!s->is_rhd) { - libyuv::I420Scale(raw_y, width, - raw_u, width / 2, - raw_v, width / 2, - width, height, - resized_y, resized_width, - resized_u, resized_width / 2, - resized_v, resized_width / 2, - resized_width, resized_height, - mode); - } else { - auto [mirror_y, mirror_u, mirror_v] = get_yuv_buf(s->premirror_resized_buf, resized_width, resized_height); - libyuv::I420Scale(raw_y, width, - raw_u, width / 2, - raw_v, width / 2, - width, height, - mirror_y, resized_width, - mirror_u, resized_width / 2, - mirror_v, resized_width / 2, - resized_width, resized_height, - mode); - libyuv::I420Mirror(mirror_y, resized_width, - mirror_u, resized_width / 2, - mirror_v, resized_width / 2, - resized_y, resized_width, - resized_u, resized_width / 2, - resized_v, resized_width / 2, - resized_width, resized_height); - } + int v_off = height - MODEL_HEIGHT; + int h_off = (width - MODEL_WIDTH) / 2; + int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v, frame2tensor done in dsp - int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v + uint8_t *raw_buf = (uint8_t *) stream_buf; + auto [cropped_y, cropped_u, cropped_v] = get_yuv_buf(s->cropped_buf, MODEL_WIDTH, MODEL_HEIGHT); float *net_input_buf = get_buffer(s->net_input_buf, yuv_buf_len); - // one shot conversion, O(n) anyway - // yuvframe2tensor, normalize - for (int r = 0; r < MODEL_HEIGHT/2; r++) { - for (int c = 0; c < MODEL_WIDTH/2; c++) { - // Y_ul - net_input_buf[(r*MODEL_WIDTH/2) + c + (0*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r)*resized_width + 2*c]]; - // Y_dl - net_input_buf[(r*MODEL_WIDTH/2) + c + (1*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r+1)*resized_width + 2*c]]; - // Y_ur - net_input_buf[(r*MODEL_WIDTH/2) + c + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r)*resized_width + 2*c+1]]; - // Y_dr - net_input_buf[(r*MODEL_WIDTH/2) + c + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r+1)*resized_width + 2*c+1]]; - // U - net_input_buf[(r*MODEL_WIDTH/2) + c + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_u[r*resized_width/2 + c]]; - // V - net_input_buf[(r*MODEL_WIDTH/2) + c + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_v[r*resized_width/2 + c]]; - } - } + + libyuv::ConvertToI420(raw_buf, (width/2)*(height/2)*6, + cropped_y, MODEL_WIDTH, + cropped_u, MODEL_WIDTH/2, + cropped_v, MODEL_WIDTH/2, + h_off, v_off, + width, height, + MODEL_WIDTH, MODEL_HEIGHT, + libyuv::kRotate0, + libyuv::FOURCC_I420); + + // snpe UserBufferEncodingUnsigned8Bit doesn't work + // fast float conversion instead, also scales to 0-1 + libyuv::ByteToFloat(cropped_y, net_input_buf, 0.003921569f, yuv_buf_len); // printf("preprocess completed. %d \n", yuv_buf_len); // FILE *dump_yuv_file = fopen("/tmp/rawdump.yuv", "wb"); diff --git a/selfdrive/modeld/models/dmonitoring.h b/selfdrive/modeld/models/dmonitoring.h index 72166e316f..eaabfbfd53 100644 --- a/selfdrive/modeld/models/dmonitoring.h +++ b/selfdrive/modeld/models/dmonitoring.h @@ -37,11 +37,9 @@ typedef struct DMonitoringModelState { RunModel *m; bool is_rhd; float output[OUTPUT_SIZE]; - std::vector resized_buf; - std::vector premirror_resized_buf; + std::vector cropped_buf; std::vector net_input_buf; float calib[CALIB_LEN]; - float tensor[UINT8_MAX + 1]; } DMonitoringModelState; void dmonitoring_init(DMonitoringModelState* s);