diff --git a/selfdrive/modeld/models/dmonitoring.cc b/selfdrive/modeld/models/dmonitoring.cc index e7e6d46612..2903d5807d 100644 --- a/selfdrive/modeld/models/dmonitoring.cc +++ b/selfdrive/modeld/models/dmonitoring.cc @@ -22,12 +22,13 @@ static inline T *get_buffer(std::vector &buf, const size_t size) { void dmonitoring_init(DMonitoringModelState* s) { #ifdef USE_ONNX_MODEL - s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true); + s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, true); #else - s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true); + s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, true); #endif - s->m->addCalib(s->calib, CALIB_LEN); + s->m->addInput("input_imgs", NULL, 0); + s->m->addInput("calib", s->calib, CALIB_LEN); } void parse_driver_data(DriverStateResult &ds_res, const DMonitoringModelState* s, int out_idx_offset) { @@ -92,7 +93,7 @@ DMonitoringModelResult dmonitoring_eval_frame(DMonitoringModelState* s, void* st // fclose(dump_yuv_file); double t1 = millis_since_boot(); - s->m->addImage((float*)net_input_buf, yuv_buf_len / 4); + s->m->setInputBuffer("input_imgs", (float*)net_input_buf, yuv_buf_len / 4); for (int i = 0; i < CALIB_LEN; i++) { s->calib[i] = calib[i]; } diff --git a/selfdrive/modeld/models/driving.cc b/selfdrive/modeld/models/driving.cc index 087a4acfa6..01f8f7234c 100644 --- a/selfdrive/modeld/models/driving.cc +++ b/selfdrive/modeld/models/driving.cc @@ -33,26 +33,30 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) { #else s->m = std::make_unique("models/supercombo.dlc", #endif - &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context); + &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, false, context); -#ifdef TEMPORAL - s->m->addRecurrent(&s->feature_buffer[0], TEMPORAL_SIZE); -#endif + s->m->addInput("input_imgs", NULL, 0); + s->m->addInput("big_input_imgs", NULL, 0); + // TODO: the input is important here, still need to fix this #ifdef DESIRE - s->m->addDesire(s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1)); + s->m->addInput("desire_pulse", s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1)); #endif #ifdef TRAFFIC_CONVENTION - s->m->addTrafficConvention(s->traffic_convention, TRAFFIC_CONVENTION_LEN); + s->m->addInput("traffic_convention", s->traffic_convention, TRAFFIC_CONVENTION_LEN); #endif #ifdef DRIVING_STYLE - s->m->addDrivingStyle(s->driving_style, DRIVING_STYLE_LEN); + s->m->addInput("driving_style", s->driving_style, DRIVING_STYLE_LEN); #endif #ifdef NAV - s->m->addNavFeatures(s->nav_features, NAV_FEATURE_LEN); + s->m->addInput("nav_features", s->nav_features, NAV_FEATURE_LEN); +#endif + +#ifdef TEMPORAL + s->m->addInput("feature_buffer", &s->feature_buffer[0], TEMPORAL_SIZE); #endif } @@ -89,13 +93,13 @@ LOGT("Desire enqueued"); s->traffic_convention[1-rhd_idx] = 0.0; // if getInputBuf is not NULL, net_input_buf will be - auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast(s->m->getInputBuf())); - s->m->addImage(net_input_buf, s->frame->buf_size); + auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast(s->m->getCLBuffer("input_imgs"))); + s->m->setInputBuffer("input_imgs", net_input_buf, s->frame->buf_size); LOGT("Image added"); if (wbuf != nullptr) { - auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast(s->m->getExtraBuf())); - s->m->addExtra(net_extra_buf, s->wide_frame->buf_size); + auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast(s->m->getCLBuffer("big_input_imgs"))); + s->m->setInputBuffer("big_input_imgs", net_extra_buf, s->wide_frame->buf_size); LOGT("Extra image added"); } diff --git a/selfdrive/modeld/models/nav.cc b/selfdrive/modeld/models/nav.cc index 861795e172..48d354ae90 100644 --- a/selfdrive/modeld/models/nav.cc +++ b/selfdrive/modeld/models/nav.cc @@ -10,17 +10,19 @@ void navmodel_init(NavModelState* s) { #ifdef USE_ONNX_MODEL - s->m = new ONNXModel("models/navmodel.onnx", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, false, true); + s->m = new ONNXModel("models/navmodel.onnx", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, true); #else - s->m = new SNPEModel("models/navmodel_q.dlc", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, false, true); + s->m = new SNPEModel("models/navmodel_q.dlc", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, true); #endif + + s->m->addInput("map", NULL, 0); } NavModelResult* navmodel_eval_frame(NavModelState* s, VisionBuf* buf) { memcpy(s->net_input_buf, buf->addr, NAV_INPUT_SIZE); double t1 = millis_since_boot(); - s->m->addImage((float*)s->net_input_buf, NAV_INPUT_SIZE/sizeof(float)); + s->m->setInputBuffer("map", (float*)s->net_input_buf, NAV_INPUT_SIZE/sizeof(float)); s->m->execute(); double t2 = millis_since_boot(); diff --git a/selfdrive/modeld/runners/onnxmodel.cc b/selfdrive/modeld/runners/onnxmodel.cc index a5a83bccdc..e9a5d09406 100644 --- a/selfdrive/modeld/runners/onnxmodel.cc +++ b/selfdrive/modeld/runners/onnxmodel.cc @@ -1,25 +1,18 @@ #include "selfdrive/modeld/runners/onnxmodel.h" -#include -#include - -#include #include #include #include -#include -#include -#include +#include +#include -#include "common/swaglog.h" #include "common/util.h" -ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra, bool _use_tf8, cl_context context) { - LOGD("loading model %s", path); +ONNXModel::ONNXModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) { + LOGD("loading model %s", path.c_str()); output = _output; output_size = _output_size; - use_extra = _use_extra; use_tf8 = _use_tf8; int err = pipe(pipein); @@ -34,7 +27,7 @@ ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int proc_pid = fork(); if (proc_pid == 0) { LOGD("spawning onnx process %s", onnx_runner.c_str()); - char *argv[] = {(char*)onnx_runner.c_str(), (char*)path, (char*)tf8_arg.c_str(), nullptr}; + char *argv[] = {(char*)onnx_runner.c_str(), (char*)path.c_str(), (char*)tf8_arg.c_str(), nullptr}; dup2(pipein[0], 0); dup2(pipeout[1], 1); close(pipein[0]); @@ -87,72 +80,9 @@ void ONNXModel::pread(float *buf, int size) { LOGD("host read done"); } -void ONNXModel::addRecurrent(float *state, int state_size) { - rnn_input_buf = state; - rnn_state_size = state_size; -} - -void ONNXModel::addDesire(float *state, int state_size) { - desire_input_buf = state; - desire_state_size = state_size; -} - -void ONNXModel::addNavFeatures(float *state, int state_size) { - nav_features_input_buf = state; - nav_features_size = state_size; -} - -void ONNXModel::addDrivingStyle(float *state, int state_size) { - driving_style_input_buf = state; - driving_style_size = state_size; -} - -void ONNXModel::addTrafficConvention(float *state, int state_size) { - traffic_convention_input_buf = state; - traffic_convention_size = state_size; -} - -void ONNXModel::addCalib(float *state, int state_size) { - calib_input_buf = state; - calib_size = state_size; -} - -void ONNXModel::addImage(float *image_buf, int buf_size) { - image_input_buf = image_buf; - image_buf_size = buf_size; -} - -void ONNXModel::addExtra(float *image_buf, int buf_size) { - extra_input_buf = image_buf; - extra_buf_size = buf_size; -} - void ONNXModel::execute() { - // order must be this - if (image_input_buf != NULL) { - pwrite(image_input_buf, image_buf_size); - } - if (extra_input_buf != NULL) { - pwrite(extra_input_buf, extra_buf_size); - } - if (desire_input_buf != NULL) { - pwrite(desire_input_buf, desire_state_size); - } - if (traffic_convention_input_buf != NULL) { - pwrite(traffic_convention_input_buf, traffic_convention_size); - } - if (driving_style_input_buf != NULL) { - pwrite(driving_style_input_buf, driving_style_size); - } - if (nav_features_input_buf != NULL) { - pwrite(nav_features_input_buf, nav_features_size); - } - if (calib_input_buf != NULL) { - pwrite(calib_input_buf, calib_size); - } - if (rnn_input_buf != NULL) { - pwrite(rnn_input_buf, rnn_state_size); + for (auto &input : inputs) { + pwrite(input->buffer, input->size); } pread(output, output_size); } - diff --git a/selfdrive/modeld/runners/onnxmodel.h b/selfdrive/modeld/runners/onnxmodel.h index 9990bf1b45..6c325f644e 100644 --- a/selfdrive/modeld/runners/onnxmodel.h +++ b/selfdrive/modeld/runners/onnxmodel.h @@ -1,51 +1,21 @@ #pragma once -#include - #include "selfdrive/modeld/runners/runmodel.h" class ONNXModel : public RunModel { public: - ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false, bool _use_tf8 = false, cl_context context = NULL); + ONNXModel(const std::string path, float *output, size_t output_size, int runtime, bool _use_tf8 = false, cl_context context = NULL); ~ONNXModel(); - void addRecurrent(float *state, int state_size); - void addDesire(float *state, int state_size); - void addNavFeatures(float *state, int state_size); - void addDrivingStyle(float *state, int state_size); - void addTrafficConvention(float *state, int state_size); - void addCalib(float *state, int state_size); - void addImage(float *image_buf, int buf_size); - void addExtra(float *image_buf, int buf_size); void execute(); private: int proc_pid; - float *output; size_t output_size; - - float *rnn_input_buf = NULL; - int rnn_state_size; - float *desire_input_buf = NULL; - int desire_state_size; - float *nav_features_input_buf = NULL; - int nav_features_size; - float *driving_style_input_buf = NULL; - int driving_style_size; - float *traffic_convention_input_buf = NULL; - int traffic_convention_size; - float *calib_input_buf = NULL; - int calib_size; - float *image_input_buf = NULL; - int image_buf_size; bool use_tf8; - float *extra_input_buf = NULL; - int extra_buf_size; - bool use_extra; - // pipe to communicate to keras subprocess + // pipe to communicate to onnx_runner subprocess void pread(float *buf, int size); void pwrite(float *buf, int size); int pipein[2]; int pipeout[2]; }; - diff --git a/selfdrive/modeld/runners/runmodel.h b/selfdrive/modeld/runners/runmodel.h index 673ddb50b5..00c88131bf 100644 --- a/selfdrive/modeld/runners/runmodel.h +++ b/selfdrive/modeld/runners/runmodel.h @@ -1,18 +1,45 @@ #pragma once + +#include +#include +#include +#include + #include "common/clutil.h" +#include "common/swaglog.h" + +struct ModelInput { + const std::string name; + float *buffer; + int size; + + ModelInput(const std::string _name, float *_buffer, int _size) : name(_name), buffer(_buffer), size(_size) {} + virtual void setBuffer(float *_buffer, int _size) { + assert(size == _size || size == 0); + buffer = _buffer; + size = _size; + } +}; + class RunModel { public: + std::vector> inputs; + virtual ~RunModel() {} - virtual void addRecurrent(float *state, int state_size) {} - virtual void addDesire(float *state, int state_size) {} - virtual void addNavFeatures(float *state, int state_size) {} - virtual void addDrivingStyle(float *state, int state_size) {} - virtual void addTrafficConvention(float *state, int state_size) {} - virtual void addCalib(float *state, int state_size) {} - virtual void addImage(float *image_buf, int buf_size) {} - virtual void addExtra(float *image_buf, int buf_size) {} virtual void execute() {} - virtual void* getInputBuf() { return nullptr; } - virtual void* getExtraBuf() { return nullptr; } -}; + virtual void* getCLBuffer(const std::string name) { return nullptr; } + virtual void addInput(const std::string name, float *buffer, int size) { + inputs.push_back(std::unique_ptr(new ModelInput(name, buffer, size))); + } + virtual void setInputBuffer(const std::string name, float *buffer, int size) { + for (auto &input : inputs) { + if (name == input->name) { + input->setBuffer(buffer, size); + return; + } + } + LOGE("Tried to update input `%s` but no input with this name exists", name.c_str()); + assert(false); + } +}; diff --git a/selfdrive/modeld/runners/snpemodel.cc b/selfdrive/modeld/runners/snpemodel.cc index 609a7a6657..aa5ee3bb0e 100644 --- a/selfdrive/modeld/runners/snpemodel.cc +++ b/selfdrive/modeld/runners/snpemodel.cc @@ -2,8 +2,6 @@ #include "selfdrive/modeld/runners/snpemodel.h" -#include -#include #include #include "common/util.h" @@ -14,20 +12,20 @@ void PrintErrorStringAndExit() { std::exit(EXIT_FAILURE); } -SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) { - output = loutput; - output_size = loutput_size; - use_extra = luse_extra; - use_tf8 = luse_tf8; +SNPEModel::SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) { + output = _output; + output_size = _output_size; + use_tf8 = _use_tf8; + #ifdef QCOM2 - if (runtime==USE_GPU_RUNTIME) { - Runtime = zdl::DlSystem::Runtime_t::GPU; - } else if (runtime==USE_DSP_RUNTIME) { - Runtime = zdl::DlSystem::Runtime_t::DSP; + if (runtime == USE_GPU_RUNTIME) { + snpe_runtime = zdl::DlSystem::Runtime_t::GPU; + } else if (runtime == USE_DSP_RUNTIME) { + snpe_runtime = zdl::DlSystem::Runtime_t::DSP; } else { - Runtime = zdl::DlSystem::Runtime_t::CPU; + snpe_runtime = zdl::DlSystem::Runtime_t::CPU; } - assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(Runtime)); + assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(snpe_runtime)); #endif model_data = util::read_file(path); assert(model_data.size() > 0); @@ -38,172 +36,83 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int printf("loaded model with size: %lu\n", model_data.size()); // create model runner - zdl::SNPE::SNPEBuilder snpeBuilder(container.get()); + zdl::SNPE::SNPEBuilder snpe_builder(container.get()); while (!snpe) { #ifdef QCOM2 - snpe = snpeBuilder.setOutputLayers({}) - .setRuntimeProcessor(Runtime) - .setUseUserSuppliedBuffers(true) - .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE) - .build(); + snpe = snpe_builder.setOutputLayers({}) + .setRuntimeProcessor(snpe_runtime) + .setUseUserSuppliedBuffers(true) + .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE) + .build(); #else - snpe = snpeBuilder.setOutputLayers({}) - .setUseUserSuppliedBuffers(true) - .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE) - .build(); + snpe = snpe_builder.setOutputLayers({}) + .setUseUserSuppliedBuffers(true) + .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE) + .build(); #endif if (!snpe) std::cerr << zdl::DlSystem::getLastErrorString() << std::endl; } - // get input and output names - const auto &strListi_opt = snpe->getInputTensorNames(); - if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names"); - const auto &strListi = *strListi_opt; - //assert(strListi.size() == 1); - const char *input_tensor_name = strListi.at(0); - - const auto &strListo_opt = snpe->getOutputTensorNames(); - if (!strListo_opt) throw std::runtime_error("Error obtaining Output tensor names"); - const auto &strListo = *strListo_opt; - assert(strListo.size() == 1); - const char *output_tensor_name = strListo.at(0); - - printf("model: %s -> %s\n", input_tensor_name, output_tensor_name); - - zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat; - zdl::DlSystem::UserBufferEncodingTf8 userBufferEncodingTf8(0, 1./255); // network takes 0-1 - zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory(); - size_t size_of_input = use_tf8 ? sizeof(uint8_t) : sizeof(float); - - // create input buffer - { - const auto &inputDims_opt = snpe->getInputDimensions(input_tensor_name); - const zdl::DlSystem::TensorShape& bufferShape = *inputDims_opt; - std::vector strides(bufferShape.rank()); - strides[strides.size() - 1] = size_of_input; - size_t product = 1; - for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i]; - size_t stride = strides[strides.size() - 1]; - for (size_t i = bufferShape.rank() - 1; i > 0; i--) { - stride *= bufferShape[i]; - strides[i-1] = stride; - } - printf("input product is %lu\n", product); - inputBuffer = ubFactory.createUserBuffer(NULL, - product*size_of_input, - strides, - use_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingTf8 : (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingFloat); - - inputMap.add(input_tensor_name, inputBuffer.get()); - } - - if (use_extra) { - const char *extra_tensor_name = strListi.at(1); - const auto &extraDims_opt = snpe->getInputDimensions(extra_tensor_name); - const zdl::DlSystem::TensorShape& bufferShape = *extraDims_opt; - std::vector strides(bufferShape.rank()); - strides[strides.size() - 1] = sizeof(float); - size_t product = 1; - for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i]; - size_t stride = strides[strides.size() - 1]; - for (size_t i = bufferShape.rank() - 1; i > 0; i--) { - stride *= bufferShape[i]; - strides[i-1] = stride; - } - printf("extra product is %lu\n", product); - extraBuffer = ubFactory.createUserBuffer(NULL, product*sizeof(float), strides, &userBufferEncodingFloat); - - inputMap.add(extra_tensor_name, extraBuffer.get()); - } - // create output buffer - { - const zdl::DlSystem::TensorShape& bufferShape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims(); - if (output_size != 0) { - assert(output_size == bufferShape[1]); - } else { - output_size = bufferShape[1]; - } - - std::vector outputStrides = {output_size * sizeof(float), sizeof(float)}; - outputBuffer = ubFactory.createUserBuffer(output, output_size * sizeof(float), outputStrides, &userBufferEncodingFloat); - outputMap.add(output_tensor_name, outputBuffer.get()); + zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float; + zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory(); + + const auto &output_tensor_names_opt = snpe->getOutputTensorNames(); + if (!output_tensor_names_opt) throw std::runtime_error("Error obtaining output tensor names"); + const auto &output_tensor_names = *output_tensor_names_opt; + assert(output_tensor_names.size() == 1); + const char *output_tensor_name = output_tensor_names.at(0); + const zdl::DlSystem::TensorShape &buffer_shape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims(); + if (output_size != 0) { + assert(output_size == buffer_shape[1]); + } else { + output_size = buffer_shape[1]; } + std::vector output_strides = {output_size * sizeof(float), sizeof(float)}; + output_buffer = ub_factory.createUserBuffer(output, output_size * sizeof(float), output_strides, &ub_encoding_float); + output_map.add(output_tensor_name, output_buffer.get()); #ifdef USE_THNEED - if (Runtime == zdl::DlSystem::Runtime_t::GPU) { + if (snpe_runtime == zdl::DlSystem::Runtime_t::GPU) { thneed.reset(new Thneed()); } #endif } -void SNPEModel::addRecurrent(float *state, int state_size) { - recurrent = state; - recurrent_size = state_size; - recurrentBuffer = this->addExtra(state, state_size, 3); -} - -void SNPEModel::addTrafficConvention(float *state, int state_size) { - trafficConvention = state; - trafficConventionBuffer = this->addExtra(state, state_size, 2); -} - -void SNPEModel::addDesire(float *state, int state_size) { - desire = state; - desireBuffer = this->addExtra(state, state_size, 1); -} - -void SNPEModel::addNavFeatures(float *state, int state_size) { - navFeatures = state; - navFeaturesBuffer = this->addExtra(state, state_size, 1); -} - -void SNPEModel::addDrivingStyle(float *state, int state_size) { - drivingStyle = state; - drivingStyleBuffer = this->addExtra(state, state_size, 2); -} - -void SNPEModel::addCalib(float *state, int state_size) { - calib = state; - calibBuffer = this->addExtra(state, state_size, 1); -} - -void SNPEModel::addImage(float *image_buf, int buf_size) { - input = image_buf; - input_size = buf_size; -} - -void SNPEModel::addExtra(float *image_buf, int buf_size) { - extra = image_buf; - extra_size = buf_size; -} +void SNPEModel::addInput(const std::string name, float *buffer, int size) { + const int idx = inputs.size(); + const auto &input_tensor_names_opt = snpe->getInputTensorNames(); + if (!input_tensor_names_opt) throw std::runtime_error("Error obtaining input tensor names"); + const auto &input_tensor_names = *input_tensor_names_opt; + const char *input_tensor_name = input_tensor_names.at(idx); + const bool input_tf8 = use_tf8 && strcmp(input_tensor_name, "input_img") == 0; // TODO: This is a terrible hack, get rid of this name check both here and in onnx_runner.py + printf("adding index %d: %s\n", idx, input_tensor_name); + + zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float; + zdl::DlSystem::UserBufferEncodingTf8 ub_encoding_tf8(0, 1./255); // network takes 0-1 + zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory(); + zdl::DlSystem::UserBufferEncoding *input_encoding = input_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_tf8 : (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_float; + + const auto &buffer_shape_opt = snpe->getInputDimensions(input_tensor_name); + const zdl::DlSystem::TensorShape &buffer_shape = *buffer_shape_opt; + size_t size_of_input = input_tf8 ? sizeof(uint8_t) : sizeof(float); + std::vector strides(buffer_shape.rank()); + strides[strides.size() - 1] = size_of_input; + size_t product = 1; + for (size_t i = 0; i < buffer_shape.rank(); i++) product *= buffer_shape[i]; + size_t stride = strides[strides.size() - 1]; + for (size_t i = buffer_shape.rank() - 1; i > 0; i--) { + stride *= buffer_shape[i]; + strides[i-1] = stride; + } -std::unique_ptr SNPEModel::addExtra(float *state, int state_size, int idx) { - // get input and output names - const auto real_idx = idx + (use_extra ? 1 : 0); - const auto &strListi_opt = snpe->getInputTensorNames(); - if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names"); - const auto &strListi = *strListi_opt; - const char *input_tensor_name = strListi.at(real_idx); - printf("adding index %d: %s\n", real_idx, input_tensor_name); - - zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat; - zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory(); - std::vector retStrides = {state_size * sizeof(float), sizeof(float)}; - auto ret = ubFactory.createUserBuffer(state, state_size * sizeof(float), retStrides, &userBufferEncodingFloat); - inputMap.add(input_tensor_name, ret.get()); - return ret; + auto input_buffer = ub_factory.createUserBuffer(buffer, product*size_of_input, strides, input_encoding); + input_map.add(input_tensor_name, input_buffer.get()); + inputs.push_back(std::unique_ptr(new SNPEModelInput(name, buffer, size, std::move(input_buffer)))); } void SNPEModel::execute() { - bool ret = inputBuffer->setBufferAddress(input); - assert(ret == true); - if (use_extra) { - bool extra_ret = extraBuffer->setBufferAddress(extra); - assert(extra_ret == true); - } - if (!snpe->execute(inputMap, outputMap)) { + if (!snpe->execute(input_map, output_map)) { PrintErrorStringAndExit(); } } - diff --git a/selfdrive/modeld/runners/snpemodel.h b/selfdrive/modeld/runners/snpemodel.h index 0d84d1d482..e646e5225b 100644 --- a/selfdrive/modeld/runners/snpemodel.h +++ b/selfdrive/modeld/runners/snpemodel.h @@ -11,7 +11,7 @@ #include #include -#include "runmodel.h" +#include "selfdrive/modeld/runners/runmodel.h" #define USE_CPU_RUNTIME 0 #define USE_GPU_RUNTIME 1 @@ -21,17 +21,20 @@ #include "selfdrive/modeld/thneed/thneed.h" #endif +struct SNPEModelInput : public ModelInput { + std::unique_ptr snpe_buffer; + + SNPEModelInput(const std::string _name, float *_buffer, int _size, std::unique_ptr _snpe_buffer) : ModelInput(_name, _buffer, _size), snpe_buffer(std::move(_snpe_buffer)) {} + void setBuffer(float *_buffer, int _size) { + ModelInput::setBuffer(_buffer, _size); + assert(snpe_buffer->setBufferAddress(_buffer) == true); + } +}; + class SNPEModel : public RunModel { public: - SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL); - void addRecurrent(float *state, int state_size); - void addTrafficConvention(float *state, int state_size); - void addCalib(float *state, int state_size); - void addDesire(float *state, int state_size); - void addDrivingStyle(float *state, int state_size); - void addNavFeatures(float *state, int state_size); - void addImage(float *image_buf, int buf_size); - void addExtra(float *image_buf, int buf_size); + SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL); + void addInput(const std::string name, float *buffer, int size); void execute(); #ifdef USE_THNEED @@ -43,44 +46,16 @@ private: std::string model_data; #ifdef QCOM2 - zdl::DlSystem::Runtime_t Runtime; + zdl::DlSystem::Runtime_t snpe_runtime; #endif // snpe model stuff std::unique_ptr snpe; + zdl::DlSystem::UserBufferMap input_map; + zdl::DlSystem::UserBufferMap output_map; + std::unique_ptr output_buffer; - // snpe input stuff - zdl::DlSystem::UserBufferMap inputMap; - std::unique_ptr inputBuffer; - float *input; - size_t input_size; bool use_tf8; - - // snpe output stuff - zdl::DlSystem::UserBufferMap outputMap; - std::unique_ptr outputBuffer; float *output; size_t output_size; - - // extra input stuff - std::unique_ptr extraBuffer; - float *extra; - size_t extra_size; - bool use_extra; - - // recurrent and desire - std::unique_ptr addExtra(float *state, int state_size, int idx); - float *recurrent; - size_t recurrent_size; - std::unique_ptr recurrentBuffer; - float *trafficConvention; - std::unique_ptr trafficConventionBuffer; - float *desire; - std::unique_ptr desireBuffer; - float *navFeatures; - std::unique_ptr navFeaturesBuffer; - float *drivingStyle; - std::unique_ptr drivingStyleBuffer; - float *calib; - std::unique_ptr calibBuffer; }; diff --git a/selfdrive/modeld/runners/thneedmodel.cc b/selfdrive/modeld/runners/thneedmodel.cc index 2ac3d64495..0f35c94800 100644 --- a/selfdrive/modeld/runners/thneedmodel.cc +++ b/selfdrive/modeld/runners/thneedmodel.cc @@ -1,78 +1,56 @@ #include "selfdrive/modeld/runners/thneedmodel.h" -#include +#include "common/swaglog.h" -ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) { +ThneedModel::ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool luse_tf8, cl_context context) { thneed = new Thneed(true, context); - thneed->load(path); + thneed->load(path.c_str()); thneed->clexec(); recorded = false; - output = loutput; - use_extra = luse_extra; + output = _output; } -void ThneedModel::addRecurrent(float *state, int state_size) { - recurrent = state; -} - -void ThneedModel::addTrafficConvention(float *state, int state_size) { - trafficConvention = state; -} - -void ThneedModel::addDesire(float *state, int state_size) { - desire = state; -} - -void ThneedModel::addDrivingStyle(float *state, int state_size) { - drivingStyle = state; -} - -void ThneedModel::addNavFeatures(float *state, int state_size) { - navFeatures = state; -} - -void ThneedModel::addImage(float *image_input_buf, int buf_size) { - input = image_input_buf; -} - -void ThneedModel::addExtra(float *extra_input_buf, int buf_size) { - extra = extra_input_buf; -} +void* ThneedModel::getCLBuffer(const std::string name) { + int index = -1; + for (int i = 0; i < inputs.size(); i++) { + if (name == inputs[i]->name) { + index = i; + break; + } + } -void* ThneedModel::getInputBuf() { - if (use_extra && thneed->input_clmem.size() > 5) return &(thneed->input_clmem[5]); - else if (!use_extra && thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]); - else return nullptr; -} + if (index == -1) { + LOGE("Tried to get CL buffer for input `%s` but no input with this name exists", name.c_str()); + assert(false); + } -void* ThneedModel::getExtraBuf() { - if (thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]); - else return nullptr; + if (thneed->input_clmem.size() >= inputs.size()) { + return &thneed->input_clmem[inputs.size() - index - 1]; + } else { + return nullptr; + } } void ThneedModel::execute() { if (!recorded) { thneed->record = true; - if (use_extra) { - float *inputs[6] = {recurrent, navFeatures, trafficConvention, desire, extra, input}; - thneed->copy_inputs(inputs); - } else { - float *inputs[5] = {recurrent, navFeatures, trafficConvention, desire, input}; - thneed->copy_inputs(inputs); + float *input_buffers[inputs.size()]; + for (int i = 0; i < inputs.size(); i++) { + input_buffers[inputs.size() - i - 1] = inputs[i]->buffer; } + + thneed->copy_inputs(input_buffers); thneed->clexec(); thneed->copy_output(output); thneed->stop(); recorded = true; } else { - if (use_extra) { - float *inputs[6] = {recurrent, navFeatures, trafficConvention, desire, extra, input}; - thneed->execute(inputs, output); - } else { - float *inputs[5] = {recurrent, navFeatures, trafficConvention, desire, input}; - thneed->execute(inputs, output); + float *input_buffers[inputs.size()]; + for (int i = 0; i < inputs.size(); i++) { + input_buffers[inputs.size() - i - 1] = inputs[i]->buffer; } + thneed->execute(input_buffers, output); } } diff --git a/selfdrive/modeld/runners/thneedmodel.h b/selfdrive/modeld/runners/thneedmodel.h index 63712f1d00..90c40239bf 100644 --- a/selfdrive/modeld/runners/thneedmodel.h +++ b/selfdrive/modeld/runners/thneedmodel.h @@ -5,31 +5,11 @@ class ThneedModel : public RunModel { public: - ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL); - void addRecurrent(float *state, int state_size); - void addTrafficConvention(float *state, int state_size); - void addDesire(float *state, int state_size); - void addNavFeatures(float *state, int state_size); - void addDrivingStyle(float *state, int state_size); - void addImage(float *image_buf, int buf_size); - void addExtra(float *image_buf, int buf_size); + ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL); + void *getCLBuffer(const std::string name); void execute(); - void* getInputBuf(); - void* getExtraBuf(); private: Thneed *thneed = NULL; bool recorded; - bool use_extra; - - float *input; - float *extra; float *output; - - // recurrent and desire - float *recurrent; - float *trafficConvention; - float *drivingStyle; - float *desire; - float *navFeatures; }; -