Refactor model runners (#28598)

* Started work on model runner refactor
* Fixed some compile errors
* everything compiles
* Fixed bug in SNPEModel
* updateInput -> setInputBuffer
* I understand nothing
* whoops lol
* use std::string instead of char*
* Move common logic into RunModel
* formatting fix

old-commit-hash: c9f00678af
2 years ago · 95051090a1
parent fdc8876745
commit 95051090a1
10 changed files with 190 additions and 414 deletions
--- a/selfdrive/modeld/models/dmonitoring.cc
+++ b/selfdrive/modeld/models/dmonitoring.cc
@ -22,12 +22,13 @@ static inline T *get_buffer(std::vector<T> &buf, const size_t size) {
 void dmonitoring_init(DMonitoringModelState* s) {
 #ifdef USE_ONNX_MODEL
-  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, true);
 #else
-  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, true);
 #endif
-  s->m->addCalib(s->calib, CALIB_LEN);
+  s->m->addInput("input_imgs", NULL, 0);
  s->m->addInput("calib", s->calib, CALIB_LEN);
 }
 void parse_driver_data(DriverStateResult &ds_res, const DMonitoringModelState* s, int out_idx_offset) {
@ -92,7 +93,7 @@ DMonitoringModelResult dmonitoring_eval_frame(DMonitoringModelState* s, void* st
  // fclose(dump_yuv_file);
  double t1 = millis_since_boot();
-  s->m->addImage((float*)net_input_buf, yuv_buf_len / 4);
+  s->m->setInputBuffer("input_imgs", (float*)net_input_buf, yuv_buf_len / 4);
  for (int i = 0; i < CALIB_LEN; i++) {
    s->calib[i] = calib[i];
  }
--- a/selfdrive/modeld/models/driving.cc
+++ b/selfdrive/modeld/models/driving.cc
@ -33,26 +33,30 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
 #else
  s->m = std::make_unique<SNPEModel>("models/supercombo.dlc",
 #endif
-   &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context);
+   &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, false, context);
-#ifdef TEMPORAL
+  s->m->addInput("input_imgs", NULL, 0);
-  s->m->addRecurrent(&s->feature_buffer[0], TEMPORAL_SIZE);
+  s->m->addInput("big_input_imgs", NULL, 0);
 #endif
  // TODO: the input is important here, still need to fix this
 #ifdef DESIRE
-  s->m->addDesire(s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1));
+  s->m->addInput("desire_pulse", s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1));
 #endif
 #ifdef TRAFFIC_CONVENTION
-  s->m->addTrafficConvention(s->traffic_convention, TRAFFIC_CONVENTION_LEN);
+  s->m->addInput("traffic_convention", s->traffic_convention, TRAFFIC_CONVENTION_LEN);
 #endif
 #ifdef DRIVING_STYLE
-  s->m->addDrivingStyle(s->driving_style, DRIVING_STYLE_LEN);
+  s->m->addInput("driving_style", s->driving_style, DRIVING_STYLE_LEN);
 #endif
 #ifdef NAV
-  s->m->addNavFeatures(s->nav_features, NAV_FEATURE_LEN);
+  s->m->addInput("nav_features", s->nav_features, NAV_FEATURE_LEN);
 #endif
 #ifdef TEMPORAL
  s->m->addInput("feature_buffer", &s->feature_buffer[0], TEMPORAL_SIZE);
 #endif
 }
@ -89,13 +93,13 @@ LOGT("Desire enqueued");
  s->traffic_convention[1-rhd_idx] = 0.0;
  // if getInputBuf is not NULL, net_input_buf will be
-  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
+  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast<cl_mem*>(s->m->getCLBuffer("input_imgs")));
-  s->m->addImage(net_input_buf, s->frame->buf_size);
+  s->m->setInputBuffer("input_imgs", net_input_buf, s->frame->buf_size);
  LOGT("Image added");
  if (wbuf != nullptr) {
-    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast<cl_mem*>(s->m->getExtraBuf()));
+    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast<cl_mem*>(s->m->getCLBuffer("big_input_imgs")));
-    s->m->addExtra(net_extra_buf, s->wide_frame->buf_size);
+    s->m->setInputBuffer("big_input_imgs", net_extra_buf, s->wide_frame->buf_size);
    LOGT("Extra image added");
  }
--- a/selfdrive/modeld/models/nav.cc
+++ b/selfdrive/modeld/models/nav.cc
@ -10,17 +10,19 @@
 void navmodel_init(NavModelState* s) {
  #ifdef USE_ONNX_MODEL
-    s->m = new ONNXModel("models/navmodel.onnx", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+    s->m = new ONNXModel("models/navmodel.onnx", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, true);
  #else
-    s->m = new SNPEModel("models/navmodel_q.dlc", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+    s->m = new SNPEModel("models/navmodel_q.dlc", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, true);
  #endif
  s->m->addInput("map", NULL, 0);
 }
 NavModelResult* navmodel_eval_frame(NavModelState* s, VisionBuf* buf) {
  memcpy(s->net_input_buf, buf->addr, NAV_INPUT_SIZE);
  double t1 = millis_since_boot();
-  s->m->addImage((float*)s->net_input_buf, NAV_INPUT_SIZE/sizeof(float));
+  s->m->setInputBuffer("map", (float*)s->net_input_buf, NAV_INPUT_SIZE/sizeof(float));
  s->m->execute();
  double t2 = millis_since_boot();
--- a/selfdrive/modeld/runners/onnxmodel.cc
+++ b/selfdrive/modeld/runners/onnxmodel.cc
@ -1,25 +1,18 @@
 #include "selfdrive/modeld/runners/onnxmodel.h"
 #include <poll.h>
 #include <unistd.h>
 #include <cassert>
 #include <csignal>
 #include <cstdio>
 #include <cstdlib>
-#include <cstring>
+#include <poll.h>
-#include <stdexcept>
+#include <unistd.h>
 #include <string>
 #include "common/swaglog.h"
 #include "common/util.h"
-ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra, bool _use_tf8, cl_context context) {
+ONNXModel::ONNXModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
-  LOGD("loading model %s", path);
+  LOGD("loading model %s", path.c_str());
  output = _output;
  output_size = _output_size;
  use_extra = _use_extra;
  use_tf8 = _use_tf8;
  int err = pipe(pipein);
@ -34,7 +27,7 @@ ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int
  proc_pid = fork();
  if (proc_pid == 0) {
    LOGD("spawning onnx process %s", onnx_runner.c_str());
-    char *argv[] = {(char*)onnx_runner.c_str(), (char*)path, (char*)tf8_arg.c_str(), nullptr};
+    char *argv[] = {(char*)onnx_runner.c_str(), (char*)path.c_str(), (char*)tf8_arg.c_str(), nullptr};
    dup2(pipein[0], 0);
    dup2(pipeout[1], 1);
    close(pipein[0]);
@ -87,72 +80,9 @@ void ONNXModel::pread(float *buf, int size) {
  LOGD("host read done");
 }
 // Records the caller's buffer pointer and size in rnn_input_buf / rnn_state_size.
 // Nothing is copied here; execute() later passes the pair to pwrite() if the
 // pointer is non-NULL.
 void ONNXModel::addRecurrent(float *state, int state_size) {
  rnn_input_buf = state;
  rnn_state_size = state_size;
 }
 // Records the caller's buffer pointer and size in desire_input_buf /
 // desire_state_size. Nothing is copied here; execute() later passes the pair to
 // pwrite() if the pointer is non-NULL.
 void ONNXModel::addDesire(float *state, int state_size) {
  desire_input_buf = state;
  desire_state_size = state_size;
 }
 // Records the caller's buffer pointer and size in nav_features_input_buf /
 // nav_features_size. Nothing is copied here; execute() later passes the pair to
 // pwrite() if the pointer is non-NULL.
 void ONNXModel::addNavFeatures(float *state, int state_size) {
  nav_features_input_buf = state;
  nav_features_size = state_size;
 }
 // Records the caller's buffer pointer and size in driving_style_input_buf /
 // driving_style_size. Nothing is copied here; execute() later passes the pair to
 // pwrite() if the pointer is non-NULL.
 void ONNXModel::addDrivingStyle(float *state, int state_size) {
    driving_style_input_buf = state;
    driving_style_size = state_size;
 }
 // Records the caller's buffer pointer and size in traffic_convention_input_buf /
 // traffic_convention_size. Nothing is copied here; execute() later passes the
 // pair to pwrite() if the pointer is non-NULL.
 void ONNXModel::addTrafficConvention(float *state, int state_size) {
  traffic_convention_input_buf = state;
  traffic_convention_size = state_size;
 }
 // Records the caller's buffer pointer and size in calib_input_buf / calib_size.
 // Nothing is copied here; execute() later passes the pair to pwrite() if the
 // pointer is non-NULL.
 void ONNXModel::addCalib(float *state, int state_size) {
  calib_input_buf = state;
  calib_size = state_size;
 }
 // Records the caller's image buffer pointer and size in image_input_buf /
 // image_buf_size. Nothing is copied here; execute() later hands the pair to
 // pwrite().
 void ONNXModel::addImage(float *image_buf, int buf_size) {
  image_input_buf = image_buf;
  image_buf_size = buf_size;
 }
 // Records the caller's extra image buffer pointer and size in extra_input_buf /
 // extra_buf_size. Nothing is copied here; execute() later passes the pair to
 // pwrite() if the pointer is non-NULL.
 void ONNXModel::addExtra(float *image_buf, int buf_size) {
  extra_input_buf = image_buf;
  extra_buf_size = buf_size;
 }
 void ONNXModel::execute() {
-  // order must be this
+  for (auto &input : inputs) {
-  if (image_input_buf != NULL) {
+    pwrite(input->buffer, input->size);
    pwrite(image_input_buf, image_buf_size);
  }
  if (extra_input_buf != NULL) {
    pwrite(extra_input_buf, extra_buf_size);
  }
  if (desire_input_buf != NULL) {
    pwrite(desire_input_buf, desire_state_size);
  }
  if (traffic_convention_input_buf != NULL) {
    pwrite(traffic_convention_input_buf, traffic_convention_size);
  }
  if (driving_style_input_buf != NULL) {
    pwrite(driving_style_input_buf, driving_style_size);
  }
  if (nav_features_input_buf != NULL) {
    pwrite(nav_features_input_buf, nav_features_size);
  }
  if (calib_input_buf != NULL) {
    pwrite(calib_input_buf, calib_size);
  }
  if (rnn_input_buf != NULL) {
    pwrite(rnn_input_buf, rnn_state_size);
  }
  pread(output, output_size);
 }
--- a/selfdrive/modeld/runners/onnxmodel.h
+++ b/selfdrive/modeld/runners/onnxmodel.h
@ -1,51 +1,21 @@
 #pragma once
 #include <cstdlib>
 #include "selfdrive/modeld/runners/runmodel.h"
 class ONNXModel : public RunModel {
 public:
-  ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false, bool _use_tf8 = false, cl_context context = NULL);
+  ONNXModel(const std::string path, float *output, size_t output_size, int runtime, bool _use_tf8 = false, cl_context context = NULL);
 	~ONNXModel();
  void addRecurrent(float *state, int state_size);
  void addDesire(float *state, int state_size);
  void addNavFeatures(float *state, int state_size);
  void addDrivingStyle(float *state, int state_size);
  void addTrafficConvention(float *state, int state_size);
  void addCalib(float *state, int state_size);
  void addImage(float *image_buf, int buf_size);
  void addExtra(float *image_buf, int buf_size);
  void execute();
 private:
  int proc_pid;
  float *output;
  size_t output_size;
  float *rnn_input_buf = NULL;
  int rnn_state_size;
  float *desire_input_buf = NULL;
  int desire_state_size;
  float *nav_features_input_buf = NULL;
  int nav_features_size;
  float *driving_style_input_buf = NULL;
  int driving_style_size;
  float *traffic_convention_input_buf = NULL;
  int traffic_convention_size;
  float *calib_input_buf = NULL;
  int calib_size;
  float *image_input_buf = NULL;
  int image_buf_size;
  bool use_tf8;
  float *extra_input_buf = NULL;
  int extra_buf_size;
  bool use_extra;
-  // pipe to communicate to keras subprocess
+  // pipe to communicate to onnx_runner subprocess
  void pread(float *buf, int size);
  void pwrite(float *buf, int size);
  int pipein[2];
  int pipeout[2];
 };
--- a/selfdrive/modeld/runners/runmodel.h
+++ b/selfdrive/modeld/runners/runmodel.h
@ -1,18 +1,45 @@
 #pragma once
 #include <string>
 #include <vector>
 #include <memory>
 #include <cassert>
 #include "common/clutil.h"
 #include "common/swaglog.h"
 // A named input slot of a model: a caller-owned float buffer plus its size.
 // The struct never allocates or frees `buffer`; it only remembers where the
 // caller's data lives. Runner-specific subclasses override setBuffer() to keep
 // backend state in sync when the buffer is repointed.
 struct ModelInput {
  const std::string name;
  float *buffer;
  int size;
  ModelInput(const std::string _name, float *_buffer, int _size) : name(_name), buffer(_buffer), size(_size) {}
  // Instances are owned through std::unique_ptr<ModelInput> while the dynamic
  // type may be a subclass; a virtual destructor is required so that deleting
  // through the base pointer runs the subclass destructor.
  virtual ~ModelInput() = default;
  // Repoints this input at a new buffer. An input registered with size 0 is a
  // placeholder and accepts any size on its first real assignment; afterwards
  // the size must stay the same (enforced by the assert in debug builds).
  virtual void setBuffer(float *_buffer, int _size) {
    assert(size == _size || size == 0);
    buffer = _buffer;
    size = _size;
  }
 };
 class RunModel {
 public:
  std::vector<std::unique_ptr<ModelInput>> inputs;
  virtual ~RunModel() {}
  virtual void addRecurrent(float *state, int state_size) {}
  virtual void addDesire(float *state, int state_size) {}
  virtual void addNavFeatures(float *state, int state_size) {}
  virtual void addDrivingStyle(float *state, int state_size) {}
  virtual void addTrafficConvention(float *state, int state_size) {}
  virtual void addCalib(float *state, int state_size) {}
  virtual void addImage(float *image_buf, int buf_size) {}
  virtual void addExtra(float *image_buf, int buf_size) {}
  virtual void execute() {}
-  virtual void* getInputBuf() { return nullptr; }
+  virtual void* getCLBuffer(const std::string name) { return nullptr; }
  virtual void* getExtraBuf() { return nullptr; }
 };
  virtual void addInput(const std::string name, float *buffer, int size) {
    inputs.push_back(std::unique_ptr<ModelInput>(new ModelInput(name, buffer, size)));
  }
  virtual void setInputBuffer(const std::string name, float *buffer, int size) {
    for (auto &input : inputs) {
      if (name == input->name) {
        input->setBuffer(buffer, size);
        return;
      }
    }
    LOGE("Tried to update input `%s` but no input with this name exists", name.c_str());
    assert(false);
  }
 };
--- a/selfdrive/modeld/runners/snpemodel.cc
+++ b/selfdrive/modeld/runners/snpemodel.cc
@ -2,8 +2,6 @@
 #include "selfdrive/modeld/runners/snpemodel.h"
 #include <cassert>
 #include <cstdlib>
 #include <cstring>
 #include "common/util.h"
@ -14,20 +12,20 @@ void PrintErrorStringAndExit() {
  std::exit(EXIT_FAILURE);
 }
-SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
+SNPEModel::SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
-  output = loutput;
+  output = _output;
-  output_size = loutput_size;
+  output_size = _output_size;
-  use_extra = luse_extra;
+  use_tf8 = _use_tf8;
-  use_tf8 = luse_tf8;
+
 #ifdef QCOM2
-  if (runtime==USE_GPU_RUNTIME) {
+  if (runtime == USE_GPU_RUNTIME) {
-    Runtime = zdl::DlSystem::Runtime_t::GPU;
+    snpe_runtime = zdl::DlSystem::Runtime_t::GPU;
-  } else if (runtime==USE_DSP_RUNTIME) {
+  } else if (runtime == USE_DSP_RUNTIME) {
-    Runtime = zdl::DlSystem::Runtime_t::DSP;
+    snpe_runtime = zdl::DlSystem::Runtime_t::DSP;
  } else {
-    Runtime = zdl::DlSystem::Runtime_t::CPU;
+    snpe_runtime = zdl::DlSystem::Runtime_t::CPU;
  }
-  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(Runtime));
+  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(snpe_runtime));
 #endif
  model_data = util::read_file(path);
  assert(model_data.size() > 0);
@ -38,172 +36,83 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
  printf("loaded model with size: %lu\n", model_data.size());
  // create model runner
-  zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
+  zdl::SNPE::SNPEBuilder snpe_builder(container.get());
  while (!snpe) {
 #ifdef QCOM2
-    snpe = snpeBuilder.setOutputLayers({})
+    snpe = snpe_builder.setOutputLayers({})
-                      .setRuntimeProcessor(Runtime)
+                       .setRuntimeProcessor(snpe_runtime)
-                      .setUseUserSuppliedBuffers(true)
+                       .setUseUserSuppliedBuffers(true)
-                      .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
+                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
-                      .build();
+                       .build();
 #else
-    snpe = snpeBuilder.setOutputLayers({})
+    snpe = snpe_builder.setOutputLayers({})
-                      .setUseUserSuppliedBuffers(true)
+                       .setUseUserSuppliedBuffers(true)
-                      .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
+                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
-                      .build();
+                       .build();
 #endif
    if (!snpe) std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
  }
  // get input and output names
  const auto &strListi_opt = snpe->getInputTensorNames();
  if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names");
  const auto &strListi = *strListi_opt;
  //assert(strListi.size() == 1);
  const char *input_tensor_name = strListi.at(0);
  const auto &strListo_opt = snpe->getOutputTensorNames();
  if (!strListo_opt) throw std::runtime_error("Error obtaining Output tensor names");
  const auto &strListo = *strListo_opt;
  assert(strListo.size() == 1);
  const char *output_tensor_name = strListo.at(0);
  printf("model: %s -> %s\n", input_tensor_name, output_tensor_name);
  zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat;
  zdl::DlSystem::UserBufferEncodingTf8 userBufferEncodingTf8(0, 1./255); // network takes 0-1
  zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
  size_t size_of_input = use_tf8 ? sizeof(uint8_t) : sizeof(float);
  // create input buffer
  {
    const auto &inputDims_opt = snpe->getInputDimensions(input_tensor_name);
    const zdl::DlSystem::TensorShape& bufferShape = *inputDims_opt;
    std::vector<size_t> strides(bufferShape.rank());
    strides[strides.size() - 1] = size_of_input;
    size_t product = 1;
    for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i];
    size_t stride = strides[strides.size() - 1];
    for (size_t i = bufferShape.rank() - 1; i > 0; i--) {
      stride *= bufferShape[i];
      strides[i-1] = stride;
    }
    printf("input product is %lu\n", product);
    inputBuffer = ubFactory.createUserBuffer(NULL,
                                             product*size_of_input,
                                             strides,
                                             use_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingTf8 : (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingFloat);
    inputMap.add(input_tensor_name, inputBuffer.get());
  }
  if (use_extra) {
    const char *extra_tensor_name = strListi.at(1);
    const auto &extraDims_opt = snpe->getInputDimensions(extra_tensor_name);
    const zdl::DlSystem::TensorShape& bufferShape = *extraDims_opt;
    std::vector<size_t> strides(bufferShape.rank());
    strides[strides.size() - 1] = sizeof(float);
    size_t product = 1;
    for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i];
    size_t stride = strides[strides.size() - 1];
    for (size_t i = bufferShape.rank() - 1; i > 0; i--) {
      stride *= bufferShape[i];
      strides[i-1] = stride;
    }
    printf("extra product is %lu\n", product);
    extraBuffer = ubFactory.createUserBuffer(NULL, product*sizeof(float), strides, &userBufferEncodingFloat);
    inputMap.add(extra_tensor_name, extraBuffer.get());
  }
  // create output buffer
-  {
+  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
-    const zdl::DlSystem::TensorShape& bufferShape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
+  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-    if (output_size != 0) {
+
-      assert(output_size == bufferShape[1]);
+  const auto &output_tensor_names_opt = snpe->getOutputTensorNames();
-    } else {
+  if (!output_tensor_names_opt) throw std::runtime_error("Error obtaining output tensor names");
-      output_size = bufferShape[1];
+  const auto &output_tensor_names = *output_tensor_names_opt;
-    }
+  assert(output_tensor_names.size() == 1);
-
+  const char *output_tensor_name = output_tensor_names.at(0);
-    std::vector<size_t> outputStrides = {output_size * sizeof(float), sizeof(float)};
+  const zdl::DlSystem::TensorShape &buffer_shape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
-    outputBuffer = ubFactory.createUserBuffer(output, output_size * sizeof(float), outputStrides, &userBufferEncodingFloat);
+  if (output_size != 0) {
-    outputMap.add(output_tensor_name, outputBuffer.get());
+    assert(output_size == buffer_shape[1]);
  } else {
    output_size = buffer_shape[1];
  }
  std::vector<size_t> output_strides = {output_size * sizeof(float), sizeof(float)};
  output_buffer = ub_factory.createUserBuffer(output, output_size * sizeof(float), output_strides, &ub_encoding_float);
  output_map.add(output_tensor_name, output_buffer.get());
 #ifdef USE_THNEED
-  if (Runtime == zdl::DlSystem::Runtime_t::GPU) {
+  if (snpe_runtime == zdl::DlSystem::Runtime_t::GPU) {
    thneed.reset(new Thneed());
  }
 #endif
 }
-void SNPEModel::addRecurrent(float *state, int state_size) {
+void SNPEModel::addInput(const std::string name, float *buffer, int size) {
-  recurrent = state;
+  const int idx = inputs.size();
-  recurrent_size = state_size;
+  const auto &input_tensor_names_opt = snpe->getInputTensorNames();
-  recurrentBuffer = this->addExtra(state, state_size, 3);
+  if (!input_tensor_names_opt) throw std::runtime_error("Error obtaining input tensor names");
-}
+  const auto &input_tensor_names = *input_tensor_names_opt;
-
+  const char *input_tensor_name = input_tensor_names.at(idx);
-void SNPEModel::addTrafficConvention(float *state, int state_size) {
+  const bool input_tf8 = use_tf8 && strcmp(input_tensor_name, "input_img") == 0;  // TODO: This is a terrible hack, get rid of this name check both here and in onnx_runner.py
-  trafficConvention = state;
+  printf("adding index %d: %s\n", idx, input_tensor_name);
-  trafficConventionBuffer = this->addExtra(state, state_size, 2);
+
-}
+  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
-
+  zdl::DlSystem::UserBufferEncodingTf8 ub_encoding_tf8(0, 1./255); // network takes 0-1
-void SNPEModel::addDesire(float *state, int state_size) {
+  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-  desire = state;
+  zdl::DlSystem::UserBufferEncoding *input_encoding = input_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_tf8 : (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_float;
-  desireBuffer = this->addExtra(state, state_size, 1);
+
-}
+  const auto &buffer_shape_opt = snpe->getInputDimensions(input_tensor_name);
-
+  const zdl::DlSystem::TensorShape &buffer_shape = *buffer_shape_opt;
-void SNPEModel::addNavFeatures(float *state, int state_size) {
+  size_t size_of_input = input_tf8 ? sizeof(uint8_t) : sizeof(float);
-  navFeatures = state;
+  std::vector<size_t> strides(buffer_shape.rank());
-  navFeaturesBuffer = this->addExtra(state, state_size, 1);
+  strides[strides.size() - 1] = size_of_input;
-}
+  size_t product = 1;
-
+  for (size_t i = 0; i < buffer_shape.rank(); i++) product *= buffer_shape[i];
-void SNPEModel::addDrivingStyle(float *state, int state_size) {
+  size_t stride = strides[strides.size() - 1];
-    drivingStyle = state;
+  for (size_t i = buffer_shape.rank() - 1; i > 0; i--) {
-    drivingStyleBuffer = this->addExtra(state, state_size, 2);
+    stride *= buffer_shape[i];
-}
+    strides[i-1] = stride;
-
+  }
 // Remembers the caller's calib pointer and keeps the user buffer returned by the
 // three-argument addExtra() overload (called with index 1) alive in calibBuffer.
 void SNPEModel::addCalib(float *state, int state_size) {
  calib = state;
  calibBuffer = this->addExtra(state, state_size, 1);
 }
 // Records the caller's image buffer pointer and size in input / input_size.
 // Nothing is copied here; execute() later hands `input` to
 // inputBuffer->setBufferAddress().
 void SNPEModel::addImage(float *image_buf, int buf_size) {
  input = image_buf;
  input_size = buf_size;
 }
 // Records the caller's extra image buffer pointer and size in extra / extra_size.
 // Nothing is copied here; execute() hands `extra` to
 // extraBuffer->setBufferAddress() when use_extra is set.
 void SNPEModel::addExtra(float *image_buf, int buf_size) {
  extra = image_buf;
  extra_size = buf_size;
 }
-std::unique_ptr<zdl::DlSystem::IUserBuffer> SNPEModel::addExtra(float *state, int state_size, int idx) {
+  auto input_buffer = ub_factory.createUserBuffer(buffer, product*size_of_input, strides, input_encoding);
-  // get input and output names
+  input_map.add(input_tensor_name, input_buffer.get());
-  const auto real_idx = idx + (use_extra ? 1 : 0);
+  inputs.push_back(std::unique_ptr<SNPEModelInput>(new SNPEModelInput(name, buffer, size, std::move(input_buffer))));
  const auto &strListi_opt = snpe->getInputTensorNames();
  if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names");
  const auto &strListi = *strListi_opt;
  const char *input_tensor_name = strListi.at(real_idx);
  printf("adding index %d: %s\n", real_idx, input_tensor_name);
  zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat;
  zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
  std::vector<size_t> retStrides = {state_size * sizeof(float), sizeof(float)};
  auto ret = ubFactory.createUserBuffer(state, state_size * sizeof(float), retStrides, &userBufferEncodingFloat);
  inputMap.add(input_tensor_name, ret.get());
  return ret;
 }
 void SNPEModel::execute() {
-  bool ret = inputBuffer->setBufferAddress(input);
+  if (!snpe->execute(input_map, output_map)) {
  assert(ret == true);
  if (use_extra) {
    bool extra_ret = extraBuffer->setBufferAddress(extra);
    assert(extra_ret == true);
  }
  if (!snpe->execute(inputMap, outputMap)) {
    PrintErrorStringAndExit();
  }
 }
--- a/selfdrive/modeld/runners/snpemodel.h
+++ b/selfdrive/modeld/runners/snpemodel.h
@ -11,7 +11,7 @@
 #include <SNPE/SNPEBuilder.hpp>
 #include <SNPE/SNPEFactory.hpp>
-#include "runmodel.h"
+#include "selfdrive/modeld/runners/runmodel.h"
 #define USE_CPU_RUNTIME 0
 #define USE_GPU_RUNTIME 1
@ -21,17 +21,20 @@
 #include "selfdrive/modeld/thneed/thneed.h"
 #endif
 struct SNPEModelInput : public ModelInput {
  std::unique_ptr<zdl::DlSystem::IUserBuffer> snpe_buffer;
  SNPEModelInput(const std::string _name, float *_buffer, int _size, std::unique_ptr<zdl::DlSystem::IUserBuffer> _snpe_buffer) : ModelInput(_name, _buffer, _size), snpe_buffer(std::move(_snpe_buffer)) {}
  void setBuffer(float *_buffer, int _size) {
    ModelInput::setBuffer(_buffer, _size);
    assert(snpe_buffer->setBufferAddress(_buffer) == true);
  }
 };
 class SNPEModel : public RunModel {
 public:
-  SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
+  SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
-  void addRecurrent(float *state, int state_size);
+  void addInput(const std::string name, float *buffer, int size);
  void addTrafficConvention(float *state, int state_size);
  void addCalib(float *state, int state_size);
  void addDesire(float *state, int state_size);
  void addDrivingStyle(float *state, int state_size);
  void addNavFeatures(float *state, int state_size);
  void addImage(float *image_buf, int buf_size);
  void addExtra(float *image_buf, int buf_size);
  void execute();
 #ifdef USE_THNEED
@ -43,44 +46,16 @@ private:
  std::string model_data;
 #ifdef QCOM2
-  zdl::DlSystem::Runtime_t Runtime;
+  zdl::DlSystem::Runtime_t snpe_runtime;
 #endif
  // snpe model stuff
  std::unique_ptr<zdl::SNPE::SNPE> snpe;
  zdl::DlSystem::UserBufferMap input_map;
  zdl::DlSystem::UserBufferMap output_map;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> output_buffer;
  // snpe input stuff
  zdl::DlSystem::UserBufferMap inputMap;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> inputBuffer;
  float *input;
  size_t input_size;
  bool use_tf8;
  // snpe output stuff
  zdl::DlSystem::UserBufferMap outputMap;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> outputBuffer;
  float *output;
  size_t output_size;
  // extra input stuff
  std::unique_ptr<zdl::DlSystem::IUserBuffer> extraBuffer;
  float *extra;
  size_t extra_size;
  bool use_extra;
  // recurrent and desire
  std::unique_ptr<zdl::DlSystem::IUserBuffer> addExtra(float *state, int state_size, int idx);
  float *recurrent;
  size_t recurrent_size;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> recurrentBuffer;
  float *trafficConvention;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> trafficConventionBuffer;
  float *desire;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> desireBuffer;
  float *navFeatures;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> navFeaturesBuffer;
  float *drivingStyle;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> drivingStyleBuffer;
  float *calib;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> calibBuffer;
 };
--- a/selfdrive/modeld/runners/thneedmodel.cc
+++ b/selfdrive/modeld/runners/thneedmodel.cc
@ -1,78 +1,56 @@
 #include "selfdrive/modeld/runners/thneedmodel.h"
-#include <cassert>
+#include "common/swaglog.h"
-ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
+ThneedModel::ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool luse_tf8, cl_context context) {
  thneed = new Thneed(true, context);
-  thneed->load(path);
+  thneed->load(path.c_str());
  thneed->clexec();
  recorded = false;
-  output = loutput;
+  output = _output;
  use_extra = luse_extra;
 }
-void ThneedModel::addRecurrent(float *state, int state_size) {
+void* ThneedModel::getCLBuffer(const std::string name) {
-  recurrent = state;
+  int index = -1;
-}
+  for (int i = 0; i < inputs.size(); i++) {
-
+    if (name == inputs[i]->name) {
-void ThneedModel::addTrafficConvention(float *state, int state_size) {
+      index = i;
-  trafficConvention = state;
+      break;
-}
+    }
-
+  }
 // Stores the caller's pointer in `desire`; state_size is not used here.
 void ThneedModel::addDesire(float *state, int state_size) {
  desire = state;
 }
 // Stores the caller's pointer in `drivingStyle`; state_size is not used here.
 void ThneedModel::addDrivingStyle(float *state, int state_size) {
    drivingStyle = state;
 }
 // Stores the caller's pointer in `navFeatures`; state_size is not used here.
 void ThneedModel::addNavFeatures(float *state, int state_size) {
  navFeatures = state;
 }
 // Stores the caller's image pointer in `input`; buf_size is not used here.
 void ThneedModel::addImage(float *image_input_buf, int buf_size) {
  input = image_input_buf;
 }
 // Stores the caller's extra image pointer in `extra`; buf_size is not used here.
 void ThneedModel::addExtra(float *extra_input_buf, int buf_size) {
  extra = extra_input_buf;
 }
-void* ThneedModel::getInputBuf() {
+  if (index == -1) {
-  if (use_extra && thneed->input_clmem.size() > 5) return &(thneed->input_clmem[5]);
+    LOGE("Tried to get CL buffer for input `%s` but no input with this name exists", name.c_str());
-  else if (!use_extra && thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]);
+    assert(false);
-  else return nullptr;
+  }
 }
-void* ThneedModel::getExtraBuf() {
+  if (thneed->input_clmem.size() >= inputs.size()) {
-  if (thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]);
+    return &thneed->input_clmem[inputs.size() - index - 1];
-  else return nullptr;
+  } else {
    return nullptr;
  }
 }
 void ThneedModel::execute() {
  if (!recorded) {
    thneed->record = true;
-    if (use_extra) {
+    float *input_buffers[inputs.size()];
-      float *inputs[6] = {recurrent, navFeatures, trafficConvention, desire, extra, input};
+    for (int i = 0; i < inputs.size(); i++) {
-      thneed->copy_inputs(inputs);
+      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    } else {
      float *inputs[5] = {recurrent, navFeatures, trafficConvention, desire, input};
      thneed->copy_inputs(inputs);
    }
    thneed->copy_inputs(input_buffers);
    thneed->clexec();
    thneed->copy_output(output);
    thneed->stop();
    recorded = true;
  } else {
-    if (use_extra) {
+    float *input_buffers[inputs.size()];
-      float *inputs[6] = {recurrent, navFeatures, trafficConvention, desire, extra, input};
+    for (int i = 0; i < inputs.size(); i++) {
-      thneed->execute(inputs, output);
+      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    } else {
      float *inputs[5] = {recurrent, navFeatures, trafficConvention, desire, input};
      thneed->execute(inputs, output);
    }
    thneed->execute(input_buffers, output);
  }
 }
--- a/selfdrive/modeld/runners/thneedmodel.h
+++ b/selfdrive/modeld/runners/thneedmodel.h
@ -5,31 +5,11 @@
 class ThneedModel : public RunModel {
 public:
-  ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
+  ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
-  void addRecurrent(float *state, int state_size);
+  void *getCLBuffer(const std::string name);
  void addTrafficConvention(float *state, int state_size);
  void addDesire(float *state, int state_size);
  void addNavFeatures(float *state, int state_size);
  void addDrivingStyle(float *state, int state_size);
  void addImage(float *image_buf, int buf_size);
  void addExtra(float *image_buf, int buf_size);
  void execute();
  void* getInputBuf();
  void* getExtraBuf();
 private:
  Thneed *thneed = NULL;
  bool recorded;
  bool use_extra;
  float *input;
  float *extra;
  float *output;
  // recurrent and desire
  float *recurrent;
  float *trafficConvention;
  float *drivingStyle;
  float *desire;
  float *navFeatures;
 };