Refactor model runners (#28598)

* Started work on model runner refactor * Fixed some compile errors * everything compiles * Fixed bug in SNPEModel * updateInput -> setInputBuffer * I understand nothing * whoops lol * use std::string instead of char* * Move common logic into RunModel * formatting fix old-commit-hash: c9f00678af
2 years ago · 95051090a1
parent fdc8876745
commit 95051090a1
10 changed files with 190 additions and 414 deletions
--- a/selfdrive/modeld/models/dmonitoring.cc
+++ b/selfdrive/modeld/models/dmonitoring.cc
@ -22,12 +22,13 @@ static inline T *get_buffer(std::vector<T> &buf, const size_t size) {
 void dmonitoring_init(DMonitoringModelState* s) {

 #ifdef USE_ONNX_MODEL
-  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, true);
 #else
-  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, true);
 #endif

-  s->m->addCalib(s->calib, CALIB_LEN);
+  s->m->addInput("input_imgs", NULL, 0);
+  s->m->addInput("calib", s->calib, CALIB_LEN);
 }

 void parse_driver_data(DriverStateResult &ds_res, const DMonitoringModelState* s, int out_idx_offset) {
@ -92,7 +93,7 @@ DMonitoringModelResult dmonitoring_eval_frame(DMonitoringModelState* s, void* st
  // fclose(dump_yuv_file);

  double t1 = millis_since_boot();
-  s->m->addImage((float*)net_input_buf, yuv_buf_len / 4);
+  s->m->setInputBuffer("input_imgs", (float*)net_input_buf, yuv_buf_len / 4);
  for (int i = 0; i < CALIB_LEN; i++) {
    s->calib[i] = calib[i];
  }
--- a/selfdrive/modeld/models/driving.cc
+++ b/selfdrive/modeld/models/driving.cc
@ -33,26 +33,30 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) {
 #else
  s->m = std::make_unique<SNPEModel>("models/supercombo.dlc",
 #endif
-   &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, true, false, context);
+   &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, false, context);

-#ifdef TEMPORAL
-  s->m->addRecurrent(&s->feature_buffer[0], TEMPORAL_SIZE);
-#endif
+  s->m->addInput("input_imgs", NULL, 0);
+  s->m->addInput("big_input_imgs", NULL, 0);

+  // TODO: the input is important here, still need to fix this
 #ifdef DESIRE
-  s->m->addDesire(s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1));
+  s->m->addInput("desire_pulse", s->pulse_desire, DESIRE_LEN*(HISTORY_BUFFER_LEN+1));
 #endif

 #ifdef TRAFFIC_CONVENTION
-  s->m->addTrafficConvention(s->traffic_convention, TRAFFIC_CONVENTION_LEN);
+  s->m->addInput("traffic_convention", s->traffic_convention, TRAFFIC_CONVENTION_LEN);
 #endif

 #ifdef DRIVING_STYLE
-  s->m->addDrivingStyle(s->driving_style, DRIVING_STYLE_LEN);
+  s->m->addInput("driving_style", s->driving_style, DRIVING_STYLE_LEN);
 #endif

 #ifdef NAV
-  s->m->addNavFeatures(s->nav_features, NAV_FEATURE_LEN);
+  s->m->addInput("nav_features", s->nav_features, NAV_FEATURE_LEN);
+#endif
+
+#ifdef TEMPORAL
+  s->m->addInput("feature_buffer", &s->feature_buffer[0], TEMPORAL_SIZE);
 #endif

 }
@ -89,13 +93,13 @@ LOGT("Desire enqueued");
  s->traffic_convention[1-rhd_idx] = 0.0;

  // if getInputBuf is not NULL, net_input_buf will be
-  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast<cl_mem*>(s->m->getInputBuf()));
-  s->m->addImage(net_input_buf, s->frame->buf_size);
+  auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, buf->stride, buf->uv_offset, transform, static_cast<cl_mem*>(s->m->getCLBuffer("input_imgs")));
+  s->m->setInputBuffer("input_imgs", net_input_buf, s->frame->buf_size);
  LOGT("Image added");

  if (wbuf != nullptr) {
-    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast<cl_mem*>(s->m->getExtraBuf()));
-    s->m->addExtra(net_extra_buf, s->wide_frame->buf_size);
+    auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, wbuf->stride, wbuf->uv_offset, transform_wide, static_cast<cl_mem*>(s->m->getCLBuffer("big_input_imgs")));
+    s->m->setInputBuffer("big_input_imgs", net_extra_buf, s->wide_frame->buf_size);
    LOGT("Extra image added");
  }

--- a/selfdrive/modeld/models/nav.cc
+++ b/selfdrive/modeld/models/nav.cc
@ -10,17 +10,19 @@

 void navmodel_init(NavModelState* s) {
  #ifdef USE_ONNX_MODEL
-    s->m = new ONNXModel("models/navmodel.onnx", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+    s->m = new ONNXModel("models/navmodel.onnx", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, true);
  #else
-    s->m = new SNPEModel("models/navmodel_q.dlc", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+    s->m = new SNPEModel("models/navmodel_q.dlc", &s->output[0], NAV_NET_OUTPUT_SIZE, USE_DSP_RUNTIME, true);
  #endif
+
+  s->m->addInput("map", NULL, 0);
 }

 NavModelResult* navmodel_eval_frame(NavModelState* s, VisionBuf* buf) {
  memcpy(s->net_input_buf, buf->addr, NAV_INPUT_SIZE);

  double t1 = millis_since_boot();
-  s->m->addImage((float*)s->net_input_buf, NAV_INPUT_SIZE/sizeof(float));
+  s->m->setInputBuffer("map", (float*)s->net_input_buf, NAV_INPUT_SIZE/sizeof(float));
  s->m->execute();
  double t2 = millis_since_boot();

--- a/selfdrive/modeld/runners/onnxmodel.cc
+++ b/selfdrive/modeld/runners/onnxmodel.cc
@ -1,25 +1,18 @@
 #include "selfdrive/modeld/runners/onnxmodel.h"

-#include <poll.h>
-#include <unistd.h>
-
-#include <cassert>
 #include <csignal>
 #include <cstdio>
 #include <cstdlib>
-#include <cstring>
-#include <stdexcept>
-#include <string>
+#include <poll.h>
+#include <unistd.h>

-#include "common/swaglog.h"
 #include "common/util.h"

-ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra, bool _use_tf8, cl_context context) {
-  LOGD("loading model %s", path);
+ONNXModel::ONNXModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
+  LOGD("loading model %s", path.c_str());

  output = _output;
  output_size = _output_size;
-  use_extra = _use_extra;
  use_tf8 = _use_tf8;

  int err = pipe(pipein);
@ -34,7 +27,7 @@ ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int
  proc_pid = fork();
  if (proc_pid == 0) {
    LOGD("spawning onnx process %s", onnx_runner.c_str());
-    char *argv[] = {(char*)onnx_runner.c_str(), (char*)path, (char*)tf8_arg.c_str(), nullptr};
+    char *argv[] = {(char*)onnx_runner.c_str(), (char*)path.c_str(), (char*)tf8_arg.c_str(), nullptr};
    dup2(pipein[0], 0);
    dup2(pipeout[1], 1);
    close(pipein[0]);
@ -87,72 +80,9 @@ void ONNXModel::pread(float *buf, int size) {
  LOGD("host read done");
 }

-void ONNXModel::addRecurrent(float *state, int state_size) {
-  rnn_input_buf = state;
-  rnn_state_size = state_size;
-}
-
-void ONNXModel::addDesire(float *state, int state_size) {
-  desire_input_buf = state;
-  desire_state_size = state_size;
-}
-
-void ONNXModel::addNavFeatures(float *state, int state_size) {
-  nav_features_input_buf = state;
-  nav_features_size = state_size;
-}
-
-void ONNXModel::addDrivingStyle(float *state, int state_size) {
-    driving_style_input_buf = state;
-    driving_style_size = state_size;
-}
-
-void ONNXModel::addTrafficConvention(float *state, int state_size) {
-  traffic_convention_input_buf = state;
-  traffic_convention_size = state_size;
-}
-
-void ONNXModel::addCalib(float *state, int state_size) {
-  calib_input_buf = state;
-  calib_size = state_size;
-}
-
-void ONNXModel::addImage(float *image_buf, int buf_size) {
-  image_input_buf = image_buf;
-  image_buf_size = buf_size;
-}
-
-void ONNXModel::addExtra(float *image_buf, int buf_size) {
-  extra_input_buf = image_buf;
-  extra_buf_size = buf_size;
-}
-
 void ONNXModel::execute() {
-  // order must be this
-  if (image_input_buf != NULL) {
-    pwrite(image_input_buf, image_buf_size);
-  }
-  if (extra_input_buf != NULL) {
-    pwrite(extra_input_buf, extra_buf_size);
-  }
-  if (desire_input_buf != NULL) {
-    pwrite(desire_input_buf, desire_state_size);
-  }
-  if (traffic_convention_input_buf != NULL) {
-    pwrite(traffic_convention_input_buf, traffic_convention_size);
-  }
-  if (driving_style_input_buf != NULL) {
-    pwrite(driving_style_input_buf, driving_style_size);
-  }
-  if (nav_features_input_buf != NULL) {
-    pwrite(nav_features_input_buf, nav_features_size);
-  }
-  if (calib_input_buf != NULL) {
-    pwrite(calib_input_buf, calib_size);
-  }
-  if (rnn_input_buf != NULL) {
-    pwrite(rnn_input_buf, rnn_state_size);
+  for (auto &input : inputs) {
+    pwrite(input->buffer, input->size);
  }
  pread(output, output_size);
 }
-
--- a/selfdrive/modeld/runners/onnxmodel.h
+++ b/selfdrive/modeld/runners/onnxmodel.h
@ -1,51 +1,21 @@
 #pragma once

-#include <cstdlib>
-
 #include "selfdrive/modeld/runners/runmodel.h"

 class ONNXModel : public RunModel {
 public:
-  ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false, bool _use_tf8 = false, cl_context context = NULL);
+  ONNXModel(const std::string path, float *output, size_t output_size, int runtime, bool _use_tf8 = false, cl_context context = NULL);
 	~ONNXModel();
-  void addRecurrent(float *state, int state_size);
-  void addDesire(float *state, int state_size);
-  void addNavFeatures(float *state, int state_size);
-  void addDrivingStyle(float *state, int state_size);
-  void addTrafficConvention(float *state, int state_size);
-  void addCalib(float *state, int state_size);
-  void addImage(float *image_buf, int buf_size);
-  void addExtra(float *image_buf, int buf_size);
  void execute();
 private:
  int proc_pid;
-
  float *output;
  size_t output_size;
-
-  float *rnn_input_buf = NULL;
-  int rnn_state_size;
-  float *desire_input_buf = NULL;
-  int desire_state_size;
-  float *nav_features_input_buf = NULL;
-  int nav_features_size;
-  float *driving_style_input_buf = NULL;
-  int driving_style_size;
-  float *traffic_convention_input_buf = NULL;
-  int traffic_convention_size;
-  float *calib_input_buf = NULL;
-  int calib_size;
-  float *image_input_buf = NULL;
-  int image_buf_size;
  bool use_tf8;
-  float *extra_input_buf = NULL;
-  int extra_buf_size;
-  bool use_extra;

-  // pipe to communicate to keras subprocess
+  // pipe to communicate to onnx_runner subprocess
  void pread(float *buf, int size);
  void pwrite(float *buf, int size);
  int pipein[2];
  int pipeout[2];
 };
-
--- a/selfdrive/modeld/runners/runmodel.h
+++ b/selfdrive/modeld/runners/runmodel.h
@ -1,18 +1,45 @@
 #pragma once
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <cassert>
+
 #include "common/clutil.h"
+#include "common/swaglog.h"
+
+// One named input of a model runner: a raw pointer to a float buffer plus its length.
+// The pointer is stored as-is (never copied or freed here); callers in this change pass
+// lengths as float counts (e.g. NAV_INPUT_SIZE/sizeof(float)).
+struct ModelInput {
+  const std::string name;
+  float *buffer;
+  int size;
+
+  ModelInput(const std::string _name, float *_buffer, int _size) : name(_name), buffer(_buffer), size(_size) {}
+
+  // Inputs are held and destroyed through std::unique_ptr<ModelInput> (RunModel::inputs),
+  // and runners store subclasses in that vector. The destructor must therefore be virtual,
+  // otherwise deleting a subclass through the base pointer is undefined behaviour and the
+  // subclass members are never released.
+  virtual ~ModelInput() = default;
+
+  // Re-point this input at a new buffer. The length may only be set once: it must either
+  // match the current length or replace the placeholder length 0 given at registration.
+  virtual void setBuffer(float *_buffer, int _size) {
+    assert(size == _size || size == 0);
+    buffer = _buffer;
+    size = _size;
+  }
+};
+
 class RunModel {
 public:
+  std::vector<std::unique_ptr<ModelInput>> inputs;
+
  virtual ~RunModel() {}
-  virtual void addRecurrent(float *state, int state_size) {}
-  virtual void addDesire(float *state, int state_size) {}
-  virtual void addNavFeatures(float *state, int state_size) {}
-  virtual void addDrivingStyle(float *state, int state_size) {}
-  virtual void addTrafficConvention(float *state, int state_size) {}
-  virtual void addCalib(float *state, int state_size) {}
-  virtual void addImage(float *image_buf, int buf_size) {}
-  virtual void addExtra(float *image_buf, int buf_size) {}
  virtual void execute() {}
-  virtual void* getInputBuf() { return nullptr; }
-  virtual void* getExtraBuf() { return nullptr; }
-};
+  virtual void* getCLBuffer(const std::string name) { return nullptr; }

+  virtual void addInput(const std::string name, float *buffer, int size) {
+    inputs.push_back(std::unique_ptr<ModelInput>(new ModelInput(name, buffer, size)));
+  }
+  virtual void setInputBuffer(const std::string name, float *buffer, int size) {
+    for (auto &input : inputs) {
+      if (name == input->name) {
+        input->setBuffer(buffer, size);
+        return;
+      }
+    }
+    LOGE("Tried to update input `%s` but no input with this name exists", name.c_str());
+    assert(false);
+  }
+};
--- a/selfdrive/modeld/runners/snpemodel.cc
+++ b/selfdrive/modeld/runners/snpemodel.cc
@ -2,8 +2,6 @@

 #include "selfdrive/modeld/runners/snpemodel.h"

-#include <cassert>
-#include <cstdlib>
 #include <cstring>

 #include "common/util.h"
@ -14,20 +12,20 @@ void PrintErrorStringAndExit() {
  std::exit(EXIT_FAILURE);
 }

-SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
-  output = loutput;
-  output_size = loutput_size;
-  use_extra = luse_extra;
-  use_tf8 = luse_tf8;
+SNPEModel::SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
+  output = _output;
+  output_size = _output_size;
+  use_tf8 = _use_tf8;
+
 #ifdef QCOM2
  if (runtime == USE_GPU_RUNTIME) {
-    Runtime = zdl::DlSystem::Runtime_t::GPU;
+    snpe_runtime = zdl::DlSystem::Runtime_t::GPU;
  } else if (runtime == USE_DSP_RUNTIME) {
-    Runtime = zdl::DlSystem::Runtime_t::DSP;
+    snpe_runtime = zdl::DlSystem::Runtime_t::DSP;
  } else {
-    Runtime = zdl::DlSystem::Runtime_t::CPU;
+    snpe_runtime = zdl::DlSystem::Runtime_t::CPU;
  }
-  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(Runtime));
+  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(snpe_runtime));
 #endif
  model_data = util::read_file(path);
  assert(model_data.size() > 0);
@ -38,16 +36,16 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
  printf("loaded model with size: %lu\n", model_data.size());

  // create model runner
-  zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
+  zdl::SNPE::SNPEBuilder snpe_builder(container.get());
  while (!snpe) {
 #ifdef QCOM2
-    snpe = snpeBuilder.setOutputLayers({})
-                      .setRuntimeProcessor(Runtime)
+    snpe = snpe_builder.setOutputLayers({})
+                       .setRuntimeProcessor(snpe_runtime)
                       .setUseUserSuppliedBuffers(true)
                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
                       .build();
 #else
-    snpe = snpeBuilder.setOutputLayers({})
+    snpe = snpe_builder.setOutputLayers({})
                       .setUseUserSuppliedBuffers(true)
                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
                       .build();
@ -55,155 +53,66 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int
    if (!snpe) std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
  }

-  // get input and output names
-  const auto &strListi_opt = snpe->getInputTensorNames();
-  if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names");
-  const auto &strListi = *strListi_opt;
-  //assert(strListi.size() == 1);
-  const char *input_tensor_name = strListi.at(0);
-
-  const auto &strListo_opt = snpe->getOutputTensorNames();
-  if (!strListo_opt) throw std::runtime_error("Error obtaining Output tensor names");
-  const auto &strListo = *strListo_opt;
-  assert(strListo.size() == 1);
-  const char *output_tensor_name = strListo.at(0);
-
-  printf("model: %s -> %s\n", input_tensor_name, output_tensor_name);
-
-  zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat;
-  zdl::DlSystem::UserBufferEncodingTf8 userBufferEncodingTf8(0, 1./255); // network takes 0-1
-  zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-  size_t size_of_input = use_tf8 ? sizeof(uint8_t) : sizeof(float);
-
-  // create input buffer
-  {
-    const auto &inputDims_opt = snpe->getInputDimensions(input_tensor_name);
-    const zdl::DlSystem::TensorShape& bufferShape = *inputDims_opt;
-    std::vector<size_t> strides(bufferShape.rank());
-    strides[strides.size() - 1] = size_of_input;
-    size_t product = 1;
-    for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i];
-    size_t stride = strides[strides.size() - 1];
-    for (size_t i = bufferShape.rank() - 1; i > 0; i--) {
-      stride *= bufferShape[i];
-      strides[i-1] = stride;
-    }
-    printf("input product is %lu\n", product);
-    inputBuffer = ubFactory.createUserBuffer(NULL,
-                                             product*size_of_input,
-                                             strides,
-                                             use_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingTf8 : (zdl::DlSystem::UserBufferEncoding*)&userBufferEncodingFloat);
-
-    inputMap.add(input_tensor_name, inputBuffer.get());
-  }
-
-  if (use_extra) {
-    const char *extra_tensor_name = strListi.at(1);
-    const auto &extraDims_opt = snpe->getInputDimensions(extra_tensor_name);
-    const zdl::DlSystem::TensorShape& bufferShape = *extraDims_opt;
-    std::vector<size_t> strides(bufferShape.rank());
-    strides[strides.size() - 1] = sizeof(float);
-    size_t product = 1;
-    for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i];
-    size_t stride = strides[strides.size() - 1];
-    for (size_t i = bufferShape.rank() - 1; i > 0; i--) {
-      stride *= bufferShape[i];
-      strides[i-1] = stride;
-    }
-    printf("extra product is %lu\n", product);
-    extraBuffer = ubFactory.createUserBuffer(NULL, product*sizeof(float), strides, &userBufferEncodingFloat);
-
-    inputMap.add(extra_tensor_name, extraBuffer.get());
-  }
-
  // create output buffer
-  {
-    const zdl::DlSystem::TensorShape& bufferShape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
+  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
+  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
+
+  const auto &output_tensor_names_opt = snpe->getOutputTensorNames();
+  if (!output_tensor_names_opt) throw std::runtime_error("Error obtaining output tensor names");
+  const auto &output_tensor_names = *output_tensor_names_opt;
+  assert(output_tensor_names.size() == 1);
+  const char *output_tensor_name = output_tensor_names.at(0);
+  const zdl::DlSystem::TensorShape &buffer_shape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
  if (output_size != 0) {
-      assert(output_size == bufferShape[1]);
+    assert(output_size == buffer_shape[1]);
  } else {
-      output_size = bufferShape[1];
-    }
-
-    std::vector<size_t> outputStrides = {output_size * sizeof(float), sizeof(float)};
-    outputBuffer = ubFactory.createUserBuffer(output, output_size * sizeof(float), outputStrides, &userBufferEncodingFloat);
-    outputMap.add(output_tensor_name, outputBuffer.get());
+    output_size = buffer_shape[1];
  }
+  std::vector<size_t> output_strides = {output_size * sizeof(float), sizeof(float)};
+  output_buffer = ub_factory.createUserBuffer(output, output_size * sizeof(float), output_strides, &ub_encoding_float);
+  output_map.add(output_tensor_name, output_buffer.get());

 #ifdef USE_THNEED
-  if (Runtime == zdl::DlSystem::Runtime_t::GPU) {
+  if (snpe_runtime == zdl::DlSystem::Runtime_t::GPU) {
    thneed.reset(new Thneed());
  }
 #endif
 }

-void SNPEModel::addRecurrent(float *state, int state_size) {
-  recurrent = state;
-  recurrent_size = state_size;
-  recurrentBuffer = this->addExtra(state, state_size, 3);
-}
-
-void SNPEModel::addTrafficConvention(float *state, int state_size) {
-  trafficConvention = state;
-  trafficConventionBuffer = this->addExtra(state, state_size, 2);
-}
-
-void SNPEModel::addDesire(float *state, int state_size) {
-  desire = state;
-  desireBuffer = this->addExtra(state, state_size, 1);
-}
-
-void SNPEModel::addNavFeatures(float *state, int state_size) {
-  navFeatures = state;
-  navFeaturesBuffer = this->addExtra(state, state_size, 1);
-}
-
-void SNPEModel::addDrivingStyle(float *state, int state_size) {
-    drivingStyle = state;
-    drivingStyleBuffer = this->addExtra(state, state_size, 2);
-}
-
-void SNPEModel::addCalib(float *state, int state_size) {
-  calib = state;
-  calibBuffer = this->addExtra(state, state_size, 1);
-}
-
-void SNPEModel::addImage(float *image_buf, int buf_size) {
-  input = image_buf;
-  input_size = buf_size;
-}
-
-void SNPEModel::addExtra(float *image_buf, int buf_size) {
-  extra = image_buf;
-  extra_size = buf_size;
+void SNPEModel::addInput(const std::string name, float *buffer, int size) {
+  const int idx = inputs.size();
+  const auto &input_tensor_names_opt = snpe->getInputTensorNames();
+  if (!input_tensor_names_opt) throw std::runtime_error("Error obtaining input tensor names");
+  const auto &input_tensor_names = *input_tensor_names_opt;
+  const char *input_tensor_name = input_tensor_names.at(idx);
+  const bool input_tf8 = use_tf8 && strcmp(input_tensor_name, "input_img") == 0;  // TODO: This is a terrible hack, get rid of this name check both here and in onnx_runner.py
+  printf("adding index %d: %s\n", idx, input_tensor_name);
+
+  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
+  zdl::DlSystem::UserBufferEncodingTf8 ub_encoding_tf8(0, 1./255); // network takes 0-1
+  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
+  zdl::DlSystem::UserBufferEncoding *input_encoding = input_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_tf8 : (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_float;
+
+  const auto &buffer_shape_opt = snpe->getInputDimensions(input_tensor_name);
+  const zdl::DlSystem::TensorShape &buffer_shape = *buffer_shape_opt;
+  size_t size_of_input = input_tf8 ? sizeof(uint8_t) : sizeof(float);
+  std::vector<size_t> strides(buffer_shape.rank());
+  strides[strides.size() - 1] = size_of_input;
+  size_t product = 1;
+  for (size_t i = 0; i < buffer_shape.rank(); i++) product *= buffer_shape[i];
+  size_t stride = strides[strides.size() - 1];
+  for (size_t i = buffer_shape.rank() - 1; i > 0; i--) {
+    stride *= buffer_shape[i];
+    strides[i-1] = stride;
  }

-std::unique_ptr<zdl::DlSystem::IUserBuffer> SNPEModel::addExtra(float *state, int state_size, int idx) {
-  // get input and output names
-  const auto real_idx = idx + (use_extra ? 1 : 0);
-  const auto &strListi_opt = snpe->getInputTensorNames();
-  if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names");
-  const auto &strListi = *strListi_opt;
-  const char *input_tensor_name = strListi.at(real_idx);
-  printf("adding index %d: %s\n", real_idx, input_tensor_name);
-
-  zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat;
-  zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-  std::vector<size_t> retStrides = {state_size * sizeof(float), sizeof(float)};
-  auto ret = ubFactory.createUserBuffer(state, state_size * sizeof(float), retStrides, &userBufferEncodingFloat);
-  inputMap.add(input_tensor_name, ret.get());
-  return ret;
+  auto input_buffer = ub_factory.createUserBuffer(buffer, product*size_of_input, strides, input_encoding);
+  input_map.add(input_tensor_name, input_buffer.get());
+  inputs.push_back(std::unique_ptr<SNPEModelInput>(new SNPEModelInput(name, buffer, size, std::move(input_buffer))));
 }

 void SNPEModel::execute() {
-  bool ret = inputBuffer->setBufferAddress(input);
-  assert(ret == true);
-  if (use_extra) {
-    bool extra_ret = extraBuffer->setBufferAddress(extra);
-    assert(extra_ret == true);
-  }
-  if (!snpe->execute(inputMap, outputMap)) {
+  if (!snpe->execute(input_map, output_map)) {
    PrintErrorStringAndExit();
  }
 }
-
--- a/selfdrive/modeld/runners/snpemodel.h
+++ b/selfdrive/modeld/runners/snpemodel.h
@ -11,7 +11,7 @@
 #include <SNPE/SNPEBuilder.hpp>
 #include <SNPE/SNPEFactory.hpp>

-#include "runmodel.h"
+#include "selfdrive/modeld/runners/runmodel.h"

 #define USE_CPU_RUNTIME 0
 #define USE_GPU_RUNTIME 1
@ -21,17 +21,20 @@
 #include "selfdrive/modeld/thneed/thneed.h"
 #endif

+// ModelInput that additionally owns the SNPE user buffer registered for this input, so that
+// changing the host buffer also re-points the SNPE side at the new address.
+struct SNPEModelInput : public ModelInput {
+  std::unique_ptr<zdl::DlSystem::IUserBuffer> snpe_buffer;
+
+  SNPEModelInput(const std::string _name, float *_buffer, int _size, std::unique_ptr<zdl::DlSystem::IUserBuffer> _snpe_buffer) : ModelInput(_name, _buffer, _size), snpe_buffer(std::move(_snpe_buffer)) {}
+  void setBuffer(float *_buffer, int _size) override {
+    ModelInput::setBuffer(_buffer, _size);
+    // The call must live outside assert(): when NDEBUG is defined the assert expression is
+    // not evaluated at all, which would silently skip updating the SNPE buffer address.
+    const bool ret = snpe_buffer->setBufferAddress(_buffer);
+    assert(ret == true);
+    (void)ret;  // silence unused-variable warnings when asserts are compiled out
+  }
+};
+
 class SNPEModel : public RunModel {
 public:
-  SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
-  void addRecurrent(float *state, int state_size);
-  void addTrafficConvention(float *state, int state_size);
-  void addCalib(float *state, int state_size);
-  void addDesire(float *state, int state_size);
-  void addDrivingStyle(float *state, int state_size);
-  void addNavFeatures(float *state, int state_size);
-  void addImage(float *image_buf, int buf_size);
-  void addExtra(float *image_buf, int buf_size);
+  SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
+  void addInput(const std::string name, float *buffer, int size);
  void execute();

 #ifdef USE_THNEED
@ -43,44 +46,16 @@ private:
  std::string model_data;

 #ifdef QCOM2
-  zdl::DlSystem::Runtime_t Runtime;
+  zdl::DlSystem::Runtime_t snpe_runtime;
 #endif

  // snpe model stuff
  std::unique_ptr<zdl::SNPE::SNPE> snpe;
+  zdl::DlSystem::UserBufferMap input_map;
+  zdl::DlSystem::UserBufferMap output_map;
+  std::unique_ptr<zdl::DlSystem::IUserBuffer> output_buffer;

-  // snpe input stuff
-  zdl::DlSystem::UserBufferMap inputMap;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> inputBuffer;
-  float *input;
-  size_t input_size;
  bool use_tf8;
-
-  // snpe output stuff
-  zdl::DlSystem::UserBufferMap outputMap;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> outputBuffer;
  float *output;
  size_t output_size;
-
-  // extra input stuff
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> extraBuffer;
-  float *extra;
-  size_t extra_size;
-  bool use_extra;
-
-  // recurrent and desire
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> addExtra(float *state, int state_size, int idx);
-  float *recurrent;
-  size_t recurrent_size;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> recurrentBuffer;
-  float *trafficConvention;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> trafficConventionBuffer;
-  float *desire;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> desireBuffer;
-  float *navFeatures;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> navFeaturesBuffer;
-  float *drivingStyle;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> drivingStyleBuffer;
-  float *calib;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> calibBuffer;
 };
--- a/selfdrive/modeld/runners/thneedmodel.cc
+++ b/selfdrive/modeld/runners/thneedmodel.cc
@ -1,78 +1,56 @@
 #include "selfdrive/modeld/runners/thneedmodel.h"

-#include <cassert>
+#include "common/swaglog.h"

-ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra, bool luse_tf8, cl_context context) {
+ThneedModel::ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool luse_tf8, cl_context context) {
  thneed = new Thneed(true, context);
-  thneed->load(path);
+  thneed->load(path.c_str());
  thneed->clexec();

  recorded = false;
-  output = loutput;
-  use_extra = luse_extra;
+  output = _output;
 }

-void ThneedModel::addRecurrent(float *state, int state_size) {
-  recurrent = state;
+// Returns a pointer to the thneed CL input buffer that backs the input registered under
+// `name`, or nullptr when no such buffer is available. thneed's input_clmem is indexed in
+// the reverse of the order in which inputs were registered.
+void* ThneedModel::getCLBuffer(const std::string name) {
+  int index = -1;
+  for (int i = 0; i < inputs.size(); i++) {
+    if (name == inputs[i]->name) {
+      index = i;
+      break;
    }
-
-void ThneedModel::addTrafficConvention(float *state, int state_size) {
-  trafficConvention = state;
-}
-
-void ThneedModel::addDesire(float *state, int state_size) {
-  desire = state;
-}
-
-void ThneedModel::addDrivingStyle(float *state, int state_size) {
-    drivingStyle = state;
-}
-
-void ThneedModel::addNavFeatures(float *state, int state_size) {
-  navFeatures = state;
  }

-void ThneedModel::addImage(float *image_input_buf, int buf_size) {
-  input = image_input_buf;
+  if (index == -1) {
+    LOGE("Tried to get CL buffer for input `%s` but no input with this name exists", name.c_str());
+    assert(false);
+    // With asserts compiled out, fall back to "no buffer" instead of indexing
+    // input_clmem[inputs.size()] (one past the registered inputs) below.
+    return nullptr;
  }

-void ThneedModel::addExtra(float *extra_input_buf, int buf_size) {
-  extra = extra_input_buf;
-}
-
-void* ThneedModel::getInputBuf() {
-  if (use_extra && thneed->input_clmem.size() > 5) return &(thneed->input_clmem[5]);
-  else if (!use_extra && thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]);
-  else return nullptr;
+  if (thneed->input_clmem.size() >= inputs.size()) {
+    return &thneed->input_clmem[inputs.size() - index - 1];
+  } else {
+    return nullptr;
  }
-
-void* ThneedModel::getExtraBuf() {
-  if (thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]);
-  else return nullptr;
 }

 void ThneedModel::execute() {
+  // The first call records the thneed command queue (copy inputs, run, copy output, stop);
+  // later calls go through thneed->execute(). In both paths the registered input buffers
+  // are handed over in reverse registration order.
  if (!recorded) {
    thneed->record = true;
-    if (use_extra) {
-      float *inputs[6] = {recurrent, navFeatures, trafficConvention, desire, extra, input};
-      thneed->copy_inputs(inputs);
-    } else {
-      float *inputs[5] = {recurrent, navFeatures, trafficConvention, desire, input};
-      thneed->copy_inputs(inputs);
+    // std::vector instead of a variable-length array: VLAs are a compiler extension in C++
+    // and are not valid for a zero-sized input list.
+    std::vector<float*> input_buffers(inputs.size());
+    for (size_t i = 0; i < inputs.size(); i++) {
+      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    }
+
+    thneed->copy_inputs(input_buffers.data());
    thneed->clexec();
    thneed->copy_output(output);
    thneed->stop();

    recorded = true;
  } else {
-    if (use_extra) {
-      float *inputs[6] = {recurrent, navFeatures, trafficConvention, desire, extra, input};
-      thneed->execute(inputs, output);
-    } else {
-      float *inputs[5] = {recurrent, navFeatures, trafficConvention, desire, input};
-      thneed->execute(inputs, output);
+    std::vector<float*> input_buffers(inputs.size());
+    for (size_t i = 0; i < inputs.size(); i++) {
+      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    }
+    thneed->execute(input_buffers.data(), output);
  }
 }
--- a/selfdrive/modeld/runners/thneedmodel.h
+++ b/selfdrive/modeld/runners/thneedmodel.h
@ -5,31 +5,11 @@

 class ThneedModel : public RunModel {
 public:
-  ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false, bool use_tf8 = false, cl_context context = NULL);
-  void addRecurrent(float *state, int state_size);
-  void addTrafficConvention(float *state, int state_size);
-  void addDesire(float *state, int state_size);
-  void addNavFeatures(float *state, int state_size);
-  void addDrivingStyle(float *state, int state_size);
-  void addImage(float *image_buf, int buf_size);
-  void addExtra(float *image_buf, int buf_size);
+  ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
+  void *getCLBuffer(const std::string name);
  void execute();
-  void* getInputBuf();
-  void* getExtraBuf();
 private:
  Thneed *thneed = NULL;
  bool recorded;
-  bool use_extra;
-
-  float *input;
-  float *extra;
  float *output;
-
-  // recurrent and desire
-  float *recurrent;
-  float *trafficConvention;
-  float *drivingStyle;
-  float *desire;
-  float *navFeatures;
 };
-