Converted onnx runner to python (#29856)
parent 2d69f3d63e
commit 9cc00c2101
8 changed files with 72 additions and 216 deletions
@@ -1,78 +0,0 @@
#!/usr/bin/env python3

import os
import sys
import numpy as np
from typing import Tuple, Dict, Union, Any

os.environ["OMP_NUM_THREADS"] = "4"
os.environ["OMP_WAIT_POLICY"] = "PASSIVE"

import onnxruntime as ort

ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}

def read(sz, tf8=False):
  dd = []
  gt = 0
  szof = 1 if tf8 else 4
  while gt < sz * szof:
    st = os.read(0, sz * szof - gt)
    assert(len(st) > 0)
    dd.append(st)
    gt += len(st)
  r = np.frombuffer(b''.join(dd), dtype=np.uint8 if tf8 else np.float32)
  if tf8:
    r = r / 255.
  return r

def write(d):
  os.write(1, d.tobytes())

def run_loop(m, tf8_input=False):
  ishapes = [[1]+ii.shape[1:] for ii in m.get_inputs()]
  keys = [x.name for x in m.get_inputs()]
  itypes = [ORT_TYPES_TO_NP_TYPES[x.type] for x in m.get_inputs()]

  # run once to initialize CUDA provider
  if "CUDAExecutionProvider" in m.get_providers():
    m.run(None, dict(zip(keys, [np.zeros(shp, dtype=itp) for shp, itp in zip(ishapes, itypes, strict=True)], strict=True)))

  print("ready to run onnx model", keys, ishapes, file=sys.stderr)
  while 1:
    inputs = []
    for k, shp, itp in zip(keys, ishapes, itypes, strict=True):
      ts = np.product(shp)
      #print("reshaping %s with offset %d" % (str(shp), offset), file=sys.stderr)
      inputs.append(read(ts, (k=='input_img' and tf8_input)).reshape(shp).astype(itp))
    ret = m.run(None, dict(zip(keys, inputs, strict=True)))
    #print(ret, file=sys.stderr)
    for r in ret:
      write(r.astype(np.float32))

if __name__ == "__main__":
  print(sys.argv, file=sys.stderr)
  print("Onnx available providers: ", ort.get_available_providers(), file=sys.stderr)
  options = ort.SessionOptions()
  options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL

  provider: Union[str, Tuple[str, Dict[Any, Any]]]
  if 'OpenVINOExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    provider = 'OpenVINOExecutionProvider'
  elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    options.intra_op_num_threads = 2
    provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'})
  else:
    options.intra_op_num_threads = 2
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    provider = 'CPUExecutionProvider'

  try:
    print("Onnx selected provider: ", [provider], file=sys.stderr)
    ort_session = ort.InferenceSession(sys.argv[1], options, providers=[provider])
    print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
    run_loop(ort_session, tf8_input=("--use_tf8" in sys.argv))
  except KeyboardInterrupt:
    pass
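
For context, the deleted runner above speaks a bare stdin/stdout protocol: the parent writes each input tensor as raw float32 bytes (raw uint8 for input_img when --use_tf8 is passed) and reads every output back as raw float32. A minimal driver sketch follows; the runner path, model path, and tensor sizes are made-up placeholders for illustration, not values from this PR.

import subprocess
import numpy as np

# Hypothetical paths and shapes -- the real runner path and model are set by modeld.
proc = subprocess.Popen(["./onnx_runner.py", "model.onnx"],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE)

frame = np.zeros((1, 128), dtype=np.float32)   # one input tensor per loop iteration
proc.stdin.write(frame.tobytes())              # the runner's read() consumes raw float32 bytes
proc.stdin.flush()

raw = proc.stdout.read(64 * 4)                 # the runner's write() emits outputs as float32
out = np.frombuffer(raw, dtype=np.float32)     # hypothetical 64-element output
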
@@ -1,88 +0,0 @@
#include "selfdrive/modeld/runners/onnxmodel.h"

#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <poll.h>
#include <unistd.h>

#include "common/util.h"

ONNXModel::ONNXModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
  LOGD("loading model %s", path.c_str());

  output = _output;
  output_size = _output_size;
  use_tf8 = _use_tf8;

  int err = pipe(pipein);
  assert(err == 0);
  err = pipe(pipeout);
  assert(err == 0);

  std::string onnx_runner = ONNXRUNNER_PATH;
  std::string tf8_arg = use_tf8 ? "--use_tf8" : "";

  proc_pid = fork();
  if (proc_pid == 0) {
    LOGD("spawning onnx process %s", onnx_runner.c_str());
    char *argv[] = {(char*)onnx_runner.c_str(), (char*)path.c_str(), (char*)tf8_arg.c_str(), nullptr};
    dup2(pipein[0], 0);
    dup2(pipeout[1], 1);
    close(pipein[0]);
    close(pipein[1]);
    close(pipeout[0]);
    close(pipeout[1]);
    execvp(onnx_runner.c_str(), argv);
    exit(1); // exit if the exec fails
  }

  // parent
  close(pipein[0]);
  close(pipeout[1]);
}

ONNXModel::~ONNXModel() {
  close(pipein[1]);
  close(pipeout[0]);
  kill(proc_pid, SIGTERM);
}

void ONNXModel::pwrite(float *buf, int size) {
  char *cbuf = (char *)buf;
  int tw = size*sizeof(float);
  while (tw > 0) {
    int err = write(pipein[1], cbuf, tw);
    //printf("host write %d\n", err);
    assert(err >= 0);
    cbuf += err;
    tw -= err;
  }
  LOGD("host write of size %d done", size);
}

void ONNXModel::pread(float *buf, int size) {
  char *cbuf = (char *)buf;
  int tr = size*sizeof(float);
  struct pollfd fds[1];
  fds[0].fd = pipeout[0];
  fds[0].events = POLLIN;
  while (tr > 0) {
    int err;
    err = poll(fds, 1, 10000);  // 10 second timeout
    assert(err == 1 || (err == -1 && errno == EINTR));
    LOGD("host read remaining %d/%lu poll %d", tr, size*sizeof(float), err);
    err = read(pipeout[0], cbuf, tr);
    assert(err > 0 || (err == 0 && errno == EINTR));
    cbuf += err;
    tr -= err;
  }
  LOGD("host read done");
}

void ONNXModel::execute() {
  for (auto &input : inputs) {
    pwrite(input->buffer, input->size);
  }
  pread(output, output_size);
}
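
pwrite and pread loop because a single write() or read() on a pipe may transfer fewer bytes than requested, and reads can be interrupted. The same exact-transfer pattern, sketched in Python purely for illustration (not code from this PR):

import os

def write_exact(fd, data):
  # os.write may accept fewer bytes than offered; keep writing until everything is sent
  view = memoryview(data)
  while len(view) > 0:
    n = os.write(fd, view)
    view = view[n:]

def read_exact(fd, size):
  # os.read may return fewer bytes than requested; collect chunks until `size` bytes arrive
  chunks, got = [], 0
  while got < size:
    chunk = os.read(fd, size - got)
    assert len(chunk) > 0, "unexpected EOF on pipe"
    chunks.append(chunk)
    got += len(chunk)
  return b''.join(chunks)
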
@@ -1,23 +0,0 @@
#pragma once

#include <string>

#include "selfdrive/modeld/runners/runmodel.h"

class ONNXModel : public RunModel {
public:
  ONNXModel(const std::string path, float *output, size_t output_size, int runtime, bool _use_tf8 = false, cl_context context = NULL);
  ~ONNXModel();
  void execute();
private:
  int proc_pid;
  float *output;
  size_t output_size;
  bool use_tf8;

  // pipe to communicate to onnx_runner subprocess
  void pread(float *buf, int size);
  void pwrite(float *buf, int size);
  int pipein[2];
  int pipeout[2];
};
@@ -1,9 +0,0 @@
# distutils: language = c++

from libcpp.string cimport string

from cereal.visionipc.visionipc cimport cl_context

cdef extern from "selfdrive/modeld/runners/onnxmodel.h":
  cdef cppclass ONNXModel:
    ONNXModel(string, float*, size_t, int, bool, cl_context)
@@ -0,0 +1,70 @@
import os
import sys
import numpy as np
from typing import Tuple, Dict, Union, Any

from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel

ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}

def create_ort_session(path):
  os.environ["OMP_NUM_THREADS"] = "4"
  os.environ["OMP_WAIT_POLICY"] = "PASSIVE"

  import onnxruntime as ort
  print("Onnx available providers: ", ort.get_available_providers(), file=sys.stderr)
  options = ort.SessionOptions()
  options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL

  provider: Union[str, Tuple[str, Dict[Any, Any]]]
  if 'OpenVINOExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    provider = 'OpenVINOExecutionProvider'
  elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    options.intra_op_num_threads = 2
    provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'})
  else:
    options.intra_op_num_threads = 2
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    provider = 'CPUExecutionProvider'

  print("Onnx selected provider: ", [provider], file=sys.stderr)
  ort_session = ort.InferenceSession(path, options, providers=[provider])
  print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
  return ort_session


class ONNXModel(RunModel):
  def __init__(self, path, output, runtime, use_tf8, cl_context):
    self.inputs = {}
    self.output = output
    self.use_tf8 = use_tf8

    self.session = create_ort_session(path)
    self.input_names = [x.name for x in self.session.get_inputs()]
    self.input_shapes = {x.name: [1, *x.shape[1:]] for x in self.session.get_inputs()}
    self.input_dtypes = {x.name: ORT_TYPES_TO_NP_TYPES[x.type] for x in self.session.get_inputs()}

    # run once to initialize CUDA provider
    if "CUDAExecutionProvider" in self.session.get_providers():
      self.session.run(None, {k: np.zeros(self.input_shapes[k], dtype=self.input_dtypes[k]) for k in self.input_names})
    print("ready to run onnx model", self.input_shapes, file=sys.stderr)

  def addInput(self, name, buffer):
    assert name in self.input_names
    self.inputs[name] = buffer

  def setInputBuffer(self, name, buffer):
    assert name in self.inputs
    self.inputs[name] = buffer

  def getCLBuffer(self, name):
    return None

  def execute(self):
    inputs = {k: (v.view(np.uint8) / 255. if self.use_tf8 and k == 'input_img' else v) for k,v in self.inputs.items()}
    inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()}
    outputs = self.session.run(None, inputs)
    assert len(outputs) == 1, "Only single model outputs are supported"
    self.output[:] = outputs[0]
    return self.output
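
The new class above keeps the RunModel interface: the caller preallocates a flat float32 output buffer, registers flat input buffers by name, and calls execute(). A hypothetical usage sketch follows; the model path, the single "input_img" input, and all buffer sizes are illustrative assumptions, not values from this PR.

import numpy as np
# assumed import location for the new class:
# from openpilot.selfdrive.modeld.runners.onnxmodel import ONNXModel

output = np.zeros(64, dtype=np.float32)      # sized to the model's single output (assumed)
model = ONNXModel("model.onnx", output, runtime=0, use_tf8=False, cl_context=None)

input_img = np.zeros(128, dtype=np.float32)  # flat buffer; execute() reshapes and casts it
model.addInput("input_img", input_img)

result = model.execute()                     # runs the ORT session and fills `output` in place
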
@@ -1,14 +0,0 @@
# distutils: language = c++
# cython: c_string_encoding=ascii

from libcpp cimport bool
from libcpp.string cimport string

from .onnxmodel cimport ONNXModel as cppONNXModel
from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel

cdef class ONNXModel(RunModel):
  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
    self.model = <cppRunModel *> new cppONNXModel(path, &output[0], len(output), runtime, use_tf8, context.context)