import itertools

import numpy as np
import onnx
import onnxruntime as ort

# Map ONNX Runtime type strings (as reported by get_inputs()/get_outputs())
# to the numpy dtypes used when building input arrays.
ORT_TYPES_TO_NP_TYPES = {
    'tensor(float16)': np.float16,
    'tensor(float)': np.float32,
    'tensor(uint8)': np.uint8,
}

def attributeproto_fp16_to_fp32(attr):
    # Reinterpret the raw fp16 bytes, widen to fp32, and write them back.
    float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
    attr.data_type = onnx.TensorProto.FLOAT
    attr.raw_data = float32_list.astype(np.float32).tobytes()

def convert_fp16_to_fp32(model):
    # Rewrite fp16 initializers in place (assumes they store their payload
    # in raw_data rather than the typed int32_data field).
    for i in model.graph.initializer:
        if i.data_type == onnx.TensorProto.FLOAT16:
            attributeproto_fp16_to_fp32(i)
    # Retype fp16 graph inputs and outputs to fp32.
    for i in itertools.chain(model.graph.input, model.graph.output):
        if i.type.tensor_type.elem_type == onnx.TensorProto.FLOAT16:
            i.type.tensor_type.elem_type = onnx.TensorProto.FLOAT
    for i in model.graph.node:
        # Redirect Cast-to-fp16 nodes to cast to fp32 instead
        # (assumes the Cast node's single attribute is 'to').
        if i.op_type == 'Cast' and i.attribute[0].i == onnx.TensorProto.FLOAT16:
            i.attribute[0].i = onnx.TensorProto.FLOAT
        # Convert fp16 tensors embedded in node attributes (e.g. Constant nodes).
        for a in i.attribute:
            if hasattr(a, 't') and a.t.data_type == onnx.TensorProto.FLOAT16:
                attributeproto_fp16_to_fp32(a.t)
    return model.SerializeToString()

def make_onnx_cpu_runner(model_path):
    options = ort.SessionOptions()
    options.intra_op_num_threads = 4
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    # Widen the model to fp32 first: the CPU execution provider's fp16 op
    # coverage is limited, so fp16 graphs are often unsupported or slow on CPU.
    model_data = convert_fp16_to_fp32(onnx.load(model_path))
    return ort.InferenceSession(model_data, options, providers=['CPUExecutionProvider'])
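
# Usage sketch (not part of the original module): build a runner and feed it
# zero-filled dummy inputs, using ORT_TYPES_TO_NP_TYPES to map ONNX Runtime
# type strings to numpy dtypes. "model.onnx" is a hypothetical path, and
# substituting 1 for dynamic dimensions is an assumption made for the sketch.
if __name__ == "__main__":
    runner = make_onnx_cpu_runner("model.onnx")
    feeds = {}
    for inp in runner.get_inputs():
        # Dynamic dims come back as strings (or None); pick 1 for the sketch.
        shape = [d if isinstance(d, int) else 1 for d in inp.shape]
        feeds[inp.name] = np.zeros(shape, dtype=ORT_TYPES_TO_NP_TYPES[inp.type])
    # Passing None as output_names fetches every model output.
    outputs = runner.run(None, feeds)
    print([o.shape for o in outputs])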