You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
58 lines
1.4 KiB
58 lines
1.4 KiB
1 month ago
|
import time
|
||
|
|
||
|
# Location of the ONNX model file that the benchmark below loads
# (and that the disabled export snippet writes).
onnx_path = "/tmp/my.onnx"
# Side length used for the square N x N tensors and in the FLOP count.
N = 2048
# Number of chained layer applications; also a factor in the FLOP count.
CNT = 400
|
||
|
|
||
|
# Disabled snippet, kept as a bare string so it is never executed.
# Remove the surrounding triple quotes to (re)generate the ONNX model at
# `onnx_path`; it needs torch and relies on N, CNT and onnx_path defined above.
"""
import torch
import torch.nn as nn
#dtype = torch.bfloat16
dtype = torch.float32
class MatMul(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Linear(N, N, bias=False)
    def forward(self, x):
        x = x.to(dtype)
        for i in range(CNT): x = self.a(x).relu()
        return x.to(torch.float32)

torch_model = MatMul().to(dtype)
torch.onnx.export(torch_model, torch.randn(N, N), onnx_path)
"""
|
||
|
|
||
|
# Disabled snippet, kept as a bare string so it is never executed.
# Remove the surrounding triple quotes to run the model at `onnx_path`
# through tinygrad's ONNX runner instead; it needs onnx and tinygrad.
"""
import onnx
from tinygrad.tensor import Tensor
from extra.onnx import get_run_onnx
out = get_run_onnx(onnx.load(onnx_path))({"onnx::MatMul_0": Tensor.zeros(N, N)})
for x in out.values(): x.realize()
"""
|
||
|
|
||
|
from openvino.runtime import Core
|
||
|
# --- OpenVINO benchmark -----------------------------------------------------
# List every device OpenVINO reports, together with its full device name.
core = Core()
devices = core.available_devices
for device in devices:
    device_name = core.get_property(device, "FULL_DEVICE_NAME")
    print(f"{device}: {device_name}")

# Read the ONNX model from `onnx_path` and compile it for the device named
# 'GPU.0'. NOTE(review): the target device is hard-coded and is not checked
# against the list printed above; presumably compile_model raises if no such
# device exists -- confirm.
model = core.read_model(onnx_path)
compiled_model = core.compile_model(model, device_name='GPU.0')
print(compiled_model)

# Fill every model input tensor with the constant 2 before running.
ireq = compiled_model.create_infer_request()
for model_input in compiled_model.inputs:
    tensor = ireq.get_tensor(model_input)
    tensor.data[:] = 2
    print(tensor)

# Two inferences run outside the timed region (presumably warm-up, so that
# one-time setup cost is not measured).
print("request")
ireq.infer()
ireq.infer()
print("did one")

# Timed region: REPS back-to-back synchronous inferences.
REPS = 20
st = time.perf_counter()
for _ in range(REPS):
    ireq.infer()
et = time.perf_counter() - st

# The printed time is the total for all REPS runs, in milliseconds.
# Throughput counts 2*N*N*N operations per layer, times CNT layers, times
# REPS runs. This assumes the model at `onnx_path` is the CNT-layer N x N
# chain described by the disabled export snippet in this file -- TODO confirm.
print(f"{et*1000:.2f} ms {(CNT*N*N*N*REPS*2/et)*1e-9:.2f} GFLOPS")
|
||
|
|