openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

94 lines
3.6 KiB

import unittest, numpy as np
from tinygrad import Tensor, Device, TinyJit
from tinygrad.helpers import Timing, CI, OSX, getenv
import multiprocessing.shared_memory as shared_memory
# Side length of the square (N x N) tensors used by the tests below. Looked up
# through tinygrad's getenv helper under the key "NSZ" (presumably an environment
# variable -- confirm against tinygrad.helpers), falling back to 256.
N = getenv("NSZ", 256)
class TestCopySpeed(unittest.TestCase):
  """Copy-throughput checks for N x N tensors moved between devices.

  Most tests wrap the copy in ``Timing`` contexts whose ``on_exit`` callback
  formats ``nbytes / ns`` (bytes per nanosecond, i.e. GB/s). Only the JIT
  variants additionally assert that the copied data matches the source.
  """

  @classmethod
  def setUpClass(cls):
    """Synchronize the default device once before any test in the class runs."""
    Device[Device.DEFAULT].synchronize()

  def testCopySHMtoDefault(self):
    """Time copying a shared-memory-backed disk tensor to the default device."""
    # 4 bytes per element -- presumably matches the default tensor dtype; confirm.
    s = shared_memory.SharedMemory(name="test_X", create=True, size=N*N*4)
    try:
      # Only the named segment is needed; this process's own mapping is released.
      s.close()
      if CI and not OSX:
        t = Tensor.empty(N, N, device="disk:/dev/shm/test_X").realize()
      else:
        t = Tensor.empty(N, N, device="disk:shm:test_X").realize()
      for _ in range(3):
        # Outer timer covers queueing plus device sync; inner timer only queueing.
        with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
          with Timing("queue: "):
            t.to(Device.DEFAULT).realize()
          Device[Device.DEFAULT].synchronize()
    finally:
      # Always remove the segment: otherwise a failure above leaves "test_X"
      # behind and the next run's create=True raises FileExistsError.
      s.unlink()

  def testCopyCPUtoDefault(self):
    """Time copying one realized CPU tensor to the default device, three times."""
    t = Tensor.ones(N, N, device="CPU").contiguous().realize()
    print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
    for _ in range(3):
      with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
        with Timing("queue: "):
          t.to(Device.DEFAULT).realize()
        Device[Device.DEFAULT].synchronize()

  def testCopyCPUtoDefaultFresh(self):
    """Like testCopyCPUtoDefault, but with a newly created source tensor per run."""
    print("fresh copy")
    for _ in range(3):
      t = Tensor.ones(N, N, device="CPU").contiguous().realize()
      # The lambda reads `t` before the `del` below; the noqa silences the linter.
      with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"): # noqa: F821
        with Timing("queue: "):
          t.to(Device.DEFAULT).realize()
        Device[Device.DEFAULT].synchronize()
      del t

  def testCopyDefaulttoCPU(self):
    """Time copying a tensor from the default device to the CPU."""
    t = Tensor.ones(N, N).contiguous().realize()
    print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
    for _ in range(3):
      with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
        t.to('CPU').realize()

  def testCopyDefaulttoCPUJit(self):
    """Time a TinyJit-wrapped default-device -> CPU copy and verify the data."""
    # skipTest raises SkipTest, so the test is reported as skipped; returning
    # unittest.skip(...) only built an unused decorator and counted as a pass.
    if Device.DEFAULT == "CPU":
      self.skipTest("CPU to CPU copy is a no-op")

    @TinyJit
    def _do_copy(t): return t.to('CPU').realize()

    t = Tensor.randn(N, N).contiguous().realize()
    Device[Device.DEFAULT].synchronize()
    for _ in range(5):
      with Timing(f"copy {Device.DEFAULT} -> CPU {t.nbytes()/(1024**2)}M: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
        x = _do_copy(t)
        Device[Device.DEFAULT].synchronize()
      # Checked outside the timed region so the comparison does not skew timing.
      np.testing.assert_equal(t.numpy(), x.numpy())

  def testCopyCPUtoDefaultJit(self):
    """Time a TinyJit-wrapped CPU -> default-device copy and verify the data."""
    # See testCopyDefaulttoCPUJit: skipTest actually marks the test as skipped.
    if Device.DEFAULT == "CPU":
      self.skipTest("CPU to CPU copy is a no-op")

    @TinyJit
    def _do_copy(x): return x.to(Device.DEFAULT).realize()

    for _ in range(5):
      t = Tensor.randn(N, N, device="CPU").contiguous().realize()
      Device["CPU"].synchronize()
      with Timing(f"copy CPU -> {Device.DEFAULT} {t.nbytes()/(1024**2)}M: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
        x = _do_copy(t)
        Device[Device.DEFAULT].synchronize()
      np.testing.assert_equal(t.numpy(), x.numpy())

  @unittest.skipIf(CI, "CI doesn't have 6 GPUs")
  @unittest.skipIf(Device.DEFAULT != "CL", "only test this on CL")
  def testCopyCPUto6GPUs(self):
    """Time copying one CPU tensor to six CL devices (CL:0 .. CL:5)."""
    # Imported here so the CL runtime is only loaded when this test runs.
    from tinygrad.runtime.ops_cl import CLDevice
    if len(CLDevice.device_ids) != 6:
      self.skipTest("computer doesn't have 6 GPUs")
    t = Tensor.ones(N, N, device="CPU").contiguous().realize()
    print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
    for _ in range(3):
      with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s ({t.nbytes()*6/ns:.2f} GB/s total)"):
        with Timing("queue: "):
          for g in range(6):
            t.to(f"CL:{g}").realize()
        # NOTE(review): only Device["CL"] is synchronized here; confirm this
        # also waits for the copies queued on the other CL:{g} devices.
        Device["CL"].synchronize()
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
  unittest.main()