openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
3.4 KiB

import unittest, numpy as np
from tinygrad import Tensor, Device, TinyJit
from tinygrad.helpers import Timing, CI, OSX
import multiprocessing.shared_memory as shared_memory
# Side length of the test tensors (N x N, or N x N x 4 in the JIT tests);
# the smaller value is used when CI is set.
N = 256 if CI else 4096
class TestCopySpeed(unittest.TestCase):
    """Copy-throughput checks between shared memory, CPU and the default device.

    Most tests time the copy with two nested ``Timing`` blocks: the inner
    "queue: " block covers only ``.to(...).realize()``, while the outer
    "sync: " block also covers the device ``synchronize()`` call and is given
    an ``on_exit`` callback that formats ``nbytes / ns`` as GB/s.
    """

    @classmethod
    def setUpClass(cls):
        """Synchronize the default device once before the tests of this class run."""
        Device[Device.DEFAULT].synchronize()

    def testCopySHMtoDefault(self):
        """Copy a tensor backed by the "test_X" shared-memory segment to the default device."""
        # N*N*4 bytes: presumably 4 bytes per element of the default dtype -- TODO confirm.
        s = shared_memory.SharedMemory(name="test_X", create=True, size=N*N*4)
        try:
            # The tensor below opens the segment by name, so this handle can be closed.
            s.close()
            if CI and not OSX:
                t = Tensor.empty(N, N, device="disk:/dev/shm/test_X").realize()
            else:
                t = Tensor.empty(N, N, device="disk:shm:test_X").realize()
            for _ in range(3):
                with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
                    with Timing("queue: "):
                        t.to(Device.DEFAULT).realize()
                    Device[Device.DEFAULT].synchronize()
        finally:
            # Always remove the segment: a leftover "test_X" would make the next
            # run fail at creation because create=True requires a new name.
            s.unlink()

    def testCopyCPUtoDefault(self):
        """Copy the same CPU tensor to the default device three times."""
        t = Tensor.ones(N, N, device="CPU").contiguous().realize()
        print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
        for _ in range(3):
            with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
                with Timing("queue: "):
                    t.to(Device.DEFAULT).realize()
                Device[Device.DEFAULT].synchronize()

    def testCopyCPUtoDefaultFresh(self):
        """Copy a newly created CPU tensor to the default device on every iteration."""
        print("fresh copy")
        for _ in range(3):
            t = Tensor.ones(N, N, device="CPU").contiguous().realize()
            with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"): # noqa: F821
                with Timing("queue: "):
                    t.to(Device.DEFAULT).realize()
                Device[Device.DEFAULT].synchronize()
            # Drop the reference so the next iteration builds a new source tensor
            # (presumably to avoid reusing the previous buffer -- TODO confirm).
            del t

    def testCopyDefaulttoCPU(self):
        """Copy a tensor from the default device to the CPU three times."""
        t = Tensor.ones(N, N).contiguous().realize()
        print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
        for _ in range(3):
            with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
                t.to('CPU').realize()

    def testCopyDefaulttoCPUJit(self):
        """Copy default device -> CPU through a ``TinyJit`` function and verify the data."""
        # skipTest() raises SkipTest, so the run is reported as skipped, not passed.
        if Device.DEFAULT == "CPU":
            self.skipTest("CPU to CPU copy is a no-op")

        @TinyJit
        def _do_copy(t):
            return t.to('CPU').realize()

        t = Tensor.randn(N, N, 4).contiguous().realize()
        for _ in range(5):
            with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
                x = _do_copy(t)
                Device[Device.DEFAULT].synchronize()
            # Checked outside the timed block so the comparison is not measured.
            np.testing.assert_equal(t.numpy(), x.numpy())

    def testCopytoCPUtoDefaultJit(self):
        """Copy CPU -> default device through a ``TinyJit`` function and verify the data."""
        # skipTest() raises SkipTest, so the run is reported as skipped, not passed.
        if Device.DEFAULT == "CPU":
            self.skipTest("CPU to CPU copy is a no-op")

        @TinyJit
        def _do_copy(x):
            # Use the argument rather than a variable from the enclosing scope:
            # the source tensor is a different object on every iteration.
            return x.to(Device.DEFAULT).realize()

        for _ in range(5):
            t = Tensor.randn(N, N, 4, device="CPU").contiguous().realize()
            with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
                x = _do_copy(t)
                Device[Device.DEFAULT].synchronize()
            # Checked outside the timed block so the comparison is not measured.
            np.testing.assert_equal(t.numpy(), x.numpy())

    @unittest.skipIf(CI, "CI doesn't have 6 GPUs")
    @unittest.skipIf(Device.DEFAULT != "GPU", "only test this on GPU")
    def testCopyCPUto6GPUs(self):
        """Copy one CPU tensor to devices "gpu:0" through "gpu:5"; skipped unless exactly six are present."""
        # Imported here so the module still loads when this backend is not in use.
        from tinygrad.runtime.ops_gpu import CLDevice
        if len(CLDevice.device_ids) != 6:
            raise unittest.SkipTest("computer doesn't have 6 GPUs")
        t = Tensor.ones(N, N, device="CPU").contiguous().realize()
        print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
        for _ in range(3):
            with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s ({t.nbytes()*6/ns:.2f} GB/s total)"):
                with Timing("queue: "):
                    for g in range(6):
                        t.to(f"gpu:{g}").realize()
                Device["gpu"].synchronize()
# Run the tests with unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()