Add openpilot tests
tinygrad_repo/test/test_allocators.py | 136 (new file)
@@ -0,0 +1,136 @@
#!/usr/bin/env python
import unittest
import numpy as np
from weakref import ref
from tinygrad.helpers import GlobalCounters
from tinygrad.runtime.lib import RawBuffer, LRUAllocator
from tinygrad.helpers import dtypes, prod
from tinygrad.ops import Device
from tinygrad.tensor import Tensor

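# On the GPU backend, assert that no tracked device objects are left alive after a test.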
def check_gc():
  if Device.DEFAULT == "GPU":
    from extra.introspection import print_objects
    assert print_objects() == 0

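# Stand-in for a device allocation: id starts at 1 and must reach 0 (via _do_free) before it is garbage collected.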
class FakeDeviceBuffer:
  def __init__(self, sz, dt, device):
    self.id = 1
    self.size = sz
    self.dtype = dt
    self.device = device
  def __del__(self):
    assert self.id == 0, "Should have called _do_free() first"

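# LRUAllocator that hands out FakeDeviceBuffers instead of touching real device memory.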
class FakeAllocator(LRUAllocator):
  def _do_alloc(self, size, dtype, device, **kwargs): return FakeDeviceBuffer(size, dtype, device)
  def _do_free(self, buf):
    buf.id -= 1
    assert buf.id == 0, f"Free should be called exactly once, but buf.id is {buf.id}"
  def __del__(self): # Fake allocator should clear all buffers after each test.
    for v in self.cached_buffers.values():
      for buf, _ in v: self._free_buffer(buf)

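# RawBuffer backed by whichever FakeAllocator is currently installed in FAKE_GLOBAL_ALLOCATOR.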
FAKE_GLOBAL_ALLOCATOR = None
class FakeBuffer(RawBuffer):
  def __init__(self, size, dtype, device='0'):
    global FAKE_GLOBAL_ALLOCATOR
    super().__init__(size, dtype, allocator=FAKE_GLOBAL_ALLOCATOR, **{'device': device})
    assert self._buf.size == size and self._buf.dtype == dtype and self._buf.device == device, "This allocator requires an exact match of size, dtype and device."
  @classmethod
  def fromCPU(cls, x:np.ndarray, **kwargs): return cls(prod(x.shape), dtypes.from_np(x.dtype), **kwargs)
  def toCPU(self): return np.empty(self.size, dtype=self.dtype.np)

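# Allocate a FakeBuffer through the given allocator, then uninstall it again.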
def alloc(allocator, size, dtype, **kwargs):
  global FAKE_GLOBAL_ALLOCATOR
  FAKE_GLOBAL_ALLOCATOR = allocator
  buf = FakeBuffer(size, dtype, **kwargs)
  assert buf.dtype == dtype and buf.size == size
  FAKE_GLOBAL_ALLOCATOR = None
  return buf

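# Allocate and immediately drop a buffer; the weakref to its device buffer lets tests check cache reuse.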
def alloc_free_trace(allocator, size, dtype, **kwargs):
  buf = alloc(allocator, size, dtype, **kwargs)
  return ref(buf._buf)

def cmp_trace_and_buf(buf, trace_ref): return trace_ref and trace_ref() == buf._buf

class TestAllocators(unittest.TestCase):
  def test_lru_allocator_reusage(self):
    mc, mu = GlobalCounters.mem_cached, GlobalCounters.mem_used
    def test():
      lru_allocator = FakeAllocator(2048)
      traced_buf = alloc_free_trace(lru_allocator, 16, dtypes.float32)
      assert GlobalCounters.mem_cached - mc == 16*dtypes.float32.itemsize, "Buffer should be cached"
      for _ in range(32):
        def __test():
          buf = alloc(lru_allocator, 16, dtypes.float32)
          assert cmp_trace_and_buf(buf, traced_buf), "Buffer should be reused"
        __test()

      usedbuf = alloc(lru_allocator, 16, dtypes.float32)
      for _ in range(32):
        def __test():
          buf = alloc(lru_allocator, 16, dtypes.float32)
          assert usedbuf != buf, "Nobody should get the used buffer"
        __test()
      assert GlobalCounters.mem_used - mu == 16*dtypes.float32.itemsize, "Only usedbuf is still allocated."
    test()
    check_gc()

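  # Cached buffers must be evicted so device usage stays within the allocator's limit.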
  def test_lru_allocator_cache_free(self):
    mc, mu = GlobalCounters.mem_cached, GlobalCounters.mem_used
    def test():
      lru_allocator = FakeAllocator(128)
      refs = []
      for _ in range(32):
        refs.append(alloc_free_trace(lru_allocator, 16, dtypes.float32))
      for sz in range(1, 32):
        alloc_free_trace(lru_allocator, sz, dtypes.float32)
      assert GlobalCounters.mem_used + GlobalCounters.mem_cached - mc - mu <= 128, "Should not allocate on device more than allowed (128)"
      for r in refs: assert r() is None, "All refs should be dead, since buffers were cleared from cache"
    test()
    check_gc()

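  # Buffers are cached per device: each device reuses only its own cached buffer.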
  def test_lru_allocator_multidevice(self):
    def test():
      lru_allocator = FakeAllocator(256)
      refs = []
      for i in range(8):
        refs.append(alloc_free_trace(lru_allocator, 16, dtypes.float32, device=str(i)))
      for i in range(64):
        def __test():
          dev = str(i % 8)
          buf = alloc(lru_allocator, 16, dtypes.float32, device=dev)
          assert cmp_trace_and_buf(buf, refs[i%8]), "Buffer should be reused"
        __test()
      for r in refs: assert r() is not None, "All refs should be cached"
    test()
    check_gc()

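  # GPU-only: copy a tensor back to the host while the OpenCL allocator is under memory pressure.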
  @unittest.skip("failing in CI")
  def test_gpu_copyout(self):
    def test():
      from tinygrad.runtime.ops_gpu import CL

      # Allocation to init the allocator.
      tx = Tensor.rand(1)
      tx.realize()
      free_space = CL.cl_allocator.free_space[tx.lazydata.realized._device]

      # Spawn a throwaway allocation to fill half of free_space.
      will_allocate = free_space // 3
      trash_allocation_size = free_space // 2

      def sp():
        trash_buffer = Tensor.rand(trash_allocation_size // 4)
        trash_buffer.realize()
      sp()

      xx = Tensor.rand(will_allocate // 4)
      _ = xx.numpy()
    test()
    check_gc()

if __name__ == "__main__":
  unittest.main()