Add openpilot tests

Author: FrogAi
Date: 2024-03-06 14:58:47 -07:00
parent 2901597132
commit b39097a12d
259 changed files with 31176 additions and 12 deletions

Binary file added (not shown): 108 KiB

Binary file added (not shown): 7.9 KiB

File diff suppressed because it is too large.


@@ -0,0 +1,57 @@
#!/usr/bin/env python
import unittest
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.ops import Device
import torch

def get_question_samp(bsz, seq_len, vocab_size, seed):
  np.random.seed(seed)
  in_ids = np.random.randint(vocab_size, size=(bsz, seq_len))
  mask = np.random.choice([True, False], size=(bsz, seq_len))
  seg_ids = np.random.randint(1, size=(bsz, seq_len))
  return in_ids, mask, seg_ids

def set_equal_weights(mdl, torch_mdl):
  from tinygrad.nn.state import get_state_dict
  state, torch_state = get_state_dict(mdl), torch_mdl.state_dict()
  assert len(state) == len(torch_state)
  for k, v in state.items():
    assert k in torch_state
    torch_state[k].copy_(torch.from_numpy(v.numpy()))
  torch_mdl.eval()

class TestBert(unittest.TestCase):
  def test_questions(self):
    from models.bert import BertForQuestionAnswering
    from transformers import BertForQuestionAnswering as TorchBertForQuestionAnswering
    from transformers import BertConfig

    # small config so the test runs quickly
    config = {
      'vocab_size':24, 'hidden_size':2, 'num_hidden_layers':2, 'num_attention_heads':2,
      'intermediate_size':32, 'hidden_dropout_prob':0.1, 'attention_probs_dropout_prob':0.1,
      'max_position_embeddings':512, 'type_vocab_size':2
    }

    # Create in tinygrad
    Tensor.manual_seed(1337)
    mdl = BertForQuestionAnswering(**config)

    # Create in torch
    with torch.no_grad():
      torch_mdl = TorchBertForQuestionAnswering(BertConfig(**config))

    set_equal_weights(mdl, torch_mdl)

    # run both models on the same random inputs and compare outputs
    seeds = (1337, 3141)
    bsz, seq_len = 1, 16
    for _, seed in enumerate(seeds):
      in_ids, mask, seg_ids = get_question_samp(bsz, seq_len, config['vocab_size'], seed)
      out = mdl(Tensor(in_ids), Tensor(mask), Tensor(seg_ids))
      torch_out = torch_mdl.forward(torch.from_numpy(in_ids).long(), torch.from_numpy(mask), torch.from_numpy(seg_ids).long())[:2]
      torch_out = torch.cat(torch_out).unsqueeze(2)
      np.testing.assert_allclose(out.numpy(), torch_out.detach().numpy(), atol=5e-4, rtol=5e-4)

if __name__ == '__main__':
  unittest.main()


@@ -0,0 +1,115 @@
import ast
import pathlib
import sys
import unittest
import numpy as np
from PIL import Image
from tinygrad.helpers import getenv
from tinygrad.tensor import Tensor
from models.efficientnet import EfficientNet
from models.vit import ViT
from models.resnet import ResNet50

def _load_labels():
  labels_filename = pathlib.Path(__file__).parent / 'efficientnet/imagenet1000_clsidx_to_labels.txt'
  return ast.literal_eval(labels_filename.read_text())

_LABELS = _load_labels()

def preprocess(img, new=False):
  # preprocess image
  aspect_ratio = img.size[0] / img.size[1]
  img = img.resize((int(224*max(aspect_ratio,1.0)), int(224*max(1.0/aspect_ratio,1.0))))

  img = np.array(img)
  y0, x0 = (np.asarray(img.shape)[:2] - 224) // 2
  img = img[y0: y0 + 224, x0: x0 + 224]

  # low level preprocess
  if new:
    img = img.astype(np.float32)
    img -= [127.0, 127.0, 127.0]
    img /= [128.0, 128.0, 128.0]
    img = img[None]
  else:
    img = np.moveaxis(img, [2, 0, 1], [0, 1, 2])
    img = img.astype(np.float32)[:3].reshape(1, 3, 224, 224)
    img /= 255.0
    img -= np.array([0.485, 0.456, 0.406]).reshape((1, -1, 1, 1))
    img /= np.array([0.229, 0.224, 0.225]).reshape((1, -1, 1, 1))
  return img

def _infer(model: EfficientNet, img, bs=1):
  Tensor.training = False
  img = preprocess(img)
  # run the net
  if bs > 1: img = img.repeat(bs, axis=0)
  out = model.forward(Tensor(img)).cpu()
  return _LABELS[np.argmax(out.numpy()[0])]

chicken_img = Image.open(pathlib.Path(__file__).parent / 'efficientnet/Chicken.jpg')
car_img = Image.open(pathlib.Path(__file__).parent / 'efficientnet/car.jpg')

class TestEfficientNet(unittest.TestCase):
  @classmethod
  def setUpClass(cls):
    cls.model = EfficientNet(number=getenv("NUM"))
    cls.model.load_from_pretrained()

  @classmethod
  def tearDownClass(cls):
    del cls.model

  def test_chicken(self):
    label = _infer(self.model, chicken_img)
    self.assertEqual(label, "hen")

  def test_chicken_bigbatch(self):
    label = _infer(self.model, chicken_img, 2)
    self.assertEqual(label, "hen")

  def test_car(self):
    label = _infer(self.model, car_img)
    self.assertEqual(label, "sports car, sport car")

class TestViT(unittest.TestCase):
  @classmethod
  def setUpClass(cls):
    cls.model = ViT()
    cls.model.load_from_pretrained()

  @classmethod
  def tearDownClass(cls):
    del cls.model

  def test_chicken(self):
    label = _infer(self.model, chicken_img)
    self.assertEqual(label, "cock")

  def test_car(self):
    label = _infer(self.model, car_img)
    self.assertEqual(label, "racer, race car, racing car")

class TestResNet(unittest.TestCase):
  @classmethod
  def setUpClass(cls):
    cls.model = ResNet50()
    cls.model.load_from_pretrained()

  @classmethod
  def tearDownClass(cls):
    del cls.model

  def test_chicken(self):
    label = _infer(self.model, chicken_img)
    self.assertEqual(label, "hen")

  def test_car(self):
    label = _infer(self.model, car_img)
    self.assertEqual(label, "sports car, sport car")

if __name__ == '__main__':
  unittest.main()


@@ -0,0 +1,165 @@
import torch
from torch import nn
import unittest
import numpy as np
from tinygrad.nn.state import get_parameters, get_state_dict
from tinygrad.nn import optim, Linear, Conv2d, BatchNorm2d
from tinygrad.tensor import Tensor
from extra.datasets import fetch_mnist
from tinygrad.helpers import CI

def compare_tiny_torch(model, model_torch, X, Y):
  with Tensor.train():
    model_torch.train()
    model_state_dict = get_state_dict(model)
    for k,v in model_torch.named_parameters():
      if not CI: print(f"initting {k} from torch")
      model_state_dict[k].assign(Tensor(v.detach().numpy())).realize()

    optimizer = optim.SGD(get_parameters(model), lr=0.001)
    optimizer_torch = torch.optim.SGD(model_torch.parameters(), lr=0.001)

    Xt = torch.Tensor(X.numpy())
    np.testing.assert_allclose(X.numpy(), Xt.detach().numpy())

    out = model(X)
    loss = (out * Y).mean()
    if not CI: print(loss.realize().numpy())

    out_torch = model_torch(torch.Tensor(X.numpy()))
    loss_torch = (out_torch * torch.Tensor(Y.numpy())).mean()
    if not CI: print(loss_torch.detach().numpy())

    # assert losses match
    np.testing.assert_allclose(loss.realize().numpy(), loss_torch.detach().numpy(), atol=1e-4)

    # zero and backward
    optimizer.zero_grad()
    loss.backward()
    optimizer_torch.zero_grad()
    loss_torch.backward()

    for k,v in list(model_torch.named_parameters())[::-1]:
      g = model_state_dict[k].grad.numpy()
      gt = v.grad.detach().numpy()
      if not CI: print("testing grads", k)
      np.testing.assert_allclose(g, gt, atol=1e-3, err_msg=f'grad mismatch {k}')

    # take the steps
    optimizer.step()
    optimizer_torch.step()

    # assert weights match (they don't!)
    for k,v in model_torch.named_parameters():
      if not CI: print("testing weight", k)
      np.testing.assert_allclose(model_state_dict[k].numpy(), v.detach().numpy(), atol=1e-3, err_msg=f'weight mismatch {k}')

def get_mnist_data():
  X_train, Y_train, X_test, Y_test = fetch_mnist()
  BS = 32
  num_classes = 10
  X = Tensor(X_test[0:BS].astype(np.float32))
  Y = np.zeros((BS, num_classes), np.float32)
  Y[range(BS),Y_test[0:BS]] = -1.0*num_classes
  return X, Tensor(Y)

class TestEnd2End(unittest.TestCase):
  @classmethod
  def setUpClass(cls):
    cls.X, cls.Y = get_mnist_data()

  def setUp(self):
    torch.manual_seed(123)

  def test_linear_mnist(self):
    class LinTiny:
      def __init__(self, has_batchnorm=False):
        self.l1 = Linear(784, 128)
        self.l2 = Linear(128, 10)
        self.bn1 = BatchNorm2d(128) if has_batchnorm else lambda x: x
      def __call__(self, x):
        return self.l2(self.l1(x)).relu().log_softmax(-1)
    class LinTorch(nn.Module):
      def __init__(self, has_batchnorm=False):
        super().__init__()
        self.l1 = nn.Linear(784, 128)
        self.l2 = nn.Linear(128, 10)
      def forward(self, x):
        return self.l2(self.l1(x)).relu().log_softmax(-1)
    compare_tiny_torch(LinTiny(), LinTorch(), self.X, self.Y)

  def test_bn_mnist(self):
    class LinTiny:
      def __init__(self):
        self.l1 = Linear(784, 128)
        self.l2 = Linear(128, 10)
        self.bn1 = BatchNorm2d(128)
      def __call__(self, x):
        return self.l2(self.bn1(self.l1(x).reshape(x.shape[0], -1, 1, 1)).reshape(x.shape[0], -1).relu()).log_softmax(-1)
    class LinTorch(nn.Module):
      def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(784, 128)
        self.l2 = nn.Linear(128, 10)
        self.bn1 = nn.BatchNorm2d(128)
      def forward(self, x):
        return self.l2(self.bn1(self.l1(x).reshape(x.shape[0], -1, 1, 1)).reshape(x.shape[0], -1).relu()).log_softmax(-1)
    compare_tiny_torch(LinTiny(), LinTorch(), self.X, self.Y)

  def test_bn_alone(self):
    np.random.seed(1337)
    X = Tensor(np.random.randn(32, 10, 1, 1).astype(np.float32))
    Y = Tensor(np.random.randn(32, 10, 1, 1).astype(np.float32))
    compare_tiny_torch(BatchNorm2d(10), nn.BatchNorm2d(10), X, Y)

  def test_bn_linear(self):
    BS, K = 2, 1
    eps = 0
    X = Tensor([1,0]).reshape(BS, K, 1, 1)
    Y = Tensor([-1,0]).reshape(BS, K, 1, 1)
    class LinTiny:
      def __init__(self):
        self.l1 = Conv2d(K, K, 1, bias=False)
        self.bn1 = BatchNorm2d(K, affine=False, track_running_stats=False, eps=eps)
      def __call__(self, x): return self.bn1(self.l1(x))
    class LinTorch(nn.Module):
      def __init__(self):
        super().__init__()
        self.l1 = nn.Conv2d(K, K, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(K, affine=False, track_running_stats=False, eps=eps)
      def forward(self, x): return self.bn1(self.l1(x))
    model_torch = LinTorch()
    with torch.no_grad():
      model_torch.l1.weight[:] = 1.
    compare_tiny_torch(LinTiny(), model_torch, X, Y)

  def test_conv_mnist(self):
    class LinTiny:
      def __init__(self, has_batchnorm=False):
        self.c1 = Conv2d(1, 8, 3, stride=2)
        self.c2 = Conv2d(8, 16, 3, stride=2)
        self.l1 = Linear(16*6*6, 10)
        if has_batchnorm:
          self.bn1, self.bn2 = BatchNorm2d(8), BatchNorm2d(16)
        else:
          self.bn1, self.bn2 = lambda x: x, lambda x: x
      def __call__(self, x):
        return self.l1(self.bn2(self.c2(self.bn1(self.c1(x)).relu())).relu().reshape(x.shape[0], -1)).log_softmax(-1)
    class LinTorch(nn.Module):
      def __init__(self, has_batchnorm=False):
        super().__init__()
        self.c1 = nn.Conv2d(1, 8, 3, stride=2)
        self.c2 = nn.Conv2d(8, 16, 3, stride=2)
        self.l1 = nn.Linear(16*6*6, 10)
        if has_batchnorm:
          self.bn1, self.bn2 = nn.BatchNorm2d(8), nn.BatchNorm2d(16)
        else:
          self.bn1, self.bn2 = lambda x: x, lambda x: x
      def forward(self, x):
        return self.l1(self.bn2(self.c2(self.bn1(self.c1(x)).relu())).relu().reshape(x.shape[0], -1)).log_softmax(-1)
    for has_batchnorm in [False, True]:
      with self.subTest(has_batchnorm=has_batchnorm):
        compare_tiny_torch(LinTiny(has_batchnorm), LinTorch(has_batchnorm), self.X.reshape((-1, 1, 28, 28)), self.Y)

if __name__ == "__main__":
  unittest.main()


@@ -0,0 +1,116 @@
#!/usr/bin/env python
import unittest
import numpy as np
from tinygrad.nn.state import get_parameters
from tinygrad.tensor import Tensor, Device
from tinygrad.nn import optim, BatchNorm2d
from extra.training import train, evaluate
from extra.datasets import fetch_mnist
import pytest

pytestmark = [pytest.mark.exclude_gpu, pytest.mark.exclude_clang]

# load the mnist dataset
X_train, Y_train, X_test, Y_test = fetch_mnist()

# create a model
class TinyBobNet:
  def __init__(self):
    self.l1 = Tensor.scaled_uniform(784, 128)
    self.l2 = Tensor.scaled_uniform(128, 10)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    return x.dot(self.l1).relu().dot(self.l2).log_softmax()

# create a model with a conv layer
class TinyConvNet:
  def __init__(self, has_batchnorm=False):
    # https://keras.io/examples/vision/mnist_convnet/
    conv = 3
    #inter_chan, out_chan = 32, 64
    inter_chan, out_chan = 8, 16 # for speed
    self.c1 = Tensor.scaled_uniform(inter_chan,1,conv,conv)
    self.c2 = Tensor.scaled_uniform(out_chan,inter_chan,conv,conv)
    self.l1 = Tensor.scaled_uniform(out_chan*5*5, 10)
    if has_batchnorm:
      self.bn1 = BatchNorm2d(inter_chan)
      self.bn2 = BatchNorm2d(out_chan)
    else:
      self.bn1, self.bn2 = lambda x: x, lambda x: x

  def parameters(self):
    return get_parameters(self)

  def forward(self, x:Tensor):
    x = x.reshape(shape=(-1, 1, 28, 28)) # hacks
    x = self.bn1(x.conv2d(self.c1)).relu().max_pool2d()
    x = self.bn2(x.conv2d(self.c2)).relu().max_pool2d()
    x = x.reshape(shape=[x.shape[0], -1])
    return x.dot(self.l1).log_softmax()

class TestMNIST(unittest.TestCase):
  def test_sgd_onestep(self):
    np.random.seed(1337)
    model = TinyBobNet()
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, BS=69, steps=1)
    for p in model.parameters(): p.realize()

  def test_sgd_threestep(self):
    np.random.seed(1337)
    model = TinyBobNet()
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, BS=69, steps=3)

  def test_sgd_sixstep(self):
    np.random.seed(1337)
    model = TinyBobNet()
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, BS=69, steps=6, noloss=True)

  def test_adam_onestep(self):
    np.random.seed(1337)
    model = TinyBobNet()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, BS=69, steps=1)
    for p in model.parameters(): p.realize()

  def test_adam_threestep(self):
    np.random.seed(1337)
    model = TinyBobNet()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, BS=69, steps=3)

  def test_conv_onestep(self):
    np.random.seed(1337)
    model = TinyConvNet()
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, BS=69, steps=1, noloss=True)
    for p in model.parameters(): p.realize()

  def test_conv(self):
    np.random.seed(1337)
    model = TinyConvNet()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, steps=100)
    assert evaluate(model, X_test, Y_test) > 0.93 # torch gets 0.9415 sometimes

  def test_conv_with_bn(self):
    np.random.seed(1337)
    model = TinyConvNet(has_batchnorm=True)
    optimizer = optim.AdamW(model.parameters(), lr=0.003)
    train(model, X_train, Y_train, optimizer, steps=200)
    assert evaluate(model, X_test, Y_test) > 0.94

  def test_sgd(self):
    np.random.seed(1337)
    model = TinyBobNet()
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, steps=600)
    assert evaluate(model, X_test, Y_test) > 0.94 # CPU gets 0.9494 sometimes

if __name__ == '__main__':
  unittest.main()


@@ -0,0 +1,143 @@
#!/usr/bin/env python
import os
import time
import io
import unittest
import numpy as np
import onnx
from extra.utils import fetch, temp
from extra.onnx import get_run_onnx
from tinygrad.tensor import Tensor
from tinygrad.helpers import CI
import pytest

pytestmark = [pytest.mark.exclude_gpu, pytest.mark.exclude_clang]

def run_onnx_torch(onnx_model, inputs):
  import torch
  from onnx2torch import convert
  torch_model = convert(onnx_model).float()
  with torch.no_grad():
    torch_out = torch_model(*[torch.tensor(x) for x in inputs.values()])
  return torch_out

OPENPILOT_MODEL = "https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx"

np.random.seed(1337)

class TestOnnxModel(unittest.TestCase):
  def test_benchmark_openpilot_model(self):
    dat = fetch(OPENPILOT_MODEL)
    onnx_model = onnx.load(io.BytesIO(dat))
    run_onnx = get_run_onnx(onnx_model)

    def get_inputs():
      np_inputs = {
        "input_imgs": np.random.randn(*(1, 12, 128, 256)),
        "big_input_imgs": np.random.randn(*(1, 12, 128, 256)),
        "desire": np.zeros((1, 100, 8)),
        "traffic_convention": np.array([[1., 0.]]),
        "nav_features": np.zeros((1, 256)),
        "features_buffer": np.zeros((1, 99, 128)),
      }
      inputs = {k:Tensor(v.astype(np.float32), requires_grad=False) for k,v in np_inputs.items()}
      return inputs

    for _ in range(7):
      inputs = get_inputs()
      st = time.monotonic()
      tinygrad_out = run_onnx(inputs)['outputs']
      mt = time.monotonic()
      tinygrad_out.realize()
      mt2 = time.monotonic()
      tinygrad_out = tinygrad_out.numpy()
      et = time.monotonic()
      if not CI: print(f"ran openpilot model in {(et-st)*1000.0:.2f} ms, waited {(mt2-mt)*1000.0:.2f} ms for realize, {(et-mt2)*1000.0:.2f} ms for GPU queue")

    if not CI:
      import cProfile
      import pstats
      inputs = get_inputs()
      pr = cProfile.Profile(timer=time.perf_counter_ns, timeunit=1e-6)
      pr.enable()

    tinygrad_out = run_onnx(inputs)['outputs']
    tinygrad_out.realize()
    tinygrad_out = tinygrad_out.numpy()

    if not CI:
      pr.disable()
      stats = pstats.Stats(pr)
      stats.dump_stats(temp("net.prof"))
      os.system(f"flameprof {temp('net.prof')} > {temp('prof.svg')}")
      ps = stats.sort_stats(pstats.SortKey.TIME)
      ps.print_stats(30)

  def test_openpilot_model(self):
    dat = fetch(OPENPILOT_MODEL)
    onnx_model = onnx.load(io.BytesIO(dat))
    run_onnx = get_run_onnx(onnx_model)
    print("got run_onnx")
    inputs = {
      "input_imgs": np.random.randn(*(1, 12, 128, 256)),
      "big_input_imgs": np.random.randn(*(1, 12, 128, 256)),
      "desire": np.zeros((1, 100, 8)),
      "traffic_convention": np.array([[1., 0.]]),
      "nav_features": np.zeros((1, 256)),
      "features_buffer": np.zeros((1, 99, 128)),
    }
    inputs = {k:v.astype(np.float32) for k,v in inputs.items()}

    st = time.monotonic()
    print("****** run onnx ******")
    tinygrad_out = run_onnx(inputs)['outputs']
    mt = time.monotonic()
    print("****** realize ******")
    tinygrad_out.realize()
    mt2 = time.monotonic()
    tinygrad_out = tinygrad_out.numpy()
    et = time.monotonic()
    print(f"ran openpilot model in {(et-st)*1000.0:.2f} ms, waited {(mt2-mt)*1000.0:.2f} ms for realize, {(et-mt2)*1000.0:.2f} ms for GPU queue")

    Tensor.no_grad = True
    torch_out = run_onnx_torch(onnx_model, inputs).numpy()
    Tensor.no_grad = False
    print(tinygrad_out, torch_out)
    np.testing.assert_allclose(torch_out, tinygrad_out, atol=1e-4, rtol=1e-2)

  def test_efficientnet(self):
    dat = fetch("https://github.com/onnx/models/raw/main/vision/classification/efficientnet-lite4/model/efficientnet-lite4-11.onnx")
    input_name, input_new = "images:0", True
    self._test_model(dat, input_name, input_new)

  def test_shufflenet(self):
    dat = fetch("https://github.com/onnx/models/raw/main/vision/classification/shufflenet/model/shufflenet-9.onnx")
    print(f"shufflenet downloaded : {len(dat)/1e6:.2f} MB")
    input_name, input_new = "gpu_0/data_0", False
    self._test_model(dat, input_name, input_new)

  @unittest.skip("test is very slow")
  def test_resnet(self):
    # NOTE: many onnx models can't be run right now due to max pool with strides != kernel_size
    dat = fetch("https://github.com/onnx/models/raw/main/vision/classification/resnet/model/resnet18-v2-7.onnx")
    print(f"resnet downloaded : {len(dat)/1e6:.2f} MB")
    input_name, input_new = "data", False
    self._test_model(dat, input_name, input_new)

  def _test_model(self, dat, input_name, input_new, debug=False):
    onnx_model = onnx.load(io.BytesIO(dat))
    print("onnx loaded")
    from test.models.test_efficientnet import chicken_img, car_img, preprocess, _LABELS
    run_onnx = get_run_onnx(onnx_model)

    def run(img):
      inputs = {input_name: preprocess(img, new=input_new)}
      tinygrad_out = list(run_onnx(inputs, debug=debug).values())[0].numpy()
      return tinygrad_out.argmax()

    cls = run(chicken_img)
    print(cls, _LABELS[cls])
    assert _LABELS[cls] == "hen" or _LABELS[cls] == "cock"
    cls = run(car_img)
    print(cls, _LABELS[cls])
    assert "car" in _LABELS[cls] or _LABELS[cls] == "convertible"

if __name__ == "__main__":
  unittest.main()


@@ -0,0 +1,100 @@
import unittest, time
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.nn import optim
from tinygrad.nn.state import get_parameters
from tinygrad.jit import TinyJit, JIT_SUPPORTED_DEVICE
from tinygrad.ops import Device, GlobalCounters
from tinygrad.helpers import CI, dtypes, getenv, prod
from test.helpers import derandomize_model

from examples.gpt2 import Transformer as GPT2Transformer, MODEL_PARAMS as GPT2_MODEL_PARAMS
from examples.hlb_cifar10 import SpeedyResNet
from examples.llama import Transformer as LLaMaTransformer, MODEL_PARAMS as LLAMA_MODEL_PARAMS
from examples.stable_diffusion import UNetModel

def helper_test(nm, gen, train, max_memory_allowed, max_kernels_allowed, all_jitted=False):
  tms = []
  for _ in range(4):
    GlobalCounters.reset()
    GlobalCounters.mem_used = 0
    Device[Device.DEFAULT].synchronize()
    st = time.perf_counter_ns()
    train(*gen())
    Device[Device.DEFAULT].synchronize()
    tms.append(time.perf_counter_ns() - st)

  kernels_used = len(train.jit_cache) if hasattr(train, "jit_cache") else None
  print(f"{nm}: used {GlobalCounters.mem_used/1e9:.2f} GB and {kernels_used} kernels in {min(tms)/1e6:.2f} ms")
  assert GlobalCounters.mem_used/1e9 < max_memory_allowed, f"{nm} used more than {max_memory_allowed:.2f} GB"
  assert not kernels_used or kernels_used <= max_kernels_allowed, f"{nm} used more than {max_kernels_allowed} kernels"
  if all_jitted:
    assert kernels_used > 0 and kernels_used == GlobalCounters.kernel_count, f"only {kernels_used} out of {GlobalCounters.kernel_count} were jitted"

class TestRealWorld(unittest.TestCase):
  def setUp(self):
    self.old_type = Tensor.default_type
    np.random.seed(2002)

  def tearDown(self):
    Tensor.default_type = self.old_type

  @unittest.skipUnless(not CI, "too big for CI")
  def test_stable_diffusion(self):
    model = UNetModel()
    derandomize_model(model)
    @TinyJit
    def test(t, t2): return model(t, 801, t2).realize()
    helper_test("test_sd", lambda: (Tensor.randn(1, 4, 64, 64), Tensor.randn(1, 77, 768)), test, 18.0, 967)

  @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and Device.DEFAULT not in ["LLVM"], "needs JIT, too long on CI LLVM")
  def test_llama(self):
    Tensor.default_type = dtypes.float16

    args_tiny = {"dim": 1024, "multiple_of": 256, "n_heads": 8, "n_layers": 8, "norm_eps": 1e-05, "vocab_size": 1000}
    model = LLaMaTransformer(**(args_tiny if CI else LLAMA_MODEL_PARAMS["1"]["7B"]["args"]))
    derandomize_model(model)
    @TinyJit
    def test(t): return model(t, 0).realize()
    # NOTE: only test one pass, not testing the dynamic shape autoregressive part
    helper_test("test_llama", lambda: (Tensor([[1,]]),), test, 0.22 if CI else 13.5, 126 if CI else 486, all_jitted=True)

  @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and (Device.DEFAULT not in ["LLVM"] or not CI), "needs JIT, too long on CI LLVM")
  def test_gpt2(self):
    Tensor.default_type = dtypes.float16

    args_tiny = {"dim": 1024, "n_heads": 8, "n_layers": 8, "norm_eps": 1e-5, "vocab_size": 1000}
    model = GPT2Transformer(**(args_tiny if CI else GPT2_MODEL_PARAMS["gpt2-medium"]))
    derandomize_model(model)
    @TinyJit
    def test(t): return model(t, 0).realize()
    helper_test("test_gpt2", lambda: (Tensor([[1,]]),), test, 0.21 if CI else 0.9, 129 if CI else 369, all_jitted=True)

  @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and (Device.DEFAULT not in ["LLVM", "CLANG"] or not CI), "needs JIT, too long on CI LLVM and CLANG")
  def test_train_cifar(self):
    # TODO: with default device
    #old_default = Device.DEFAULT
    #Device.DEFAULT = "FAKE"
    #Device['fake'].codegen = Device[old_default].codegen

    with Tensor.train():
      model = SpeedyResNet(Tensor.ones((12,3,2,2)))
      optimizer = optim.SGD(get_parameters(model), lr=0.01, momentum=0.8, nesterov=True, weight_decay=0.15)

      BS = 32 if CI else 512

      @TinyJit
      def train(X):
        out = model(X)
        loss = out.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

      helper_test("train_cifar", lambda: (Tensor.randn(BS, 3, 32, 32),), train, (1.0/48)*BS, 154) # it's 154 on metal

    # reset device
    #Device.DEFAULT = old_default

if __name__ == '__main__':
  unittest.main()


@@ -0,0 +1,47 @@
#!/usr/bin/env python
import unittest
import numpy as np
from tinygrad.tensor import Tensor
from models.rnnt import LSTM
import torch

class TestRNNT(unittest.TestCase):
  def test_lstm(self):
    BS, SQ, IS, HS, L = 2, 20, 40, 128, 2

    # create in torch
    with torch.no_grad():
      torch_layer = torch.nn.LSTM(IS, HS, L)

    # create in tinygrad
    layer = LSTM(IS, HS, L, 0.0)

    # copy weights
    with torch.no_grad():
      layer.cells[0].weights_ih.assign(Tensor(torch_layer.weight_ih_l0.numpy()))
      layer.cells[0].weights_hh.assign(Tensor(torch_layer.weight_hh_l0.numpy()))
      layer.cells[0].bias_ih.assign(Tensor(torch_layer.bias_ih_l0.numpy()))
      layer.cells[0].bias_hh.assign(Tensor(torch_layer.bias_hh_l0.numpy()))
      layer.cells[1].weights_ih.assign(Tensor(torch_layer.weight_ih_l1.numpy()))
      layer.cells[1].weights_hh.assign(Tensor(torch_layer.weight_hh_l1.numpy()))
      layer.cells[1].bias_ih.assign(Tensor(torch_layer.bias_ih_l1.numpy()))
      layer.cells[1].bias_hh.assign(Tensor(torch_layer.bias_hh_l1.numpy()))

    # test initial hidden
    for _ in range(3):
      x = Tensor.randn(SQ, BS, IS)
      z, hc = layer(x, None)
      torch_x = torch.tensor(x.numpy())
      torch_z, torch_hc = torch_layer(torch_x)
      np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)

    # test passing hidden
    for _ in range(3):
      x = Tensor.randn(SQ, BS, IS)
      z, hc = layer(x, hc)
      torch_x = torch.tensor(x.numpy())
      torch_z, torch_hc = torch_layer(torch_x, torch_hc)
      np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)

if __name__ == '__main__':
  unittest.main()


@@ -0,0 +1,83 @@
import unittest
import time
import numpy as np
from tinygrad.nn.state import get_parameters
from tinygrad.nn import optim
from tinygrad.tensor import Device
from tinygrad.helpers import getenv
from extra.training import train
from models.convnext import ConvNeXt
from models.efficientnet import EfficientNet
from models.transformer import Transformer
from models.vit import ViT
from models.resnet import ResNet18
import pytest

pytestmark = [pytest.mark.exclude_gpu, pytest.mark.exclude_clang]

BS = getenv("BS", 2)

def train_one_step(model,X,Y):
  params = get_parameters(model)
  pcount = 0
  for p in params:
    pcount += np.prod(p.shape)
  optimizer = optim.SGD(params, lr=0.001)
  print("stepping %r with %.1fM params bs %d" % (type(model), pcount/1e6, BS))
  st = time.time()
  train(model, X, Y, optimizer, steps=1, BS=BS)
  et = time.time()-st
  print("done in %.2f ms" % (et*1000.))

def check_gc():
  if Device.DEFAULT == "GPU":
    from extra.introspection import print_objects
    assert print_objects() == 0

class TestTrain(unittest.TestCase):
  def test_convnext(self):
    model = ConvNeXt(depths=[1], dims=[16])
    X = np.zeros((BS,3,224,224), dtype=np.float32)
    Y = np.zeros((BS), dtype=np.int32)
    train_one_step(model,X,Y)
    check_gc()

  def test_efficientnet(self):
    model = EfficientNet(0)
    X = np.zeros((BS,3,224,224), dtype=np.float32)
    Y = np.zeros((BS), dtype=np.int32)
    train_one_step(model,X,Y)
    check_gc()

  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "too many buffers for webgpu")
  def test_vit(self):
    model = ViT()
    X = np.zeros((BS,3,224,224), dtype=np.float32)
    Y = np.zeros((BS,), dtype=np.int32)
    train_one_step(model,X,Y)
    check_gc()

  def test_transformer(self):
    # this should be small GPT-2, but the param count is wrong
    # (real ff_dim is 768*4)
    model = Transformer(syms=10, maxlen=6, layers=12, embed_dim=768, num_heads=12, ff_dim=768//4)
    X = np.zeros((BS,6), dtype=np.float32)
    Y = np.zeros((BS,6), dtype=np.int32)
    train_one_step(model,X,Y)
    check_gc()

  def test_resnet(self):
    X = np.zeros((BS, 3, 224, 224), dtype=np.float32)
    Y = np.zeros((BS), dtype=np.int32)
    for resnet_v in [ResNet18]:
      model = resnet_v()
      model.load_from_pretrained()
      train_one_step(model, X, Y)
      check_gc()

  def test_bert(self):
    # TODO: write this
    pass

if __name__ == '__main__':
  unittest.main()


@@ -0,0 +1,25 @@
#!/usr/bin/env python
import pathlib
import unittest
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.ops import Device

class TestVGG7(unittest.TestCase):
  def test_vgg7(self):
    from examples.vgg7_helpers.waifu2x import Vgg7, image_load

    # Create in tinygrad
    Tensor.manual_seed(1337)
    mdl = Vgg7()
    mdl.load_from_pretrained()

    # Scale up an image
    test_x = image_load(pathlib.Path(__file__).parent / 'waifu2x/input.png')
    test_y = image_load(pathlib.Path(__file__).parent / 'waifu2x/output.png')
    scaled = mdl.forward_tiled(test_x, 156)
    scaled = np.fmax(0, np.fmin(1, scaled))
    np.testing.assert_allclose(scaled, test_y, atol=5e-3, rtol=5e-3)

if __name__ == '__main__':
  unittest.main()


@@ -0,0 +1,25 @@
import unittest
import pathlib
from tinygrad.ops import Device
from examples.whisper import init_whisper, transcribe_file

@unittest.skipUnless(Device.DEFAULT == "METAL", "Some non-metal backends spend too long trying to allocate a 20GB array")
class TestWhisper(unittest.TestCase):
  @classmethod
  def setUpClass(cls):
    model, enc = init_whisper("tiny.en")
    cls.model = model
    cls.enc = enc

  @classmethod
  def tearDownClass(cls):
    del cls.model
    del cls.enc

  def test_transcribe_file(self):
    # Audio generated with the command on macOS:
    # say "Could you please let me out of the box?" --file-format=WAVE --data-format=LEUI8@16000 -o test
    # We use the WAVE type because it's easier to decode in CI test environments
    filename = str(pathlib.Path(__file__).parent / "whisper/test.wav")
    transcription = transcribe_file(self.model, self.enc, filename)
    self.assertEqual("<|startoftranscript|><|notimestamps|> Could you please let me out of the box?<|endoftext|>", transcription)

Binary file added (not shown): 7.1 KiB

Binary file added (not shown): 15 KiB

Binary file added (not shown)