mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
two stage cumsum in tensor.py (#2331)
* two stage cumsum in tensor.py * 2 more kernels for llama cumsum * gpt-2 and llama use fast multinomial
This commit is contained in:
2
test/external/external_test_opt.py
vendored
2
test/external/external_test_opt.py
vendored
@@ -89,7 +89,7 @@ class TestInferenceMinKernels(unittest.TestCase):
     args_tiny = {"dim": 512, "multiple_of": 256, "n_heads": 8, "n_layers": 4, "norm_eps": 1e-05, "vocab_size": 1000}
     model = Transformer(**args_tiny)
     for p in get_parameters(model): p.assign(np.zeros(p.shape, dtype=p.dtype.np))
-    with CLCache(98):
+    with CLCache(100):
       model(Tensor([[1,2,3,4]]), 0).realize()

   @unittest.skipUnless(Device.DEFAULT == "GPU", "Not Implemented")
Reference in New Issue
Block a user