mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
Match Torch speed for sum reduction on M1 (#1187)
* Add additional kernel when reducing multiple dimensions at once.
* Faster for smaller inputs
* Whitespace and naming
* Cleaner, guard for Metal only, and max 1 split rather than N
* Draft of different approach
* One additional kernel call for this test (as expected)
This commit is contained in:
committed by
GitHub
parent
fde9f0e60d
commit
59af9b81c5
2
test/external/external_test_opt.py
vendored
2
test/external/external_test_opt.py
vendored
@@ -63,7 +63,7 @@ class TestInferenceMinKernels(unittest.TestCase):
|
||||
for p in get_parameters(model): p.assign(np.zeros(p.shape, dtype=p.dtype.np))
|
||||
img = Tensor.randn(1, 3, 224, 224)
|
||||
# TODO: this seems very high
|
||||
- with CLCache(115):
|
||||
+ with CLCache(116):
|
||||
model.forward(img).realize()
|
||||
|
||||
def test_resnet(self):
|
||||
|
||||
Reference in New Issue
Block a user