bugfixes from multioutput + PCONTIG=3 for fa bw memory fix (#12837)

* bugfixes from multioutput

* PCONTIG=3 fixes fa memory usage

* that's base
This commit is contained in:
George Hotz
2025-10-21 19:21:02 +08:00
committed by GitHub
parent 0435d31f1c
commit 20a232f1c5
6 changed files with 25 additions and 12 deletions

View File

@@ -1,6 +1,6 @@
import unittest
from tinygrad import Tensor, nn, Device
-from tinygrad.helpers import Context, GlobalCounters, CI, getenv
+from tinygrad.helpers import Context, GlobalCounters, CI, getenv, PCONTIG
from tinygrad.uop.ops import graph_rewrite, PatternMatcher, UPat, Ops
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.nir import NIRRenderer
@@ -64,11 +64,11 @@ class TestPcontig(unittest.TestCase):
Tensor.realize(*ret)
return ret
-with Context(PCONTIG=2, DEBUG=2):
+with Context(PCONTIG=max(2, PCONTIG.value), DEBUG=2):
grads = fa_bw()
print(f"{GlobalCounters.global_ops/1e9:.2f} GFLOPS")
-with Context(DEBUG=2):
+with Context(PCONTIG=0, DEBUG=2):
cmp_grads = fa_bw()
print(f"{GlobalCounters.global_ops/1e9:.2f} GFLOPS")