# Patch summary (three files changed).
#
# NOTE(review): in this copy the patch's line breaks have been collapsed onto
# a single line, so the original indentation of the hunk lines is not visible.
# Statements about nesting below are inferred from statement order and should
# be confirmed against the real files before relying on them.
#
# 1. extra/viz/cli.py -- print_data (hunk -10,7 +10,11: four lines added)
#    * Inside the `for m in data["value"]` loop, adds `if m.get("uop"):`
#      followed by print("Input UOp:") and print(m["uop"]). These lines are
#      placed before the existing `if not m["diff"]: continue` check.
#    * Adds print("Rewrites:") after that check and before the existing
#      lines that print the "upat" file location and m["upat"][1].
#
# 2. test/test_multitensor.py -- TestMultiRamUsage (hunk -1263,7 +1263,6)
#    * Removes the `@unittest.expectedFailure` line that preceded
#      test_matmul_half_alt, which calls self._test_matmul_half(devices_4).
#
# 3. tinygrad/schedule/rangeify.py -- red_gate, shown under the
#    remove_bufferize hunk header (hunk -171,7 +171,7: one line replaced)
#    * The condition
#        x.op is Ops.BUFFERIZE and x.arg.addrspace == AddrSpace.GLOBAL
#      becomes
#        (x.op is Ops.BUFFERIZE and x.arg.addrspace == AddrSpace.GLOBAL)
#          or x.op is Ops.MSTACK
#      The statements that follow it are accessed_buffers.append(x) and
#      `return False` (presumably the body of this `if` -- TODO confirm).
#
diff --git a/extra/viz/cli.py b/extra/viz/cli.py index 06885c5412..846717be3b 100755 --- a/extra/viz/cli.py +++ b/extra/viz/cli.py @@ -10,7 +10,11 @@ def optional_eq(val:dict, arg:str|None) -> bool: return arg is None or ansistrip def print_data(data:dict) -> None: if isinstance(data.get("value"), Iterator): for m in data["value"]: + if m.get("uop"): + print("Input UOp:") + print(m["uop"]) if not m["diff"]: continue + print("Rewrites:") fp = pathlib.Path(m["upat"][0][0]) print(f"{fp.parent.name}/{fp.name}:{m['upat'][0][1]}") print(m["upat"][1]) diff --git a/test/test_multitensor.py b/test/test_multitensor.py index ddcc2074f5..7866d3000e 100644 --- a/test/test_multitensor.py +++ b/test/test_multitensor.py @@ -1263,7 +1263,6 @@ class TestMultiRamUsage(unittest.TestCase): self.assertEqual(total_mem[dtypes.half], total_mem[dtypes.float] // 2) def test_matmul_half(self): self._test_matmul_half(devices_2) - @unittest.expectedFailure def test_matmul_half_alt(self): self._test_matmul_half(devices_4) @unittest.skipIf(not_support_multi_device(), "need multi") diff --git a/tinygrad/schedule/rangeify.py b/tinygrad/schedule/rangeify.py index aa3db46de7..e4c38550b3 100644 --- a/tinygrad/schedule/rangeify.py +++ b/tinygrad/schedule/rangeify.py @@ -171,7 +171,7 @@ def remove_bufferize(src:UOp, buf:UOp, idx:UOp): indexes: list[UOp] = [] reduces: list[UOp] = [] def red_gate(x:UOp): - if x.op is Ops.BUFFERIZE and x.arg.addrspace == AddrSpace.GLOBAL: + if (x.op is Ops.BUFFERIZE and x.arg.addrspace == AddrSpace.GLOBAL) or x.op is Ops.MSTACK: accessed_buffers.append(x) return False if x.op is Ops.BUFFER: