#!/usr/bin/env python3 from tinygrad import Tensor, Device, GlobalCounters, Context, dtypes from tinygrad.helpers import getenv, colored SZ = 8_000_000_000 GPUS = getenv("GPUS", 4) # TODO: expose a way in tinygrad to access this if __name__ == "__main__": # create tensors tens = [Tensor.ones(SZ, dtype=dtypes.uint8, device=f"{Device.DEFAULT}:{i}").contiguous() for i in range(GPUS)] Tensor.realize(*tens) bw = [[0.0]*GPUS for _ in range(GPUS)] for i in range(GPUS): for j in range(GPUS): GlobalCounters.reset() with Context(DEBUG=2): if i == j: # this copy would be optimized out, just add 1 (tens[i]+1).realize() else: tens[i].to(f"{Device.DEFAULT}:{j}").realize() t = max(GlobalCounters.time_sum_s, 1e-9) bw[i][j] = SZ / t / 1e9 # GB/s def fmt(x): c = "green" if x > 50 else "yellow" if x > 20 else "red" return colored(f"{x:6.1f}", c) # header print(" " * 8 + " ".join(f"{'d'+str(j):>6}" for j in range(GPUS))) # rows for i in range(GPUS): print(f"{'s'+str(i):>6} -> " + " ".join(fmt(x) for x in bw[i]))