remove PROFILE=1 option, it's just VIZ=1 [pr] (#12176)

* remove PROFILE=1 option, it's just VIZ=1 [pr] * sqtt * sqtt 2 * return last * rename
2026-01-09 06:58:11 -05:00 · 2025-09-15 12:51:50 +03:00
parent 65397bfdeb
commit a388d2cb1a
9 changed files with 16 additions and 20 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -656,7 +656,7 @@ jobs:
        run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
      - name: Run TestOps.test_add with SQTT
        run: |
-          PROFILE=1 SQTT=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add
+          VIZ=1 SQTT=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add
          extra/sqtt/rgptool.py create "/tmp/profile.pkl.$USER" -o /tmp/gpu0.rgp
      - name: Run process replay tests
        uses: ./.github/actions/process-replay
--- a/docs/env_vars.md
+++ b/docs/env_vars.md
@@ -42,7 +42,6 @@ DEFAULT_FLOAT       | [HALF, ...]| specify the default float dtype (FLOAT32, HAL
 IMAGE               | [1-2]      | enable 2d specific optimizations
 FLOAT16             | [1]        | use float16 for images instead of float32
 PTX                 | [1]        | enable the specialized [PTX](https://docs.nvidia.com/cuda/parallel-thread-execution/) assembler for Nvidia GPUs. If not set, defaults to generic CUDA codegen backend.
-PROFILE             | [1]        | enable profiling. This feature is supported in NV, AMD, QCOM and METAL backends.
 VISIBLE_DEVICES     | [list[int]]| restricts the NV/AMD devices that are available. The format is a comma-separated list of identifiers (indexing starts with 0).
 JIT                 | [0-2]      | 0=disabled, 1=[jit enabled](quickstart.md#jit) (default), 2=jit enabled, but graphs are disabled
 VIZ                 | [1]        | 0=disabled, 1=[viz enabled](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/viz)
--- a/extra/sqtt/README.md
+++ b/extra/sqtt/README.md
@@ -4,7 +4,7 @@

 Only supported on 7900XTX, requires either AM (`rmmod amdgpu`) or disabling power gating on AMD (`ppfeaturemask=0xffff3fff`, don't forget to rebuild initramfs)

-SQTT is implemented on top of normal tinygrad PROFILE=1, `PROFILE=1 SQTT=1` to get profile pickle with sqtt data embedded in it.
+SQTT is implemented on top of normal tinygrad profiling, `VIZ=1 SQTT=1` to get profile pickle with sqtt data embedded in it.

 `SQTT_BUFFER_SIZE=X` to change size of SQTT buffer (per shader engine, 6 SEs on 7900xtx) in megabytes, default 256.

--- a/test/test_profiler.py
+++ b/test/test_profiler.py
@@ -17,7 +17,7 @@ def helper_collect_profile(*devs):
  cpu_events.clear()

  profile_list = []
-  with Context(PROFILE=1):
+  with Context(VIZ=1):
    yield profile_list
    for dev in devs: dev.synchronize()
    for dev in devs: dev._at_profile_finalize()
--- a/test/unit/test_viz.py
+++ b/test/unit/test_viz.py
@@ -408,7 +408,7 @@ class TestVizProfiler(unittest.TestCase):
      get_profile(prof)

  def test_python_marker(self):
-    with Context(PROFILE=1):
+    with Context(VIZ=1):
      a = Tensor.empty(1, device="NULL")
      b = Tensor.empty(1, device="NULL")
      (a+b).realize()
--- a/tinygrad/device.py
+++ b/tinygrad/device.py
@@ -354,9 +354,8 @@ if PROFILE:

    with open(fn:=temp("profile.pkl", append_user=True), "wb") as f: pickle.dump(cpu_events+Compiled.profile_events+Buffer.profile_events, f)

-    if not getenv("SQTT", 0):
-      from tinygrad.uop.ops import launch_viz
-      launch_viz(PROFILE, fn)
+    from tinygrad.uop.ops import launch_viz
+    launch_viz("PROFILE", fn)

 if __name__ == "__main__":
  from tinygrad import Tensor, Device
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -135,7 +135,7 @@ USE_TC, TC_SELECT, TC_OPT, AMX = ContextVar("TC", 1), ContextVar("TC_SELECT", -1
 TRANSCENDENTAL, NOLOCALS = ContextVar("TRANSCENDENTAL", 1), ContextVar("NOLOCALS", 0)
 FUSE_ARANGE, FUSE_CONV_BW = ContextVar("FUSE_ARANGE", 1), ContextVar("FUSE_CONV_BW", 0)
 SPLIT_REDUCEOP, NO_MEMORY_PLANNER, RING = ContextVar("SPLIT_REDUCEOP", 1), ContextVar("NO_MEMORY_PLANNER", 0), ContextVar("RING", 1)
-PICKLE_BUFFERS, PROFILE, LRU = ContextVar("PICKLE_BUFFERS", 1), ContextVar("PROFILE", getenv("VIZ")), ContextVar("LRU", 1)
+PICKLE_BUFFERS, LRU = ContextVar("PICKLE_BUFFERS", 1), ContextVar("LRU", 1)
 CACHELEVEL, IGNORE_BEAM_CACHE, DEVECTORIZE = ContextVar("CACHELEVEL", 2), ContextVar("IGNORE_BEAM_CACHE", 0), ContextVar("DEVECTORIZE", 1)
 DISABLE_COMPILER_CACHE, BLOCK_REORDER = ContextVar("DISABLE_COMPILER_CACHE", 0), ContextVar("BLOCK_REORDER", 1)
 DONT_REALIZE_EXPAND, DONT_GROUP_REDUCES = ContextVar("DONT_REALIZE_EXPAND", 0), ContextVar("DONT_GROUP_REDUCES", 0)
@@ -146,6 +146,7 @@ RANGEIFY, FUSE_ATTENTION = ContextVar("RANGEIFY", 0), ContextVar("FUSE_ATTENTION
 EMULATE = ContextVar("EMULATE", "")
 CPU_COUNT = ContextVar("CPU_COUNT", max(1, (os.cpu_count() or 1) // (4 if ARCH_X86 else 2))) # take 1/2 of the cores, accounting HT
 CPU_LLVM, AMD_LLVM = ContextVar("CPU_LLVM", 0), ContextVar("AMD_LLVM", 1)
+VIZ = PROFILE = ContextVar("VIZ", 0)

@dataclass(frozen=True)
 class Metadata:
--- a/tinygrad/uop/ops.py
+++ b/tinygrad/uop/ops.py
@@ -7,7 +7,7 @@ from tinygrad.uop import Ops, GroupOp
 from tinygrad.uop.mathtraits import MathTrait
 from tinygrad.dtype import ConstType, ImageDType, dtypes, DType, truncate, PtrDType, least_upper_dtype, Invalid, InvalidType
 from tinygrad.helpers import ContextVar, all_int, prod, getenv, all_same, Context, partition, temp, unwrap, T, argfix, Metadata, flatten, TRACEMETA
-from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey, RANGEIFY
+from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey, RANGEIFY, VIZ
 if TYPE_CHECKING:
  from tinygrad.shape.shapetracker import ShapeTracker
  from tinygrad.device import Buffer, MultiBuffer
@@ -835,7 +835,6 @@ def track_uop(u:UOp):

 # *** tracking pattern matcher ***

-VIZ = ContextVar("VIZ", 0)
 TRACK_MATCH_STATS = ContextVar("TRACK_MATCH_STATS", 2 if VIZ else 0)
 match_stats:dict[UPat, list[int|float]] = dict()

@@ -938,7 +937,7 @@ if TRACK_MATCH_STATS or PROFILE:
      with open(fn:=temp("rewrites.pkl", append_user=True), "wb") as f:
        print(f"rewrote {len(tracked_ctxs)} graphs and matched {sum(len(r.matches) for x in tracked_ctxs for r in x)} times, saved to {fn}")
        pickle.dump([(tracked_keys, tracked_ctxs, uop_fields)], f)
-    if VIZ: launch_viz(VIZ, temp("rewrites.pkl", append_user=True))
+    if VIZ: return launch_viz("VIZ", temp("rewrites.pkl", append_user=True))
    if getenv("PRINT_MATCH_STATS", TRACK_MATCH_STATS.value):
      ret = [0,0,0.0,0.0]
      for k,v in sorted(list(match_stats.items()), key=lambda x: x[1][2]+x[1][3]):
@@ -948,11 +947,10 @@ if TRACK_MATCH_STATS or PROFILE:
      print(f"{ret[0]:6d} / {ret[1]:7d} -- {ret[3]*1000.:9.2f} / {(ret[2]+ret[3])*1000.:9.2f} ms -- TOTAL")
      print(f"{len(match_stats)} rules, {sum(v[0] > 0 for v in match_stats.values())} matched once")

-  def launch_viz(var:ContextVar, data:str):
-    os.environ[(env_str:=var.key)] = "0"
+  def launch_viz(env_str:str, data:str):
+    os.environ[env_str] = "0"
    os.environ[f"{env_str}_DATA"] = data
-    os.environ[f"{env_str}_VALUE"] = str(var.value)
-    if not int(os.getenv("VIZ", "0")) and not int(os.getenv("PROFILE", "0")):
+    if not int(os.getenv("VIZ", "0")) and not int(os.getenv("PROFILE", "0")) and not int(os.getenv("SQTT", "0")):
      args = ['--kernels', getenv("VIZ_DATA", "")] if getenv("VIZ_DATA", "") else []
      args += ['--profile', getenv("PROFILE_DATA", "")] if getenv("PROFILE_DATA", "") else []
      os.execv(sys.executable, [sys.executable] + [os.path.join(os.path.dirname(__file__), "../", "viz", "serve.py")] + args)
--- a/tinygrad/viz/README
+++ b/tinygrad/viz/README
@@ -6,19 +6,18 @@ most uses of DEBUG >= 3
 tiny-tools

 and a viewer for:
-SAVE_SCHEDULE=1
 TRACK_MATCH_STATS=2
-PROFILE=1
+ProfileEvents

 to use:
-1. Run tinygrad with VIZ=1 and/or PROFILE=1 (this saves the pkls and launches the server (new process please!))
+1. Run tinygrad with VIZ=1 (this saves the pkls and launches the server (new process please!))
 2. That's it!

 This should be able to:
 1. See all schedules (VIZ=1)
 2. See all graphs and how they were rewritten (VIZ=1)
 3. See generated code (VIZ=1)
-4. See profile (PROFILE=1)
+4. See profile (click on 'profiler')

 bunch of dev rules:
 * everything must be responsive to keyboard smashing! lag should never happen