From a388d2cb1a1927481040f18e5f4e93ca08ffab7f Mon Sep 17 00:00:00 2001
From: qazal <77887910+Qazalin@users.noreply.github.com>
Date: Mon, 15 Sep 2025 12:51:50 +0300
Subject: [PATCH] remove PROFILE=1 option, it's just VIZ=1 [pr] (#12176)

* remove PROFILE=1 option, it's just VIZ=1 [pr]

* sqtt

* sqtt 2

* return last

* rename
---
 .github/workflows/test.yml |  2 +-
 docs/env_vars.md           |  1 -
 extra/sqtt/README.md       |  2 +-
 test/test_profiler.py      |  2 +-
 test/unit/test_viz.py      |  2 +-
 tinygrad/device.py         |  5 ++---
 tinygrad/helpers.py        |  3 ++-
 tinygrad/uop/ops.py        | 12 +++++-------
 tinygrad/viz/README        |  7 +++----
 9 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 1a0490da87..61f1605fb3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -656,7 +656,7 @@ jobs:
         run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
       - name: Run TestOps.test_add with SQTT
         run: |
-          PROFILE=1 SQTT=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add
+          VIZ=1 SQTT=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add
           extra/sqtt/rgptool.py create "/tmp/profile.pkl.$USER" -o /tmp/gpu0.rgp
       - name: Run process replay tests
         uses: ./.github/actions/process-replay
diff --git a/docs/env_vars.md b/docs/env_vars.md
index 44be042bfa..e4129bc169 100644
--- a/docs/env_vars.md
+++ b/docs/env_vars.md
@@ -42,7 +42,6 @@ DEFAULT_FLOAT       | [HALF, ...]| specify the default float dtype (FLOAT32, HAL
 IMAGE               | [1-2]      | enable 2d specific optimizations
 FLOAT16             | [1]        | use float16 for images instead of float32
 PTX                 | [1]        | enable the specialized [PTX](https://docs.nvidia.com/cuda/parallel-thread-execution/) assembler for Nvidia GPUs. If not set, defaults to generic CUDA codegen backend.
-PROFILE             | [1]        | enable profiling. This feature is supported in NV, AMD, QCOM and METAL backends.
 VISIBLE_DEVICES     | [list[int]]| restricts the NV/AMD devices that are available. The format is a comma-separated list of identifiers (indexing starts with 0).
 JIT                 | [0-2]      | 0=disabled, 1=[jit enabled](quickstart.md#jit) (default), 2=jit enabled, but graphs are disabled
 VIZ                 | [1]        | 0=disabled, 1=[viz enabled](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/viz)
diff --git a/extra/sqtt/README.md b/extra/sqtt/README.md
index 6d739ceb68..1d19ae8f32 100644
--- a/extra/sqtt/README.md
+++ b/extra/sqtt/README.md
@@ -4,7 +4,7 @@
 
 Only supported on 7900XTX, requires either AM (`rmmod amdgpu`) or disabling power gating on AMD (`ppfeaturemask=0xffff3fff`, don't forget to rebuild initramfs)
 
-SQTT is implemented on top of normal tinygrad PROFILE=1, `PROFILE=1 SQTT=1` to get profile pickle with sqtt data embedded in it.
+SQTT is implemented on top of normal tinygrad profiling; run with `VIZ=1 SQTT=1` to get a profile pickle with SQTT data embedded in it.
 
 `SQTT_BUFFER_SIZE=X` to change size of SQTT buffer (per shader engine, 6 SEs on 7900xtx) in megabytes, default 256.
 
diff --git a/test/test_profiler.py b/test/test_profiler.py
index 15cf8647fb..6143086ca0 100644
--- a/test/test_profiler.py
+++ b/test/test_profiler.py
@@ -17,7 +17,7 @@ def helper_collect_profile(*devs):
   cpu_events.clear()
 
   profile_list = []
-  with Context(PROFILE=1):
+  with Context(VIZ=1):
     yield profile_list
     for dev in devs: dev.synchronize()
     for dev in devs: dev._at_profile_finalize()
diff --git a/test/unit/test_viz.py b/test/unit/test_viz.py
index 20dab1f7f4..7ecdbe4172 100644
--- a/test/unit/test_viz.py
+++ b/test/unit/test_viz.py
@@ -408,7 +408,7 @@ class TestVizProfiler(unittest.TestCase):
       get_profile(prof)
 
   def test_python_marker(self):
-    with Context(PROFILE=1):
+    with Context(VIZ=1):
       a = Tensor.empty(1, device="NULL")
       b = Tensor.empty(1, device="NULL")
       (a+b).realize()
diff --git a/tinygrad/device.py b/tinygrad/device.py
index bc0f6eb64c..6d08da0fd2 100644
--- a/tinygrad/device.py
+++ b/tinygrad/device.py
@@ -354,9 +354,8 @@ if PROFILE:
 
     with open(fn:=temp("profile.pkl", append_user=True), "wb") as f: pickle.dump(cpu_events+Compiled.profile_events+Buffer.profile_events, f)
 
-    if not getenv("SQTT", 0):
-      from tinygrad.uop.ops import launch_viz
-      launch_viz(PROFILE, fn)
+    from tinygrad.uop.ops import launch_viz
+    launch_viz("PROFILE", fn)
 
 if __name__ == "__main__":
   from tinygrad import Tensor, Device
diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py
index 76ddbc525e..82fe093f6e 100644
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -135,7 +135,7 @@ USE_TC, TC_SELECT, TC_OPT, AMX = ContextVar("TC", 1), ContextVar("TC_SELECT", -1
 TRANSCENDENTAL, NOLOCALS = ContextVar("TRANSCENDENTAL", 1), ContextVar("NOLOCALS", 0)
 FUSE_ARANGE, FUSE_CONV_BW = ContextVar("FUSE_ARANGE", 1), ContextVar("FUSE_CONV_BW", 0)
 SPLIT_REDUCEOP, NO_MEMORY_PLANNER, RING = ContextVar("SPLIT_REDUCEOP", 1), ContextVar("NO_MEMORY_PLANNER", 0), ContextVar("RING", 1)
-PICKLE_BUFFERS, PROFILE, LRU = ContextVar("PICKLE_BUFFERS", 1), ContextVar("PROFILE", getenv("VIZ")), ContextVar("LRU", 1)
+PICKLE_BUFFERS, LRU = ContextVar("PICKLE_BUFFERS", 1), ContextVar("LRU", 1)
 CACHELEVEL, IGNORE_BEAM_CACHE, DEVECTORIZE = ContextVar("CACHELEVEL", 2), ContextVar("IGNORE_BEAM_CACHE", 0), ContextVar("DEVECTORIZE", 1)
 DISABLE_COMPILER_CACHE, BLOCK_REORDER = ContextVar("DISABLE_COMPILER_CACHE", 0), ContextVar("BLOCK_REORDER", 1)
 DONT_REALIZE_EXPAND, DONT_GROUP_REDUCES = ContextVar("DONT_REALIZE_EXPAND", 0), ContextVar("DONT_GROUP_REDUCES", 0)
@@ -146,6 +146,7 @@ RANGEIFY, FUSE_ATTENTION = ContextVar("RANGEIFY", 0), ContextVar("FUSE_ATTENTION
 EMULATE = ContextVar("EMULATE", "")
 CPU_COUNT = ContextVar("CPU_COUNT", max(1, (os.cpu_count() or 1) // (4 if ARCH_X86 else 2))) # take 1/2 of the cores, accounting HT
 CPU_LLVM, AMD_LLVM = ContextVar("CPU_LLVM", 0), ContextVar("AMD_LLVM", 1)
+VIZ = PROFILE = ContextVar("VIZ", 0)
 
 @dataclass(frozen=True)
 class Metadata:
diff --git a/tinygrad/uop/ops.py b/tinygrad/uop/ops.py
index f070e26f81..46c441d798 100644
--- a/tinygrad/uop/ops.py
+++ b/tinygrad/uop/ops.py
@@ -7,7 +7,7 @@ from tinygrad.uop import Ops, GroupOp
 from tinygrad.uop.mathtraits import MathTrait
 from tinygrad.dtype import ConstType, ImageDType, dtypes, DType, truncate, PtrDType, least_upper_dtype, Invalid, InvalidType
 from tinygrad.helpers import ContextVar, all_int, prod, getenv, all_same, Context, partition, temp, unwrap, T, argfix, Metadata, flatten, TRACEMETA
-from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey, RANGEIFY
+from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey, RANGEIFY, VIZ
 if TYPE_CHECKING:
   from tinygrad.shape.shapetracker import ShapeTracker
   from tinygrad.device import Buffer, MultiBuffer
@@ -835,7 +835,6 @@ def track_uop(u:UOp):
 
 # *** tracking pattern matcher ***
 
-VIZ = ContextVar("VIZ", 0)
 TRACK_MATCH_STATS = ContextVar("TRACK_MATCH_STATS", 2 if VIZ else 0)
 match_stats:dict[UPat, list[int|float]] = dict()
 
@@ -938,7 +937,7 @@ if TRACK_MATCH_STATS or PROFILE:
       with open(fn:=temp("rewrites.pkl", append_user=True), "wb") as f:
         print(f"rewrote {len(tracked_ctxs)} graphs and matched {sum(len(r.matches) for x in tracked_ctxs for r in x)} times, saved to {fn}")
         pickle.dump([(tracked_keys, tracked_ctxs, uop_fields)], f)
-    if VIZ: launch_viz(VIZ, temp("rewrites.pkl", append_user=True))
+    if VIZ: return launch_viz("VIZ", temp("rewrites.pkl", append_user=True))
     if getenv("PRINT_MATCH_STATS", TRACK_MATCH_STATS.value):
       ret = [0,0,0.0,0.0]
       for k,v in sorted(list(match_stats.items()), key=lambda x: x[1][2]+x[1][3]):
@@ -948,11 +947,10 @@ if TRACK_MATCH_STATS or PROFILE:
       print(f"{ret[0]:6d} / {ret[1]:7d} -- {ret[3]*1000.:9.2f} / {(ret[2]+ret[3])*1000.:9.2f} ms -- TOTAL")
       print(f"{len(match_stats)} rules, {sum(v[0] > 0 for v in match_stats.values())} matched once")
 
-  def launch_viz(var:ContextVar, data:str):
-    os.environ[(env_str:=var.key)] = "0"
+  def launch_viz(env_str:str, data:str):
+    os.environ[env_str] = "0"
     os.environ[f"{env_str}_DATA"] = data
-    os.environ[f"{env_str}_VALUE"] = str(var.value)
-    if not int(os.getenv("VIZ", "0")) and not int(os.getenv("PROFILE", "0")):
+    if not int(os.getenv("VIZ", "0")) and not int(os.getenv("PROFILE", "0")) and not int(os.getenv("SQTT", "0")):
       args = ['--kernels', getenv("VIZ_DATA", "")] if getenv("VIZ_DATA", "") else []
       args += ['--profile', getenv("PROFILE_DATA", "")] if getenv("PROFILE_DATA", "") else []
       os.execv(sys.executable, [sys.executable] + [os.path.join(os.path.dirname(__file__), "../", "viz", "serve.py")] + args)
diff --git a/tinygrad/viz/README b/tinygrad/viz/README
index ce46d461cf..bdd038e44c 100644
--- a/tinygrad/viz/README
+++ b/tinygrad/viz/README
@@ -6,19 +6,18 @@ most uses of DEBUG >= 3
 tiny-tools
 
 and a viewer for:
-SAVE_SCHEDULE=1
 TRACK_MATCH_STATS=2
-PROFILE=1
+ProfileEvents
 
 to use:
-1. Run tinygrad with VIZ=1 and/or PROFILE=1 (this saves the pkls and launches the server (new process please!))
+1. Run tinygrad with VIZ=1 (this saves the pkls and launches the server (new process please!))
 2. That's it!
 
 This should be able to:
 1. See all schedules (VIZ=1)
 2. See all graphs and how they were rewritten (VIZ=1)
 3. See generated code (VIZ=1)
-4. See profile (PROFILE=1)
+4. See profile (click on 'profiler')
 
 bunch of dev rules:
 * everything must be responsive to keyboard smashing! lag should never happen