mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 06:58:11 -05:00
remove PROFILE=1 option, it's just VIZ=1 [pr] (#12176)
* remove PROFILE=1 option, it's just VIZ=1 [pr]
* sqtt
* sqtt 2
* return last
* rename
This commit is contained in:
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -656,7 +656,7 @@ jobs:
|
||||
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
|
||||
- name: Run TestOps.test_add with SQTT
|
||||
run: |
|
||||
PROFILE=1 SQTT=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add
|
||||
VIZ=1 SQTT=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add
|
||||
extra/sqtt/rgptool.py create "/tmp/profile.pkl.$USER" -o /tmp/gpu0.rgp
|
||||
- name: Run process replay tests
|
||||
uses: ./.github/actions/process-replay
|
||||
|
||||
@@ -42,7 +42,6 @@ DEFAULT_FLOAT | [HALF, ...]| specify the default float dtype (FLOAT32, HAL
|
||||
IMAGE | [1-2] | enable 2d specific optimizations
|
||||
FLOAT16 | [1] | use float16 for images instead of float32
|
||||
PTX | [1] | enable the specialized [PTX](https://docs.nvidia.com/cuda/parallel-thread-execution/) assembler for Nvidia GPUs. If not set, defaults to generic CUDA codegen backend.
|
||||
PROFILE | [1] | enable profiling. This feature is supported in NV, AMD, QCOM and METAL backends.
|
||||
VISIBLE_DEVICES | [list[int]]| restricts the NV/AMD devices that are available. The format is a comma-separated list of identifiers (indexing starts with 0).
|
||||
JIT | [0-2] | 0=disabled, 1=[jit enabled](quickstart.md#jit) (default), 2=jit enabled, but graphs are disabled
|
||||
VIZ | [1] | 0=disabled, 1=[viz enabled](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/viz)
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
Only supported on 7900XTX, requires either AM (`rmmod amdgpu`) or disabling power gating on AMD (`ppfeaturemask=0xffff3fff`, don't forget to rebuild initramfs)
|
||||
|
||||
SQTT is implemented on top of normal tinygrad PROFILE=1, `PROFILE=1 SQTT=1` to get profile pickle with sqtt data embedded in it.
|
||||
SQTT is implemented on top of normal tinygrad profiling; run with `VIZ=1 SQTT=1` to get a profile pickle with SQTT data embedded in it.
|
||||
|
||||
`SQTT_BUFFER_SIZE=X` to change size of SQTT buffer (per shader engine, 6 SEs on 7900xtx) in megabytes, default 256.
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ def helper_collect_profile(*devs):
|
||||
cpu_events.clear()
|
||||
|
||||
profile_list = []
|
||||
with Context(PROFILE=1):
|
||||
with Context(VIZ=1):
|
||||
yield profile_list
|
||||
for dev in devs: dev.synchronize()
|
||||
for dev in devs: dev._at_profile_finalize()
|
||||
|
||||
@@ -408,7 +408,7 @@ class TestVizProfiler(unittest.TestCase):
|
||||
get_profile(prof)
|
||||
|
||||
def test_python_marker(self):
|
||||
with Context(PROFILE=1):
|
||||
with Context(VIZ=1):
|
||||
a = Tensor.empty(1, device="NULL")
|
||||
b = Tensor.empty(1, device="NULL")
|
||||
(a+b).realize()
|
||||
|
||||
@@ -354,9 +354,8 @@ if PROFILE:
|
||||
|
||||
with open(fn:=temp("profile.pkl", append_user=True), "wb") as f: pickle.dump(cpu_events+Compiled.profile_events+Buffer.profile_events, f)
|
||||
|
||||
if not getenv("SQTT", 0):
|
||||
from tinygrad.uop.ops import launch_viz
|
||||
launch_viz(PROFILE, fn)
|
||||
from tinygrad.uop.ops import launch_viz
|
||||
launch_viz("PROFILE", fn)
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tinygrad import Tensor, Device
|
||||
|
||||
@@ -135,7 +135,7 @@ USE_TC, TC_SELECT, TC_OPT, AMX = ContextVar("TC", 1), ContextVar("TC_SELECT", -1
|
||||
TRANSCENDENTAL, NOLOCALS = ContextVar("TRANSCENDENTAL", 1), ContextVar("NOLOCALS", 0)
|
||||
FUSE_ARANGE, FUSE_CONV_BW = ContextVar("FUSE_ARANGE", 1), ContextVar("FUSE_CONV_BW", 0)
|
||||
SPLIT_REDUCEOP, NO_MEMORY_PLANNER, RING = ContextVar("SPLIT_REDUCEOP", 1), ContextVar("NO_MEMORY_PLANNER", 0), ContextVar("RING", 1)
|
||||
PICKLE_BUFFERS, PROFILE, LRU = ContextVar("PICKLE_BUFFERS", 1), ContextVar("PROFILE", getenv("VIZ")), ContextVar("LRU", 1)
|
||||
PICKLE_BUFFERS, LRU = ContextVar("PICKLE_BUFFERS", 1), ContextVar("LRU", 1)
|
||||
CACHELEVEL, IGNORE_BEAM_CACHE, DEVECTORIZE = ContextVar("CACHELEVEL", 2), ContextVar("IGNORE_BEAM_CACHE", 0), ContextVar("DEVECTORIZE", 1)
|
||||
DISABLE_COMPILER_CACHE, BLOCK_REORDER = ContextVar("DISABLE_COMPILER_CACHE", 0), ContextVar("BLOCK_REORDER", 1)
|
||||
DONT_REALIZE_EXPAND, DONT_GROUP_REDUCES = ContextVar("DONT_REALIZE_EXPAND", 0), ContextVar("DONT_GROUP_REDUCES", 0)
|
||||
@@ -146,6 +146,7 @@ RANGEIFY, FUSE_ATTENTION = ContextVar("RANGEIFY", 0), ContextVar("FUSE_ATTENTION
|
||||
EMULATE = ContextVar("EMULATE", "")
|
||||
CPU_COUNT = ContextVar("CPU_COUNT", max(1, (os.cpu_count() or 1) // (4 if ARCH_X86 else 2))) # take 1/2 of the cores, accounting HT
|
||||
CPU_LLVM, AMD_LLVM = ContextVar("CPU_LLVM", 0), ContextVar("AMD_LLVM", 1)
|
||||
VIZ = PROFILE = ContextVar("VIZ", 0)
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Metadata:
|
||||
|
||||
@@ -7,7 +7,7 @@ from tinygrad.uop import Ops, GroupOp
|
||||
from tinygrad.uop.mathtraits import MathTrait
|
||||
from tinygrad.dtype import ConstType, ImageDType, dtypes, DType, truncate, PtrDType, least_upper_dtype, Invalid, InvalidType
|
||||
from tinygrad.helpers import ContextVar, all_int, prod, getenv, all_same, Context, partition, temp, unwrap, T, argfix, Metadata, flatten, TRACEMETA
|
||||
from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey, RANGEIFY
|
||||
from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey, RANGEIFY, VIZ
|
||||
if TYPE_CHECKING:
|
||||
from tinygrad.shape.shapetracker import ShapeTracker
|
||||
from tinygrad.device import Buffer, MultiBuffer
|
||||
@@ -835,7 +835,6 @@ def track_uop(u:UOp):
|
||||
|
||||
# *** tracking pattern matcher ***
|
||||
|
||||
VIZ = ContextVar("VIZ", 0)
|
||||
TRACK_MATCH_STATS = ContextVar("TRACK_MATCH_STATS", 2 if VIZ else 0)
|
||||
match_stats:dict[UPat, list[int|float]] = dict()
|
||||
|
||||
@@ -938,7 +937,7 @@ if TRACK_MATCH_STATS or PROFILE:
|
||||
with open(fn:=temp("rewrites.pkl", append_user=True), "wb") as f:
|
||||
print(f"rewrote {len(tracked_ctxs)} graphs and matched {sum(len(r.matches) for x in tracked_ctxs for r in x)} times, saved to {fn}")
|
||||
pickle.dump([(tracked_keys, tracked_ctxs, uop_fields)], f)
|
||||
if VIZ: launch_viz(VIZ, temp("rewrites.pkl", append_user=True))
|
||||
if VIZ: return launch_viz("VIZ", temp("rewrites.pkl", append_user=True))
|
||||
if getenv("PRINT_MATCH_STATS", TRACK_MATCH_STATS.value):
|
||||
ret = [0,0,0.0,0.0]
|
||||
for k,v in sorted(list(match_stats.items()), key=lambda x: x[1][2]+x[1][3]):
|
||||
@@ -948,11 +947,10 @@ if TRACK_MATCH_STATS or PROFILE:
|
||||
print(f"{ret[0]:6d} / {ret[1]:7d} -- {ret[3]*1000.:9.2f} / {(ret[2]+ret[3])*1000.:9.2f} ms -- TOTAL")
|
||||
print(f"{len(match_stats)} rules, {sum(v[0] > 0 for v in match_stats.values())} matched once")
|
||||
|
||||
def launch_viz(var:ContextVar, data:str):
|
||||
os.environ[(env_str:=var.key)] = "0"
|
||||
def launch_viz(env_str:str, data:str):
|
||||
os.environ[env_str] = "0"
|
||||
os.environ[f"{env_str}_DATA"] = data
|
||||
os.environ[f"{env_str}_VALUE"] = str(var.value)
|
||||
if not int(os.getenv("VIZ", "0")) and not int(os.getenv("PROFILE", "0")):
|
||||
if not int(os.getenv("VIZ", "0")) and not int(os.getenv("PROFILE", "0")) and not int(os.getenv("SQTT", "0")):
|
||||
args = ['--kernels', getenv("VIZ_DATA", "")] if getenv("VIZ_DATA", "") else []
|
||||
args += ['--profile', getenv("PROFILE_DATA", "")] if getenv("PROFILE_DATA", "") else []
|
||||
os.execv(sys.executable, [sys.executable] + [os.path.join(os.path.dirname(__file__), "../", "viz", "serve.py")] + args)
|
||||
|
||||
@@ -6,19 +6,18 @@ most uses of DEBUG >= 3
|
||||
tiny-tools
|
||||
|
||||
and a viewer for:
|
||||
SAVE_SCHEDULE=1
|
||||
TRACK_MATCH_STATS=2
|
||||
PROFILE=1
|
||||
ProfileEvents
|
||||
|
||||
to use:
|
||||
1. Run tinygrad with VIZ=1 and/or PROFILE=1 (this saves the pkls and launches the server (new process please!))
|
||||
1. Run tinygrad with VIZ=1 (this saves the pkls and launches the server (new process please!))
|
||||
2. That's it!
|
||||
|
||||
This should be able to:
|
||||
1. See all schedules (VIZ=1)
|
||||
2. See all graphs and how they were rewritten (VIZ=1)
|
||||
3. See generated code (VIZ=1)
|
||||
4. See profile (PROFILE=1)
|
||||
4. See profile (click on 'profiler')
|
||||
|
||||
bunch of dev rules:
|
||||
* everything must be responsive to keyboard smashing! lag should never happen
|
||||
|
||||
Reference in New Issue
Block a user