From b1e52ba0c2553abab18840c1986aa09eeacca548 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:53:52 +0300 Subject: [PATCH] the slowest line in hcq graph (#15635) * the slowest line in hcq graph * x --- tinygrad/runtime/graph/hcq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tinygrad/runtime/graph/hcq.py b/tinygrad/runtime/graph/hcq.py index ea343dcce9..687d8735ca 100644 --- a/tinygrad/runtime/graph/hcq.py +++ b/tinygrad/runtime/graph/hcq.py @@ -144,7 +144,7 @@ class HCQGraph(MultiGraphRunner): self.last_j[enqueue_queue] = j # Check which signals are used in the profile graph. - self.prof_signal_is_used = [any(ent.st_id == j or ent.en_id == j for ent in self.prof_graph_entries) for j in range(len(self.jit_cache) * 2)] + self.prof_signal_is_used: set[int] = {sid for ent in self.prof_graph_entries for sid in (ent.st_id, ent.en_id)} # Build hardware queues. self.copy_to_devs: dict[HCQCompiled, set[HCQCompiled]] = {dev: set() for dev in self.devices} @@ -167,7 +167,7 @@ class HCQGraph(MultiGraphRunner): for sig, val in sync_signals + deps: enqueue_queue.wait(sig, val) # Encode waits and start profile timestamp (if needed). - if PROFILE and self.prof_signal_is_used[j * 2]: enqueue_queue.timestamp(self.prof_signals[j * 2]) + if PROFILE and j * 2 in self.prof_signal_is_used: enqueue_queue.timestamp(self.prof_signals[j * 2]) # Encode main commands based on ji type. if isinstance(ji.prg, CompiledRunner): @@ -203,7 +203,7 @@ class HCQGraph(MultiGraphRunner): self.copy_to_devs[cast(HCQCompiled, Device[dest.device])].add(cast(HCQCompiled, Device[src.device])) # Encode finish profile timestamp (if needed). - if PROFILE and self.prof_signal_is_used[j * 2 + 1]: enqueue_queue.timestamp(self.prof_signals[j * 2 + 1]) + if PROFILE and j * 2 + 1 in self.prof_signal_is_used: enqueue_queue.timestamp(self.prof_signals[j * 2 + 1]) if signal_val is not None: enqueue_queue.signal(signal, signal_val)