diff --git a/test/test_profiler.py b/test/test_profiler.py index 01a760153e..fbe486923d 100644 --- a/test/test_profiler.py +++ b/test/test_profiler.py @@ -172,7 +172,10 @@ class TestProfiler(unittest.TestCase): for (i1, d1), (i2, d2) in pairs: cpu_diff = d1.gpu2cpu_compute_time_diff - d2.gpu2cpu_compute_time_diff jitter_matrix[i1][i2] = statistics.median(_sync_d2d(d1, d2) - _sync_d2d(d2, d1) for _ in range(20)) / 2 - cpu_diff - assert abs(jitter_matrix[i1][i2]) < 0.5, "jitter should be less than 0.5ms" + + for (i1, d1), (i2, d2) in pairs: + assert abs(jitter_matrix[i1][i2]) < 0.5, "jitter should be less than 0.5us" + print("pairwise clock jitter matrix (us):\n" + '\n'.join([''.join([f'{float(item):8.3f}' for item in row]) for row in jitter_matrix])) def test_cpu_profile(self): diff --git a/tinygrad/runtime/support/hcq.py b/tinygrad/runtime/support/hcq.py index bc010065fd..16fda389ac 100644 --- a/tinygrad/runtime/support/hcq.py +++ b/tinygrad/runtime/support/hcq.py @@ -426,10 +426,10 @@ class HCQCompiled(Compiled, Generic[SignalType]): et = time.perf_counter_ns() return (decimal.Decimal(et+st) / 2000) - d.timeline_signal.timestamp - gpu2cpu_compute_time_diff = statistics.median([_sync(self, self.hw_compute_queue_t) for _ in range(40)]) + self.gpu2cpu_compute_time_diff = statistics.median([_sync(self, self.hw_compute_queue_t) for _ in range(40)]) if self.hw_copy_queue_t is None: gpu2cpu_copy_time_diff = decimal.Decimal(0) else: gpu2cpu_copy_time_diff = statistics.median([_sync(self, self.hw_copy_queue_t) for _ in range(40)]) - Compiled.profile_events += [ProfileDeviceEvent(self.device, gpu2cpu_compute_time_diff, gpu2cpu_copy_time_diff, props=self.device_props())] + Compiled.profile_events += [ProfileDeviceEvent(self.device, self.gpu2cpu_compute_time_diff, gpu2cpu_copy_time_diff, props=self.device_props())] def _wrap_timeline_signal(self): self.timeline_signal, self._shadow_timeline_signal, self.timeline_value = self._shadow_timeline_signal, self.timeline_signal, 1