From 25ff7146f23fe36fe79239ebb056c63f4db96065 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Wed, 25 Mar 2026 20:54:56 +0800 Subject: [PATCH] add a status line to REMOTE with DEBUG=1 (#15471) * python speedups of hot paths * add a status line to REMOTE with DEBUG=1 * pc * t --- tinygrad/runtime/support/system.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tinygrad/runtime/support/system.py b/tinygrad/runtime/support/system.py index dc559d5bed..44922f0ace 100644 --- a/tinygrad/runtime/support/system.py +++ b/tinygrad/runtime/support/system.py @@ -1,6 +1,6 @@ from __future__ import annotations -import os, mmap, array, functools, ctypes, select, contextlib, dataclasses, sys, itertools, struct, socket, subprocess, time, enum -from tinygrad.helpers import round_up, getenv, OSX, temp, ceildiv, unwrap, fetch, system, _ensure_downloads_dir +import os, mmap, array, functools, ctypes, select, contextlib, dataclasses, sys, itertools, struct, socket, subprocess, time, enum, atexit +from tinygrad.helpers import round_up, getenv, OSX, temp, ceildiv, unwrap, fetch, system, _ensure_downloads_dir, DEBUG from tinygrad.runtime.autogen import libc, pci, vfio, iokit, corefoundation from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface, HCQBuffer, hcq_filter_visible_devices from tinygrad.runtime.support.memory import VirtMapping, AddrSpace, BumpAllocator @@ -307,6 +307,11 @@ class RemoteMMIOInterface(MMIOInterface): return RemoteMMIOInterface(self.dev, self.residx, size or (self.nbytes - offset), fmt or self.fmt, self.off + offset, self.rd_cmd, self.wr_cmd) class RemotePCIDevice(PCIDevice): + _bulk_sent:int = 0 + _bulk_recv:int = 0 + _rpc_count:int = 0 + _start_time:float = 0.0 + @staticmethod @functools.cache def remote_sock() -> socket.socket: @@ -317,6 +322,14 @@ class RemotePCIDevice(PCIDevice): sock.settimeout(getenv("REMOTE_TIMEOUT", 3)) sock.connect((host, port)) sock.settimeout(None) + if DEBUG >= 1: + RemotePCIDevice._start_time = time.perf_counter() + def _print_stats(): + dt = time.perf_counter() - RemotePCIDevice._start_time + sent_mb, recv_mb = RemotePCIDevice._bulk_sent / 1e6, RemotePCIDevice._bulk_recv / 1e6 + print(f"remote: sent {sent_mb:,.2f} MB ({sent_mb/dt:,.2f} MB/s), recv {recv_mb:,.2f} MB ({recv_mb/dt:,.2f} MB/s), " + f"{RemotePCIDevice._rpc_count:,} roundtrips in {dt:.2f}s") + atexit.register(_print_stats) return sock @staticmethod @@ -343,6 +356,7 @@ class RemotePCIDevice(PCIDevice): else: msg, fd = RemotePCIDevice._recvall(sock, 17), None if (resp:=struct.unpack(' 0 else 'unknown error'}") + RemotePCIDevice._rpc_count += 1 return (resp[1], resp[2]) + ((RemotePCIDevice._recvall(sock, readout_size) if readout_size > 0 else None),) + (fd,) def __init__(self, devpref:str, pcibus:str, sock:socket.socket|None=None): @@ -352,8 +366,10 @@ class RemotePCIDevice(PCIDevice): self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock") def _bulk_read(self, cmd:int, idx:int, offset:int, size:int) -> bytes: + RemotePCIDevice._bulk_recv += size return unwrap(self._rpc(self.sock, self.dev_id, cmd, offset, size, bar=idx, readout_size=size)[2]) def _bulk_write(self, cmd:int, idx:int, offset:int, data:bytes): + RemotePCIDevice._bulk_sent += len(data) self.sock.sendall(struct.pack(' tuple[MMIOInterface, list[int]]: