move graph to runtime, check line count with sz.py (#2842)

* move graph to runtime, check line count with sz.py
* oops, didn't save
* dtype aliases
* restore comment, REALCOUNT
2026-01-08 22:48:25 -05:00 · 2023-12-18 20:30:06 -08:00
parent 15dc5bcfbd
commit 6617dcf095
10 changed files with 28 additions and 33 deletions
--- a/sz.py
+++ b/sz.py
@@ -4,6 +4,7 @@ import token
 import tokenize
 import itertools
 from tabulate import tabulate
+from tinygrad.helpers import getenv

 TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]

@@ -16,7 +17,10 @@ def gen_stats(base_path="."):
      relfilepath = os.path.relpath(filepath, base_path)
      with tokenize.open(filepath) as file_:
        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
-        token_count, line_count = len(tokens), len(set([t.start[0] for t in tokens]))
+        if getenv("REALCOUNT"):
+          token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))
+        else:
+          token_count, line_count = len(tokens), len(set([t.start[0] for t in tokens]))
        table.append([relfilepath, line_count, token_count/line_count])
  return table

@@ -65,4 +69,7 @@ if __name__ == "__main__":
      print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")
      for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1], x[2]) for x in table]), key=lambda x:x[0]):
        print(f"{dir_name:30s} : {sum([x[1] for x in group]):6d}")
-      print(f"\ntotal line count: {sum([x[1] for x in table])}")
+      total_lines = sum([x[1] for x in table])
+      print(f"\ntotal line count: {total_lines}")
+      max_line_count = getenv("MAX_LINE_COUNT", -1)
+      assert max_line_count == -1 or total_lines < max_line_count, f"OVER {max_line_count} LINES"