move line counter to python

2026-04-29 03:00:14 -04:00 · 2023-05-29 09:21:18 -07:00
parent 8c6085a715
commit ae204e40c8
3 changed files with 27 additions and 42 deletions
--- a/count_tokens.py
+++ b/count_tokens.py
@@ -1,35 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import token
-import tokenize
-
-
-TOKEN_WHITELIST = [
-  token.OP,
-  token.NAME,
-  token.NUMBER,
-  token.STRING
-]
-
-
-if __name__ == "__main__":
-  count_by_file = {}
-
-  for path, subdirs, files in os.walk("tinygrad"):
-    for name in files:
-      if not name.endswith(".py"):
-        continue
-      filepath = os.path.join(path, name)
-      with tokenize.open(filepath) as file_:
-        tokens = tokenize.generate_tokens(file_.readline)
-        count_by_file[filepath] = len([t for t in tokens if t.type in TOKEN_WHITELIST])
-
-  count_by_file = dict(sorted(count_by_file.items(), key=lambda el: el[1], reverse=True))
-  max_length = max(len(k) for k in count_by_file.keys()) + 10
-  print(f"{'File':<{max_length}}  {'Token count'}")
-  print('-' * (max_length + 14))
-  for key, value in count_by_file.items():
-    print(f"{key:<{max_length}}  {value}")
-  print('-' * (max_length + 14))
-  print(f"{'Total':<{max_length}} {sum(count_by_file.values())}")
--- a/sz.py
+++ b/sz.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+import os
+import token
+import tokenize
+import itertools
+from tabulate import tabulate
+
+TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]  # only these token types are counted (comments/layout tokens are not)
+
+if __name__ == "__main__":
+  headers = ["Name", "Lines", "Tokens/Line"]
+  table = []  # one [filepath, line_count, tokens_per_line] row per .py file found under tinygrad/
+  for path, subdirs, files in os.walk("tinygrad"):
+    for name in files:
+      if not name.endswith(".py"): continue
+      filepath = os.path.join(path, name)
+      with tokenize.open(filepath) as file_:
+        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
+        token_count, line_count = len(tokens), len({t.start[0] for t in tokens})  # distinct lines on which a counted token starts
+        table.append([filepath, line_count, token_count/max(line_count, 1)])  # max() avoids ZeroDivisionError for files with no counted tokens
+  # per-file table, largest line count first
+  print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")
+  # per-directory line totals; os.path.dirname (not a "/" split) so grouping also works where os.sep is not "/"
+  for dir_name, group in itertools.groupby(sorted([(os.path.dirname(x[0]), x[1]) for x in table]), key=lambda x:x[0]):
+    print(f"{dir_name:30s} : {sum(x[1] for x in group):6d}")
+
+  print(f"\ntotal line count: {sum(x[1] for x in table)}")
--- a/sz.sh
+++ b/sz.sh
@@ -1,7 +0,0 @@
-#!/bin/bash
-# switched to cloc due to https://github.com/boyter/scc/issues/379
-cloc --by-file tinygrad/* | grep "tinygrad" && echo
-# also some sloccount for a dir summary
-sloccount tinygrad | grep "python" && echo
-# token count (as defined by Python tokenize)
-./count_tokens.py