optionally output to file in self_tokenize.py (#8399)

This makes it possible to paste the whole tinygrad codebase into Gemini.
2026-01-09 15:08:02 -05:00 · 2024-12-24 21:09:26 -05:00
parent de3705168e
commit a35eef8d58
1 changed file with 14 additions and 1 deletion
--- a/examples/self_tokenize.py
+++ b/examples/self_tokenize.py
@@ -1,4 +1,4 @@
-import os, pathlib
+import os, pathlib, argparse
 from examples.llama3 import Tokenizer
 from tabulate import tabulate
 from tinygrad import fetch
@@ -18,7 +18,16 @@ def read_code(base_path):
      ret += [(fullpath.split("tinygrad/", 1)[1], code)]
  return ret

+def write_code_to_file(filename, code_list):
+  """Write the flattened entries of code_list to filename as UTF-8 text, separated by NUL characters."""
+  with open(filename, 'w', encoding='utf-8') as f:  # explicit encoding; don't depend on the locale default
+    f.write('\x00'.join(flatten(code_list)))
+
 if __name__ == "__main__":
+  parser = argparse.ArgumentParser(description="Analyze and optionally save tinygrad code.")
+  parser.add_argument("--output", help="Output file to write the combined code to.")
+  args = parser.parse_args()
+
  ret = read_code(".")

  table = []
@@ -33,3 +42,7 @@ if __name__ == "__main__":

  encoded = tokenizer.encode(code_str)
  print(f"code has {len(encoded)} tokens")
+
+  if args.output:
+    write_code_to_file(args.output, ret)
+    print(f"Combined code written to {args.output}")