optionally output to file in self_tokenize.py (#8399)

With the combined code written to a file, the whole tinygrad codebase can be pasted into Gemini.
This commit is contained in:
chenyu
2024-12-24 21:09:26 -05:00
committed by GitHub
parent de3705168e
commit a35eef8d58

View File

@@ -1,4 +1,4 @@
import os, pathlib
import os, pathlib, argparse
from examples.llama3 import Tokenizer
from tabulate import tabulate
from tinygrad import fetch
@@ -18,7 +18,16 @@ def read_code(base_path):
ret += [(fullpath.split("tinygrad/", 1)[1], code)]
return ret
def write_code_to_file(filename, code_list):
  """Write every entry of code_list into one file as a single string.

  The entries are passed through ``flatten`` and the resulting pieces are
  joined with a NUL character as the separator. The target file is opened in
  text write mode, so any existing content is replaced.

  Args:
    filename: Path of the file to write.
    code_list: Nested collection of strings; the script passes the list of
      (relative path, code) pairs returned by ``read_code``.
  """
  # NOTE(review): `flatten` is not defined in this block; presumably it comes
  # from a project helper import elsewhere in the file -- confirm.
  with open(filename, 'w') as out_file:
    # Build the payload only after the file is open, one write for everything.
    pieces = flatten(code_list)
    combined = '\x00'.join(pieces)
    out_file.write(combined)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Analyze and optionally save tinygrad code.")
parser.add_argument("--output", help="Output file to write the combined code to.")
args = parser.parse_args()
ret = read_code(".")
table = []
@@ -33,3 +42,7 @@ if __name__ == "__main__":
encoded = tokenizer.encode(code_str)
print(f"code has {len(encoded)} tokens")
if args.output:
write_code_to_file(args.output, ret)
print(f"Combined code written to {args.output}")