datasets isn't a feature + filter docstrings (#4228)

* datasets isn't a feature

* filter docstrings in sz
This commit is contained in:
George Hotz
2024-04-19 16:16:10 +04:00
committed by GitHub
parent b9570d6100
commit cd88afc98b
4 changed files with 6 additions and 3 deletions

5
sz.py
View File

@@ -7,6 +7,9 @@ from tabulate import tabulate
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
def is_docstring(t):
    """Return True when token *t* looks like a triple-double-quoted docstring.

    *t* is expected to expose ``type``, ``string`` and ``line`` attributes
    (as the tokens yielded by ``tokenize.generate_tokens`` do).

    A token qualifies only when all three conditions hold:
      * its type is ``token.STRING``;
      * its text begins with three double quotes;
      * the source line it sits on, once stripped of surrounding whitespace,
        also begins with three double quotes -- i.e. the string opens the
        line rather than appearing on the right-hand side of an expression.

    Strings opened with three single quotes, or carrying a prefix such as
    ``r`` or ``b``, are not matched by this check.
    """
    return (
        t.type == token.STRING
        and t.string.startswith('"""')
        and t.line.strip().startswith('"""')
    )
def gen_stats(base_path="."):
table = []
for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
@@ -16,7 +19,7 @@ def gen_stats(base_path="."):
filepath = os.path.join(path, name)
relfilepath = os.path.relpath(filepath, base_path)
with tokenize.open(filepath) as file_:
tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST and not is_docstring(t)]
token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))
table.append([relfilepath, line_count, token_count/line_count])
return table