From cd88afc98b5f3bc2e389b9f1a9f700e7a11b889a Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Fri, 19 Apr 2024 16:16:10 +0400
Subject: [PATCH] datasets isn't a feature + filter docstrings (#4228)

* datasets isn't a feature

* filter docstrings in sz
---
 docs-legacy/abstractions3.py          | 2 +-
 examples/beautiful_mnist.py           | 2 +-
 sz.py                                 | 5 ++++-
 tinygrad/{features => nn}/datasets.py | 0
 4 files changed, 6 insertions(+), 3 deletions(-)
 rename tinygrad/{features => nn}/datasets.py (100%)

diff --git a/docs-legacy/abstractions3.py b/docs-legacy/abstractions3.py
index a013afc7df..c2e25c0a79 100644
--- a/docs-legacy/abstractions3.py
+++ b/docs-legacy/abstractions3.py
@@ -6,7 +6,7 @@ from tinygrad.helpers import DEBUG
 # *****
 # 0. Load mnist on the device
 
-from tinygrad.features.datasets import mnist
+from tinygrad.nn.datasets import mnist
 X_train, Y_train, _, _ = mnist()
 X_train = X_train.float()
 X_train -= X_train.mean()
diff --git a/examples/beautiful_mnist.py b/examples/beautiful_mnist.py
index 7c55d204da..3be5ed8da6 100644
--- a/examples/beautiful_mnist.py
+++ b/examples/beautiful_mnist.py
@@ -2,7 +2,7 @@ from typing import List, Callable
 from tinygrad import Tensor, TinyJit, nn, GlobalCounters
 from tinygrad.helpers import getenv, colored
 
-from tinygrad.features.datasets import mnist
+from tinygrad.nn.datasets import mnist
 from tqdm import trange
 
 class Model:
diff --git a/sz.py b/sz.py
index aaec10b21c..990fcde69e 100755
--- a/sz.py
+++ b/sz.py
@@ -7,6 +7,9 @@ from tabulate import tabulate
 
 TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
 
+def is_docstring(t):
+  return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
+
 def gen_stats(base_path="."):
   table = []
   for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
@@ -16,7 +19,7 @@ def gen_stats(base_path="."):
       filepath = os.path.join(path, name)
       relfilepath = os.path.relpath(filepath, base_path)
       with tokenize.open(filepath) as file_:
-        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
+        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST and not is_docstring(t)]
       token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))
       table.append([relfilepath, line_count, token_count/line_count])
   return table
diff --git a/tinygrad/features/datasets.py b/tinygrad/nn/datasets.py
similarity index 100%
rename from tinygrad/features/datasets.py
rename to tinygrad/nn/datasets.py