From cd88afc98b5f3bc2e389b9f1a9f700e7a11b889a Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Fri, 19 Apr 2024 16:16:10 +0400
Subject: [PATCH] datasets isn't a feature + filter docstrings (#4228)

* datasets isn't a feature

* filter docstrings in sz
---
 docs-legacy/abstractions3.py          | 2 +-
 examples/beautiful_mnist.py           | 2 +-
 sz.py                                 | 5 ++++-
 tinygrad/{features => nn}/datasets.py | 0
 4 files changed, 6 insertions(+), 3 deletions(-)
 rename tinygrad/{features => nn}/datasets.py (100%)

diff --git a/docs-legacy/abstractions3.py b/docs-legacy/abstractions3.py
index a013afc7df..c2e25c0a79 100644
--- a/docs-legacy/abstractions3.py
+++ b/docs-legacy/abstractions3.py
@@ -6,7 +6,7 @@ from tinygrad.helpers import DEBUG
 # *****
 # 0. Load mnist on the device
 
-from tinygrad.features.datasets import mnist
+from tinygrad.nn.datasets import mnist
 X_train, Y_train, _, _ = mnist()
 X_train = X_train.float()
 X_train -= X_train.mean()
diff --git a/examples/beautiful_mnist.py b/examples/beautiful_mnist.py
index 7c55d204da..3be5ed8da6 100644
--- a/examples/beautiful_mnist.py
+++ b/examples/beautiful_mnist.py
@@ -2,7 +2,7 @@ from typing import List, Callable
 from tinygrad import Tensor, TinyJit, nn, GlobalCounters
 from tinygrad.helpers import getenv, colored
 
-from tinygrad.features.datasets import mnist
+from tinygrad.nn.datasets import mnist
 from tqdm import trange
 
 class Model:
diff --git a/sz.py b/sz.py
index aaec10b21c..990fcde69e 100755
--- a/sz.py
+++ b/sz.py
@@ -7,6 +7,9 @@ from tabulate import tabulate
 
 TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
 
+def is_docstring(t):
+  return t.type == token.STRING and t.string.startswith('"""') and t.line.strip().startswith('"""')
+
 def gen_stats(base_path="."):
   table = []
   for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
@@ -16,7 +19,7 @@ def gen_stats(base_path="."):
       filepath = os.path.join(path, name)
       relfilepath = os.path.relpath(filepath, base_path)
       with tokenize.open(filepath) as file_:
-        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
+        tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST and not is_docstring(t)]
       token_count, line_count = len(tokens), len(set([x for t in tokens for x in range(t.start[0], t.end[0]+1)]))
       table.append([relfilepath, line_count, token_count/line_count])
   return table
diff --git a/tinygrad/features/datasets.py b/tinygrad/nn/datasets.py
similarity index 100%
rename from tinygrad/features/datasets.py
rename to tinygrad/nn/datasets.py