mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
* Fix examples * Remove training in parameters * Simplify a bit * Remove extra import * Fix linter errors * factor out Device * NumPy-like semantics for Tensor.__getitem__ (#506) * Rewrote Tensor.__getitem__ to fix negative indices and add support for np.newaxis/None * Fixed pad2d * mypy doesn't know about mlops methods * normal python behavior for out-of-bounds slicing * type: ignore * inlined idxfix * added comment for __getitem__ * Better comments, better tests, and fixed bug in np.newaxis * update cpu and torch to hold buffers (#542) * update cpu and torch to hold buffers * save lines, and probably faster * Mypy fun (#541) * mypy fun * things are just faster * running fast * mypy is fast * compile.sh * no gpu hack * refactor ops_cpu and ops_torch to not subclass * make weak buffer work * tensor works * fix test failing * cpu/torch cleanups * no or operator on dict in python 3.8 * that was junk * fix warnings * comment and touchup * dyn add of math ops * refactor ops_cpu and ops_torch to not share code * nn/optim.py compiles now * Reorder imports * call mkdir only if directory doesn't exist --------- Co-authored-by: George Hotz <geohot@gmail.com> Co-authored-by: Mitchell Goff <mitchellgoffpc@gmail.com> Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
import gc
|
|
import time
|
|
from tqdm import trange
|
|
from models.efficientnet import EfficientNet
|
|
from tinygrad.nn import optim
|
|
from tinygrad.tensor import Tensor
|
|
from tinygrad.runtime.opencl import CL
|
|
from tinygrad.ops import GlobalCounters
|
|
from tinygrad.helpers import getenv
|
|
|
|
def tensors_allocated():
  """Return how many Tensor objects are currently alive.

  Walks every object tracked by the garbage collector and counts the
  Tensor instances — used by the benchmark loop below to spot leaks
  across iterations.
  """
  count = 0
  for obj in gc.get_objects():
    if isinstance(obj, Tensor):
      count += 1
  return count
|
|
|
|
# Benchmark configuration — each knob is overridable via an environment
# variable of the same name (see tinygrad.helpers.getenv).
NUM = getenv("NUM", 2)            # EfficientNet model number, passed as the first arg to EfficientNet(...)
BS = getenv("BS", 8)              # batch size for the random input/target tensors
CNT = getenv("CNT", 10)           # number of benchmark iterations to run
BACKWARD = getenv("BACKWARD", 0)  # 1 = also run backward pass + optimizer step; 0 = forward only
TRAINING = getenv("TRAINING", 1)  # assigned to Tensor.training below (affects e.g. batchnorm behavior — confirm in model)
ADAM = getenv("ADAM", 0)          # 1 = use Adam optimizer, 0 = SGD (both lr=0.001)
CLCACHE = getenv("CLCACHE", 0)    # 1 = capture the GPU kernel cache at iteration 2 and replay it afterwards
|
|
|
|
if __name__ == "__main__":
  print(f"NUM:{NUM} BS:{BS} CNT:{CNT}")

  # Build the model and force-realize its (randomly initialized) parameters
  # so later timings don't include lazy parameter materialization.
  model = EfficientNet(NUM, classes=1000, has_se=False, track_running_stats=False)
  parameters = optim.get_parameters(model)
  for p in parameters: p.realize()
  if ADAM: optimizer = optim.Adam(parameters, lr=0.001)
  else: optimizer = optim.SGD(parameters, lr=0.001)

  Tensor.training = TRAINING
  Tensor.no_grad = not BACKWARD  # skip gradient bookkeeping when only benchmarking forward
  for i in trange(CNT):
    GlobalCounters.reset()
    cpy = time.monotonic()  # start of input-copy/creation phase
    # Fresh random batch each iteration (ImageNet-shaped input, 1000-class target).
    x_train = Tensor.randn(BS, 3, 224, 224, requires_grad=False).realize()
    y_train = Tensor.randn(BS, 1000, requires_grad=False).realize()

    # First 3 iterations (or always, when CLCACHE is off) build and run the
    # graph normally; with CLCACHE on, iterations >= 3 replay the raw kernel
    # cache captured during iteration 2 instead.
    if i < 3 or not CLCACHE:
      st = time.monotonic()   # start of graph-build phase
      out = model.forward(x_train)
      loss = out.logsoftmax().mul(y_train).mean()
      # Arm kernel capture on iteration 2 only, once warm-up is done.
      if i == 2 and CLCACHE: GlobalCounters.cache = []
      if BACKWARD:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      mt = time.monotonic()   # build done; realize phase begins
      loss.realize()
      for p in parameters:
        p.realize()
      et = time.monotonic()   # realize done
    else:
      # Cached path: replay the captured (program, args) pairs directly.
      # cl_cache was bound at the end of iteration 2 (see below); NOTE(review):
      # `loss` is NOT recomputed here — loss_cpu below reports the stale value
      # from iteration 2. Presumably intentional for a pure-replay benchmark.
      st = mt = time.monotonic()
      for prg, args in cl_cache: prg(*args)
      et = time.monotonic()

    # Capture the kernel cache recorded during iteration 2 for replay next time.
    if i == 2 and CLCACHE:
      cl_cache = GlobalCounters.cache

    mem_used = CL.mem_used
    # Forces a device->host sync; the time until `cl` measures that transfer.
    loss_cpu = loss.detach().numpy()[0]
    cl = time.monotonic()

    print(f"{(st-cpy)*1000.0:7.2f} ms cpy, {(cl-st)*1000.0:7.2f} ms run, {(mt-st)*1000.0:7.2f} ms build, {(et-mt)*1000.0:7.2f} ms realize, {(cl-et)*1000.0:7.2f} ms CL, {loss_cpu:7.2f} loss, {tensors_allocated():4d} tensors, {mem_used/1e9:.2f} GB used, {GlobalCounters.global_ops*1e-9/(cl-st):9.2f} GFLOPS")
|