tinygrad/examples/benchmark_train_efficientnet.py
Jacky Lee f08187526f Fix examples (#540)
* Fix examples

* Remove training in parameters

* Simplify a bit

* Remove extra import

* Fix linter errors

* factor out Device

* NumPy-like semantics for Tensor.__getitem__ (#506)

* Rewrote Tensor.__getitem__ to fix negative indices and add support for np.newaxis/None

* Fixed pad2d

* mypy doesn't know about mlops methods

* normal python behavior for out-of-bounds slicing

* type: ignore

* inlined idxfix

* added comment for __getitem__

* Better comments, better tests, and fixed bug in np.newaxis

* update cpu and torch to hold buffers (#542)

* update cpu and torch to hold buffers

* save lines, and probably faster

* Mypy fun (#541)

* mypy fun

* things are just faster

* running fast

* mypy is fast

* compile.sh

* no gpu hack

* refactor ops_cpu and ops_torch to not subclass

* make weak buffer work

* tensor works

* fix test failing

* cpu/torch cleanups

* no or operator on dict in python 3.8

* that was junk

* fix warnings

* comment and touchup

* dyn add of math ops

* refactor ops_cpu and ops_torch to not share code

* nn/optim.py compiles now

* Reorder imports

* call mkdir only if directory doesn't exist

---------

Co-authored-by: George Hotz <geohot@gmail.com>
Co-authored-by: Mitchell Goff <mitchellgoffpc@gmail.com>
Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
2023-02-10 12:09:37 -06:00
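
As a point of reference, the NumPy indexing behavior that #506 above brings to Tensor.__getitem__ (negative indices, None/np.newaxis, Python-style out-of-bounds slicing) can be previewed with NumPy itself; a minimal illustration of the target semantics, not tinygrad code:

import numpy as np

a = np.arange(12).reshape(3, 4)
print(a[-1])           # negative index counts from the end: [ 8  9 10 11]
print(a[None].shape)   # None (np.newaxis) inserts a new axis: (1, 3, 4)
print(a[1:100].shape)  # out-of-bounds slices clamp like Python lists: (2, 4)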


#!/usr/bin/env python3
import gc
import time
from tqdm import trange
from models.efficientnet import EfficientNet
from tinygrad.nn import optim
from tinygrad.tensor import Tensor
from tinygrad.runtime.opencl import CL
from tinygrad.ops import GlobalCounters
from tinygrad.helpers import getenv

def tensors_allocated():
  # count live Tensor objects by scanning everything the garbage collector tracks
  return sum(isinstance(x, Tensor) for x in gc.get_objects())

# benchmark knobs, overridable via environment variables
NUM = getenv("NUM", 2)            # EfficientNet variant (2 -> EfficientNet-B2)
BS = getenv("BS", 8)              # batch size
CNT = getenv("CNT", 10)           # number of timed iterations
BACKWARD = getenv("BACKWARD", 0)  # also run backward pass + optimizer step
TRAINING = getenv("TRAINING", 1)  # set Tensor.training
ADAM = getenv("ADAM", 0)          # use Adam instead of SGD
CLCACHE = getenv("CLCACHE", 0)    # replay recorded CL kernels after warmup
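
# how the knobs are read: getenv pulls each value from the environment with a
# typed default; a minimal sketch of the helper (an assumption about
# tinygrad.helpers, not a verbatim copy):
#   def getenv(key, default=0): return type(default)(os.getenv(key, default))
# e.g. `BS=16 BACKWARD=1 python3 benchmark_train_efficientnet.py` overrides two knobs.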

if __name__ == "__main__":
  print(f"NUM:{NUM} BS:{BS} CNT:{CNT}")
  model = EfficientNet(NUM, classes=1000, has_se=False, track_running_stats=False)
  parameters = optim.get_parameters(model)
  for p in parameters: p.realize()
  if ADAM: optimizer = optim.Adam(parameters, lr=0.001)
  else: optimizer = optim.SGD(parameters, lr=0.001)

  # global flags: enable training-mode behavior, skip grad tracking unless BACKWARD
  Tensor.training = TRAINING
  Tensor.no_grad = not BACKWARD
  for i in trange(CNT):
    GlobalCounters.reset()
    cpy = time.monotonic()
    x_train = Tensor.randn(BS, 3, 224, 224, requires_grad=False).realize()
    y_train = Tensor.randn(BS, 1000, requires_grad=False).realize()

    if i < 3 or not CLCACHE:   # the first three iterations always build and run the graph
      st = time.monotonic()
      out = model.forward(x_train)
      loss = out.logsoftmax().mul(y_train).mean()
      if i == 2 and CLCACHE: GlobalCounters.cache = []   # start recording kernel launches
      if BACKWARD:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      mt = time.monotonic()
      loss.realize()
      for p in parameters:
        p.realize()
      et = time.monotonic()
    else:
      # replay the CL programs recorded during iteration 2
      st = mt = time.monotonic()
      for prg, args in cl_cache: prg(*args)
      et = time.monotonic()

    if i == 2 and CLCACHE:
      cl_cache = GlobalCounters.cache
      GlobalCounters.cache = None   # stop recording; later iterations replay cl_cache as-is

    mem_used = CL.mem_used
    loss_cpu = loss.detach().numpy()[0]
    cl = time.monotonic()

    print(f"{(st-cpy)*1000.0:7.2f} ms cpy, {(cl-st)*1000.0:7.2f} ms run, {(mt-st)*1000.0:7.2f} ms build, {(et-mt)*1000.0:7.2f} ms realize, {(cl-et)*1000.0:7.2f} ms CL, {loss_cpu:7.2f} loss, {tensors_allocated():4d} tensors, {mem_used/1e9:.2f} GB used, {GlobalCounters.global_ops*1e-9/(cl-st):9.2f} GFLOPS")
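
# example invocations (assuming a working OpenCL setup and this repo layout):
#   python3 examples/benchmark_train_efficientnet.py
#   BS=16 CNT=20 BACKWARD=1 ADAM=1 python3 examples/benchmark_train_efficientnet.py
#   CLCACHE=1 python3 examples/benchmark_train_efficientnet.py  # record at iteration 2, replay afterwards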