mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 23:48:01 -05:00
Add tensorflow GEMM benchmark script (#1000)
* Modelled closely after the existing torch benchmark script but just adapted slightly for tensorflow
This commit is contained in:
33
extra/gemm/tf_gemm.py
Normal file
33
extra/gemm/tf_gemm.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import time
|
||||
import tensorflow as tf
|
||||
|
||||
gpus = tf.config.list_physical_devices('GPU')
|
||||
if gpus:
|
||||
try:
|
||||
# Currently, memory growth needs to be the same across GPUs
|
||||
for gpu in gpus:
|
||||
tf.config.experimental.set_memory_growth(gpu, True)
|
||||
logical_gpus = tf.config.list_logical_devices('GPU')
|
||||
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
|
||||
except RuntimeError as e:
|
||||
# Memory growth must be set before GPUs have been initialized
|
||||
print(e)
|
||||
|
||||
for dtype in [tf.float16, tf.float32]:
|
||||
for N in [256, 512, 1024, 2048, 4096, 8192]:
|
||||
FLOPS = N*N*N*2
|
||||
|
||||
b = tf.random.uniform((N, N), dtype=dtype)
|
||||
c = tf.random.uniform((N, N), dtype=dtype)
|
||||
|
||||
b = tf.Variable(b)
|
||||
c = tf.Variable(c)
|
||||
|
||||
def tf_prog(b, c):
|
||||
st = time.perf_counter()
|
||||
a = tf.matmul(b, c)
|
||||
tf.debugging.check_numerics(a, "Nan or Inf in result") # Ensures that the calculation is done.
|
||||
return time.perf_counter() - st
|
||||
|
||||
tm = min([tf_prog(b, c) for _ in range(20)])
|
||||
print(f"{N*N:10d} {tm*1e6:9.2f} us, would be {FLOPS*1e-9/tm:9.2f} GFLOPS {N:4d}x{N:4d}x{N:4d} matmul in {dtype}")
|
||||
Reference in New Issue
Block a user