adapting test/external/external_osx_profiling.py to the new code base (#2002)

* adapting external osx profiling

* fixing dtype

* fixing buffer size
This commit is contained in:
Luca Sciarpa
2023-10-08 14:55:00 +02:00
committed by GitHub
parent c4e8ea73bd
commit e93e240a6c
2 changed files with 8 additions and 8 deletions

View File

@@ -1,24 +1,24 @@
from tinygrad.runtime.ops_gpu import CLProgram, CL, CLBuffer
from tinygrad.helpers import dtypes
import time
# Host-vs-device timing probe: builds a trivial element-wise add kernel and
# brackets two launches of it with time.monotonic_ns() readings.
# NOTE(review): this view looks like a diff rendered without +/- markers, so
# each pair of near-identical statements below is presumably the pre-change
# line followed by its replacement -- confirm against the real file.
N = 1000000  # element count; also passed as the 1-D global work size below
# Presumably the older allocation form, sized in bytes (4 per float) -- TODO confirm.
a = CLBuffer(N*4)
b = CLBuffer(N*4)
c = CLBuffer(N*4)
# Allocation form taking a size of N plus an explicit float32 dtype.
a = CLBuffer(N, dtypes.float32)
b = CLBuffer(N, dtypes.float32)
c = CLBuffer(N, dtypes.float32)
# Kernel "test": each work item writes a[idx] = b[idx] + c[idx].
prg = CLProgram("test", """__kernel void test(__global float *a, __global float *b, __global float *c) {
int idx = get_global_id(0);
a[idx] = b[idx] + c[idx];
}""")
# Untimed launch issued before t1 is read (presumably a warm-up -- confirm).
prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
prg.clprgs[0](CL.cl_queue[0], [N,], None, a._buf, b._buf, c._buf)
# First timed launch: t1/t2 are host readings taken around the launch plus
# CL.synchronize(); e1 keeps whatever the launch call returns.
t1 = time.monotonic_ns()
e1 = prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
e1 = prg.clprgs[0](CL.cl_queue[0], [N,], None, a._buf, b._buf, c._buf)
CL.synchronize()
t2 = time.monotonic_ns()
# Three-second pause between the two timed launches; presumably to give a
# known host-side gap to compare against e1/e2 -- their use is not visible here.
time.sleep(3)
# Second timed launch, bracketed by t3/t4 the same way as the first.
t3 = time.monotonic_ns()
e2 = prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
e2 = prg.clprgs[0](CL.cl_queue[0], [N,], None, a._buf, b._buf, c._buf)
CL.synchronize()
t4 = time.monotonic_ns()

View File

@@ -9,7 +9,7 @@ from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTran
from tinygrad.codegen.kernel import LinearizerOptions
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
# TODO: if you fork and exit the child process after creating anything with cl on AMD, it hangs on e.wait()
ROCM_LLVM_PATH = pathlib.Path("/opt/rocm/llvm/bin")