mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 23:18:04 -05:00
adapting test/external/external_osx_profiling.py to the new code base (#2002)
* adapting external osx profiling
* fixing dtype
* fixing buffer size
This commit is contained in:
14
test/external/external_osx_profiling.py
vendored
14
test/external/external_osx_profiling.py
vendored
@@ -1,24 +1,24 @@
|
||||
from tinygrad.runtime.ops_gpu import CLProgram, CL, CLBuffer
|
||||
from tinygrad.helpers import dtypes
|
||||
import time
|
||||
|
||||
N = 1000000
|
||||
a = CLBuffer(N*4)
|
||||
b = CLBuffer(N*4)
|
||||
c = CLBuffer(N*4)
|
||||
a = CLBuffer(N, dtypes.float32)
|
||||
b = CLBuffer(N, dtypes.float32)
|
||||
c = CLBuffer(N, dtypes.float32)
|
||||
|
||||
prg = CLProgram("test", """__kernel void test(__global float *a, __global float *b, __global float *c) {
|
||||
int idx = get_global_id(0);
|
||||
a[idx] = b[idx] + c[idx];
|
||||
}""")
|
||||
prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
|
||||
|
||||
prg.clprgs[0](CL.cl_queue[0], [N,], None, a._buf, b._buf, c._buf)
|
||||
t1 = time.monotonic_ns()
|
||||
e1 = prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
|
||||
e1 = prg.clprgs[0](CL.cl_queue[0], [N,], None, a._buf, b._buf, c._buf)
|
||||
CL.synchronize()
|
||||
t2 = time.monotonic_ns()
|
||||
time.sleep(3)
|
||||
t3 = time.monotonic_ns()
|
||||
e2 = prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
|
||||
e2 = prg.clprgs[0](CL.cl_queue[0], [N,], None, a._buf, b._buf, c._buf)
|
||||
CL.synchronize()
|
||||
t4 = time.monotonic_ns()
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from tinygrad.runtime.lib import RawBufferCopyInOut, LRUAllocator, RawBufferTran
|
||||
from tinygrad.codegen.kernel import LinearizerOptions
|
||||
from tinygrad.renderer.cstyle import uops_to_cstyle, CStyleLanguage
|
||||
|
||||
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
|
||||
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
|
||||
|
||||
# TODO: if you fork and exit the child process after creating anything with cl on AMD, it hangs on e.wait()
|
||||
ROCM_LLVM_PATH = pathlib.Path("/opt/rocm/llvm/bin")
|
||||
|
||||
Reference in New Issue
Block a user