multi cl_queue (#762)

* multi cl_queue
* only platforms 1
* gpus first, then cpus
* put device on underlying buffer
* cl_queue array
2026-02-08 05:35:11 -05:00 · 2023-05-03 12:15:28 -07:00
parent 7757f5fed2
commit 7ecf4dff68
4 changed files with 28 additions and 25 deletions
--- a/test/external/external_osx_profiling.py
+++ b/test/external/external_osx_profiling.py
@@ -10,16 +10,16 @@ prg = CLProgram("test", """__kernel void test(__global float *a, __global float
  int idx = get_global_id(0);
  a[idx] = b[idx] + c[idx];
 }""")
-prg.clprg(CL.cl_queue, [N,], None, a._cl, b._cl, c._cl)
+prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)

 t1 = time.monotonic_ns()
-e1 = prg.clprg(CL.cl_queue, [N,], None, a._cl, b._cl, c._cl)
-CL.cl_queue.finish()  # type: ignore
+e1 = prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
+CL.synchronize()
 t2 = time.monotonic_ns()
 time.sleep(3)
 t3 = time.monotonic_ns()
-e2 = prg.clprg(CL.cl_queue, [N,], None, a._cl, b._cl, c._cl)
-CL.cl_queue.finish()  # type: ignore
+e2 = prg.clprg(CL.cl_queue[0], [N,], None, a._cl, b._cl, c._cl)
+CL.synchronize()
 t4 = time.monotonic_ns()

 print(e1.profile.queued)