use copyin (#2500)

* it's always copyin

* all RawBuffer are RawBufferCopyIn

* cleanups

* this fixes it

* requirements='C'

* more correct
This commit is contained in:
George Hotz
2023-11-29 09:34:00 -08:00
committed by GitHub
parent 947711a532
commit 6707f2588e
8 changed files with 22 additions and 36 deletions

View File

@@ -25,10 +25,8 @@ def atan2_gpu(ret:LazyBuffer, a:LazyBuffer, b:LazyBuffer):
int idx = get_global_id(0);
c[idx] = atan2(a[idx], b[idx]);
}""", global_size=[prod(ret.shape)]).build(Device[ret.device].compiler, Device[ret.device].runtime).exec([ret.realized, a.realized, b.realized])
return ret.realized
# CPU variant of atan2: evaluates np.arctan2 element-wise over the `_buf` of the
# realized buffers of `a` and `b`, passes the result to
# Device[ret.device].buffer.fromCPU and returns whatever that call produces.
# NOTE(review): this text is a diff hunk whose +/- markers and indentation were
# stripped; this two-line form appears to be the version the commit removes
# (a one-line atan2_cpu using _copyin follows) -- confirm against the repository.
def atan2_cpu(ret:LazyBuffer, a:LazyBuffer, b:LazyBuffer):
return Device[ret.device].buffer.fromCPU(np.arctan2(a.realized._buf, b.realized._buf))
# CPU variant of atan2: evaluates np.arctan2 element-wise over the `_buf` of the
# realized buffers of `a` and `b` and hands the result to ret.realized._copyin.
# There is no return statement, so the function itself returns None.
# NOTE(review): presumably _copyin writes the array into ret's existing realized
# buffer, which would require ret.realized to be set before this runs -- confirm
# against the buffer class and the caller.
def atan2_cpu(ret:LazyBuffer, a:LazyBuffer, b:LazyBuffer): ret.realized._copyin(np.arctan2(a.realized._buf, b.realized._buf))
# *** second, we write the ATan2 mlop ***
# NOTE: The derivative of atan2 doesn't need a custom op! https://www.liquisearch.com/atan2/derivative