Mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-01-09 15:08:02 -05:00
rawcpu (#365)
* rawcpu
* add should work when we respect shapetracker
* now that's true
* still have to handle shapetracker
* copyin
* Fix mypy
accel/rawcpu/buffer.pyx (Normal file, 63 lines)
@@ -0,0 +1,63 @@
from cbuffer cimport CBuffer
cimport numpy as np
import numpy as np
from typing import Tuple
from tinygrad.helpers import prod
from tinygrad.ops import UnaryOps, BinaryOps, MovementOps, ReduceOps
from tinygrad.shapetracker import ShapeTracker

cdef class RawCPUBuffer:
  cdef CBuffer *buf
  st: ShapeTracker

  def __init__(self, shape, RawCPUBuffer parent=None):
    # TODO: copied from ops_gpu, generic this?
    self.st = shape if isinstance(shape, ShapeTracker) else ShapeTracker(tuple(shape))
    # movement ops pass a parent so views share the same backing buffer
    if parent is not None: self.buf = parent.buf
    else: self.buf = new CBuffer(prod(self.st.shape))

  @property
  def shape(self): return self.st.shape

  def contiguous_op(RawCPUBuffer x) -> RawCPUBuffer:
    return x if x.st.contiguous else x.unary_op(UnaryOps.NOOP)

  @staticmethod
  def fromCPU(np.ndarray x):
    # assumes x is a contiguous float32 ndarray
    ret = RawCPUBuffer([x.shape[i] for i in range(x.ndim)])
    ret.buf.copyin(x.data)
    return ret

  def toCPU(RawCPUBuffer self):
    x: RawCPUBuffer
    print("toCPU", self.buf.size, self.st)
    x = self.contiguous_op()
    buf = memoryview(<float[:prod(x.shape)]> x.buf.buf)
    return np.frombuffer(buf, dtype=np.float32).reshape(x.shape)

  # 1 free generic op, same as GPU (superclass with shapetracker?)
  def movement_op(RawCPUBuffer x, op, arg): return type(x)(ShapeTracker(x.st).movement_op(op, arg), x)

  # 3 actual ops
  REQUIRES_SIMPLE_REDUCE = True
  def reduce_op(RawCPUBuffer x, op:ReduceOps, new_shape:Tuple[int, ...]):
    # TODO: stub, reduce is not implemented yet
    return x

  def unary_op(RawCPUBuffer x, op):
    # TODO: stub, logs the op and returns x unchanged
    print(op, x.st)
    return x

  # TODO: shape/strides for x and y combined
  def binary_op(RawCPUBuffer x, op, RawCPUBuffer y):
    print(op, x.st, y.st)
    ret = RawCPUBuffer(x.shape)  # __init__ already allocates ret.buf
    if op == BinaryOps.ADD: ret.buf.add(x.buf, y.buf)
    elif op == BinaryOps.MUL: ret.buf.mul(x.buf, y.buf)
    else: raise NotImplementedError()
    # TODO: write generic binary op in c++
    return ret

  # can all be combined into _processing_op
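
For a quick sanity check of the file above, a round trip through fromCPU/toCPU plus one binary_op exercises most of the paths here; a minimal sketch, assuming the module compiles via the pyximport setup in ops_rawcpu.py below:

import numpy as np
from tinygrad.ops import BinaryOps
from accel.rawcpu.buffer import RawCPUBuffer

a = np.arange(6, dtype=np.float32).reshape(2, 3)
b = np.ones((2, 3), dtype=np.float32)
# upload both, add elementwise in C++, download the result
out = RawCPUBuffer.fromCPU(a).binary_op(BinaryOps.ADD, RawCPUBuffer.fromCPU(b))
assert np.allclose(out.toCPU(), a + b)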
accel/rawcpu/buffer.pyxbld (Normal file, 10 lines)
@@ -0,0 +1,10 @@
import os
import numpy as np

# pyximport calls make_ext to get the build configuration for buffer.pyx
def make_ext(modname, pyxfilename):
  from distutils.extension import Extension
  include_dir = os.path.dirname(pyxfilename)
  # numpy headers are needed for the "cimport numpy" in buffer.pyx
  return Extension(name=modname,
                   include_dirs=[include_dir, np.get_include()],
                   sources=[pyxfilename],
                   language='c++')
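
Context: pyximport looks for a <modname>.pyxbld file next to the .pyx and calls its make_ext to configure the build, so buffer.pyx gets compiled as C++ with the numpy headers on first import. A rough sketch of the mechanism in isolation (the build_dir value is just an example):

import pyximport
pyximport.install(build_dir="/tmp/tinygrad_pyxbld")  # hypothetical cache dir
from accel.rawcpu import buffer  # finds buffer.pyxbld, calls make_ext(), compiles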
accel/rawcpu/cbuffer.h (Normal file, 26 lines)
@@ -0,0 +1,26 @@
#include <cstdlib>
#include <cstring>

// fixed-size float32 buffer with a few elementwise ops
class CBuffer {
  public:
    CBuffer(int size_, void* dat = NULL) {
      size = size_;
      buf = (float*)malloc(size*4);   // 4 bytes per float32
      if (dat != NULL) copyin(dat);   // optional initial contents
    }

    void copyin(void *dat) {
      memcpy(buf, dat, size*4);
    }

    void add(CBuffer *x, CBuffer *y) {
      for (int i = 0; i < size; i++) {
        buf[i] = x->buf[i] + y->buf[i];
      }
    }

    void mul(CBuffer *x, CBuffer *y) {
      for (int i = 0; i < size; i++) {
        buf[i] = x->buf[i] * y->buf[i];
      }
    }

    float *buf;
    int size;
};
accel/rawcpu/cbuffer.pxd (Normal file, 12 lines)
@@ -0,0 +1,12 @@
# distutils: language = c++
# distutils: sources = cbuffer.h

cdef extern from "cbuffer.h":
  cdef cppclass CBuffer:
    CBuffer(int size)
    void copyin(void *dat)
    void add(CBuffer *a, CBuffer *b)
    void mul(CBuffer *a, CBuffer *b)
    float *buf
    int size
accel/rawcpu/ops_rawcpu.py (Normal file, 9 lines)
@@ -0,0 +1,9 @@
# type: ignore
import sys

# only pyximport this one import, then restore the import machinery
import pyximport
py_importer, pyx_importer = pyximport.install()
from accel.rawcpu.buffer import RawCPUBuffer
sys.meta_path.remove(pyx_importer)
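
The install/remove pair above scopes pyximport to this single import, so other .pyx files elsewhere on the path are not compiled by accident. The scoping is easy to observe; a small sketch:

import sys, pyximport
py_importer, pyx_importer = pyximport.install()
print(pyx_importer in sys.meta_path)   # True: .pyx imports now compile on demand
sys.meta_path.remove(pyx_importer)
print(pyx_importer in sys.meta_path)   # False: back to normal import behavior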
@@ -115,6 +115,7 @@ class GPUBuffer:
  def reduce_op(x, op:ReduceOps, new_shape:Tuple[int, ...]): return type(x)(new_shape)._processing_op([("A", x)], code="acc", earlycode=GPUBuffer.code_for_op[op], earlybufs=set("A"), start=GPUBuffer.start_for_op[op])

  #REQUIRES_SIMPLE_REDUCE = True
  # is there a downside to REQUIRES_SIMPLE_REDUCE always?
  def _processing_op(ret, bufs: List[Tuple[str, GPUBuffer]]=[], code:str="acc", C:Optional[ConvArgs]=None, start="0.0", reduce_shape=None, earlybufs:Set[str]=set(), earlycode:str="acc") -> GPUBuffer:
    assert C is None
tinygrad/llops/ops_rawcpu.py (Symbolic link, 1 line)
@@ -0,0 +1 @@
../../accel/rawcpu/ops_rawcpu.py
@@ -251,7 +251,7 @@ class LazyBuffer:
    if getattr(x.dbuffer, "REQUIRES_SIMPLE_REDUCE", False) and (len(new_shape) != 2 or new_shape[1] != 1):
      num, red = prod([s for s,n in zip(x.shape, new_shape) if n != 1]), prod([s for s,n in zip(x.shape, new_shape) if n == 1])
      x = x.movement_op(MovementOps.PERMUTE, [i for i,n in enumerate(new_shape) if n != 1] + [i for i,n in enumerate(new_shape) if n == 1])
      x = x.movement_op(MovementOps.RESHAPE, (num, red)) # remove this reshape, at the end is enough
      return x.reduce_op(op, (num, 1)).movement_op(MovementOps.RESHAPE, new_shape)
    else:
      return LazyBuffer(x.device, tuple(new_shape), ReduceOps, LazyOp(op, (x,), tuple(new_shape)))
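
The branch above rewrites an arbitrary reduce for backends that only handle the simple (num, red) -> (num, 1) case: permute the kept axes to the front, flatten to 2D, reduce the second axis, then reshape back to the target shape. The equivalence is easy to check with numpy alone; the names here are just for the demonstration:

import numpy as np
from math import prod

x = np.random.rand(2, 3, 4).astype(np.float32)
new_shape = (2, 1, 4)                       # reduce over axis 1

keep = [i for i, n in enumerate(new_shape) if n != 1]
red  = [i for i, n in enumerate(new_shape) if n == 1]
num, r = prod(x.shape[i] for i in keep), prod(x.shape[i] for i in red)

# permute kept axes first, flatten to (num, r), row-reduce, reshape back
y = x.transpose(keep + red).reshape(num, r).sum(axis=1).reshape(new_shape)
assert np.allclose(y, x.sum(axis=1, keepdims=True))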
@@ -68,6 +68,7 @@ def view_from_shape(shape:Tuple[int, ...]):
class ShapeTracker:
  def __init__(self, shape:Union[ShapeTracker, Tuple[int, ...]]):
    self.views : List[ViewTypes] = shape.views[:] if isinstance(shape, ShapeTracker) else [view_from_shape(shape)]
  def __repr__(self): return f"{'Complex' if len(self.views) > 1 else ''}ShapeTracker<{self.shape}, {self.views}>"

  @property
  def contiguous(self): return len(self.views) == 1 and self.views[-1].contiguous
@@ -85,11 +86,14 @@ class ShapeTracker:
  def movement_op(self, op, arg): getattr(self, str(op).split(".")[1].lower())(*arg); return self
  def needs_valid(self): return any(isinstance(v, ZeroView) for v in self.views)

  # TODO: this is not really needed, only for testing
  def __getitem__(self, val):
    locals = {"idx": val, "valid": 1}
    exec(self.expr(), None, locals)
    return locals["idx"] if locals["valid"] else -1

  # TODO: do we really need this for conv?
  # if we replace, confirm the ops taken fold into one view
  def strided(self, *arg):
    view = View([x[0] for x in arg], [x[1] for x in arg])
    if self.contiguous: self.views[-1] = view
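
For intuition on the new __getitem__: it maps a flat logical index through the stacked views to a flat physical offset, returning -1 when the index is masked out by a ZeroView. A hedged sketch of the expected behavior (the exact expr() string is internal to ShapeTracker):

from tinygrad.shapetracker import ShapeTracker
from tinygrad.ops import MovementOps

st = ShapeTracker((2, 3))                       # row-major, strides (3, 1)
st.movement_op(MovementOps.PERMUTE, (1, 0))     # logical shape is now (3, 2)
# logical index 1 is element [0, 1], which lives at physical offset 3
print(st[1])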