Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-09 15:08:02 -05:00)
fixed pylint, formatted python files with cblack on localhost (#204)

* fixed pylint, formatted python files with cblack on localhost
* Revert "fixed pylint, formatted python files with cblack on localhost"
  This reverts commit 07e2b88466.
* dedented 4-spaces, added linter

Co-authored-by: Iain Wong <iainwong@outlook.com>
.github/workflows/test.yml (vendored): 27 lines changed
@@ -34,13 +34,22 @@ jobs:
      run: python -m pytest -s -v

  linter:
    name: Indentation Linter
    runs-on: ubuntu-latest

    steps:
    - name: Checkout Code
      uses: actions/checkout@v2
    - name: Set up Python 3.8
      uses: actions/setup-python@v2
      with:
        python-version: 3.8
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install pylint
        # pip install -r requirements.txt
    - name: Lint with pylint
      run: |
        python -m pylint --disable=all -e W0311 --jobs=0 --indent-string='  ' **/*.py

    steps:
    - name: Checkout Code
      uses: actions/checkout@v2
    - name: Install PyLint
      run: sudo apt-get install pylint
    - name: Validate indentation is 2 lines
      run: if [[ $(pylint --jobs=0 --indent-string='  ' * | grep "Bad indentation") ]]; then exit 1; fi
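For context: W0311 is pylint's bad-indentation check, and with --indent-string set to two spaces it flags lines whose indentation is not a multiple of two spaces per block level. A minimal sketch of what the new CI job catches, using a hypothetical file bad_indent.py and the same command as the workflow:

# bad_indent.py -- sketch of input the new CI job would reject
def four_space(x):
    return x + 1   # four-space body: flagged as W0311 (bad-indentation) with a two-space indent-string

def two_space(x):
  return x + 1     # two-space body: passes

# check it locally with the command from the workflow:
#   python -m pylint --disable=all -e W0311 --indent-string='  ' bad_indent.py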
@@ -23,4 +23,3 @@ mlmodel = ct.models.MLModel(builder.spec)
out = mlmodel.predict({"image": np.array([1337,0,0], dtype=np.float32)})
print(out)
mlmodel.save('test.mlmodel')
@@ -28,7 +28,7 @@ for c in a.headers[0].commands:

# this parser is wrong (fixed with 64-bit one)
from macholib import SymbolTable
sym = SymbolTable.SymbolTable(a)

syms = {}
for l in sym.nlists:
@@ -96,4 +96,3 @@ for i in range(0, len(f2), 0x300):

#open("/tmp/data.section", "wb").write(f2)
#print(compare(open("model.hwx.golden", "rb").read(), open("model.hwx", "rb").read()))
@@ -6,7 +6,7 @@ import faulthandler
faulthandler.enable()

libane = cdll.LoadLibrary(os.path.join(
  os.path.dirname(os.path.abspath(__file__)),
  "libane.dylib"))

libane.ANE_Compile.argtypes = [c_char_p, c_int]
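The libane loading above is the usual ctypes pattern: resolve a shared-library path, load it, and declare argtypes/restype before calling into it. A minimal sketch of the same pattern against libc (so it runs without libane.dylib) on a typical Unix system:

import ctypes, ctypes.util
from ctypes import c_char_p, c_size_t

# load a shared library, then declare the C signature before calling it
libc = ctypes.CDLL(ctypes.util.find_library("c"))
libc.strlen.argtypes = [c_char_p]
libc.strlen.restype = c_size_t
assert libc.strlen(b"tinygrad") == 8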
@@ -71,4 +71,3 @@ if __name__ == "__main__":

  print(tind)
  print(toutd)
@@ -20,5 +20,3 @@ for dev in ["CPU", "GPU", "ANE"]:
  print("%s can do at least %.2f MEGAReLUs/sec" % (dev, (np.prod(boaa.shape)/1e6)/(et-st)))
  # decently reliable
  assert(np.all(boaa.cpu().data >= 0))
@@ -93,4 +93,3 @@ if __name__ == "__main__":
  print(np.argmax(out.data), np.max(out.data), lbls[np.argmax(out.data)])
  print("did inference in %.2f s" % (time.time()-st))
  #print("NOT", np.argmin(out.data), np.min(out.data), lbls[np.argmin(out.data)])
@@ -39,108 +39,107 @@ class LinearDisc:
  def forward(self, x, train=True):
    x = x.dot(self.l1).leakyrelu(0.2)
    if train:
      x = x.dropout(0.3)
    x = x.dot(self.l2).leakyrelu(0.2)
    if train:
      x = x.dropout(0.3)
    x = x.dot(self.l3).leakyrelu(0.2)
    if train:
      x = x.dropout(0.3)
    x = x.dot(self.l4).logsoftmax()
    return x

if __name__ == "__main__":
  generator = LinearGen()
  discriminator = LinearDisc()
  batch_size = 512
  k = 1
  epochs = 300
  generator_params = get_parameters(generator)
  discriminator_params = get_parameters(discriminator)
  gen_loss = []
  disc_loss = []
  output_folder = "outputs"
  os.makedirs(output_folder, exist_ok=True)
  train_data_size = len(X_train)
  ds_noise = Tensor(np.random.randn(64,128).astype(np.float32), gpu=GPU, requires_grad=False)
  n_steps = int(train_data_size/batch_size)
  if GPU:
    [x.cuda_() for x in generator_params+discriminator_params]
  # optimizers
  optim_g = optim.Adam(generator_params,lr=0.0002, b1=0.5) # 0.0002 for equilibrium!
  optim_d = optim.Adam(discriminator_params,lr=0.0002, b1=0.5)

def regularization_l2(model, a=1e-4):
  #TODO: l2 reg loss
  pass

def generator_batch():
  idx = np.random.randint(0, X_train.shape[0], size=(batch_size))
  image_b = X_train[idx].reshape(-1, 28*28).astype(np.float32)/255.
  image_b = (image_b - 0.5)/0.5
  return Tensor(image_b, gpu=GPU)

def real_label(bs):
  y = np.zeros((bs,2), np.float32)
  y[range(bs), [1]*bs] = -2.0
  real_labels = Tensor(y, gpu=GPU)
  return real_labels

def fake_label(bs):
  y = np.zeros((bs,2), np.float32)
  y[range(bs), [0]*bs] = -2.0 # Can we do label smoothin? i.e -2.0 changed to -1.98789.
  fake_labels = Tensor(y, gpu=GPU)
  return fake_labels

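Why the label value is -2.0: the discriminator ends in logsoftmax and the loss below is (output * labels).mean(), a mean over batch_size * 2 entries, so the factor of -2 at the target column cancels the extra division by 2 and the result is exactly the usual mean negative log-likelihood. A plain-numpy sketch of that identity (not tinygrad code):

import numpy as np

bs = 4
logits = np.random.randn(bs, 2).astype(np.float32)
logp = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))  # logsoftmax

targets = np.ones(bs, dtype=int)      # "real" examples -> class 1
y = np.zeros((bs, 2), np.float32)
y[range(bs), targets] = -2.0          # same construction as real_label()

loss_via_labels = (logp * y).mean()               # what the train_* functions compute
loss_nll = -logp[range(bs), targets].mean()       # standard mean NLL
assert np.allclose(loss_via_labels, loss_nll)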
def train_discriminator(optimizer, data_real, data_fake):
  real_labels = real_label(batch_size)
  fake_labels = fake_label(batch_size)

  optimizer.zero_grad()

  output_real = discriminator.forward(data_real)
  loss_real = (output_real * real_labels).mean()

  output_fake = discriminator.forward(data_fake)
  loss_fake = (output_fake * fake_labels).mean()

  loss_real.backward()
  loss_fake.backward()
  optimizer.step()
  return loss_real.cpu().data + loss_fake.cpu().data

def train_generator(optimizer, data_fake):
  real_labels = real_label(batch_size)
  optimizer.zero_grad()
  output = discriminator.forward(data_fake)
  loss = (output * real_labels).mean()
  loss.backward()
  optimizer.step()
  return loss.cpu().data

for epoch in tqdm(range(epochs)):
  loss_g = 0.0
  loss_d = 0.0
  print(f"Epoch {epoch} of {epochs}")
  for i in tqdm(range(n_steps)):
    image = generator_batch()
    for step in range(k): # Try with k = 5 or 7.
      noise = Tensor(np.random.randn(batch_size,128), gpu=GPU)
      data_fake = generator.forward(noise).detach()
      data_real = image
      loss_d_step = train_discriminator(optim_d, data_real, data_fake)
      loss_d += loss_d_step
    noise = Tensor(np.random.randn(batch_size,128), gpu=GPU)
    data_fake = generator.forward(noise)
    loss_g_step = train_generator(optim_g, data_fake)
    loss_g += loss_g_step
  fake_images = generator.forward(ds_noise).detach().cpu().data
  fake_images = (fake_images.reshape(-1, 1, 28, 28)+ 1) / 2 # 0 - 1 range.
  fake_images = make_grid(torch.tensor(fake_images))
  save_image(fake_images, os.path.join(output_folder,f"image_{epoch}.jpg"))
  epoch_loss_g = loss_g / n_steps
  epoch_loss_d = loss_d / n_steps
  print(f"EPOCH: Generator loss: {epoch_loss_g}, Discriminator loss: {epoch_loss_d}")
else:
  print("Training Completed!")

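One subtlety in the loop above: the final else belongs to the for statement. In Python a for/else body runs once the loop finishes without hitting a break, so the completion message prints exactly once after the last epoch:

for epoch in range(3):
  pass
else:
  print("Training Completed!")  # runs once, after the loop ends normally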
@@ -29,9 +29,9 @@ class SqueezeExciteBlock2D:
    se = input.avg_pool2d(kernel_size=(input.shape[2], input.shape[3])) #GlobalAveragePool2D
    se = se.reshape(shape=(-1, self.filters))
    se = se.dot(self.weight1) + self.bias1
    se = se.relu()
    se = se.dot(self.weight2) + self.bias2
    se = se.sigmoid().reshape(shape=(-1,self.filters,1,1)) #for broadcasting
    se = input.mul(se)
    return se
@@ -45,9 +45,9 @@ class ConvBlock:
    #init layers
    self._bn = BatchNorm2D(128, training=True)
    self._seb = SqueezeExciteBlock2D(filters)

  def __call__(self, input):
    x = input.reshape(shape=(-1, self.inp, self.w, self.h))
    for cweight, cbias in zip(self.cweights, self.cbiases):
      x = x.pad2d(padding=[1,1,1,1]).conv2d(cweight).add(cbias).relu()
    x = self._bn(x)
@@ -80,7 +80,7 @@ class BigConvNet:

  def load(self, filename):
    with open(filename+'.npy', 'rb') as f:
      for par in get_parameters(self):
        #if par.requires_grad:
        try:
          par.cpu().data[:] = np.load(f)
@@ -102,7 +102,7 @@ class BigConvNet:

if __name__ == "__main__":
  lrs = [1e-4, 1e-5] if QUICK else [1e-3, 1e-4, 1e-5, 1e-5]
  epochss = [2, 1] if QUICK else [13, 3, 3, 1]
  BS = 32

  lmbd = 0.00025
@@ -113,9 +113,9 @@ if __name__ == "__main__":
  if QUICK:
    steps = 1
    X_test, Y_test = X_test[:BS], Y_test[:BS]

  model = BigConvNet()

  if len(sys.argv) > 1:
    try:
      model.load(sys.argv[1])
@@ -123,7 +123,7 @@ if __name__ == "__main__":
      evaluate(model, X_test, Y_test, BS=BS)
    except:
      print('could not load weights "'+sys.argv[1]+'".')

  if GPU:
    params = get_parameters(model)
    [x.cuda_() for x in params]
@@ -40,7 +40,7 @@ if __name__ == "__main__":
  TRANSFER = os.getenv("TRANSFER") is not None
  if TINY:
    model = TinyConvNet(classes)
  elif TRANSFER:
    model = EfficientNet(int(os.getenv("NUM", "0")), classes, has_se=True)
    model.load_weights_from_torch()
  else:
@@ -93,4 +93,3 @@ if __name__ == "__main__":
      fp_time + bp_time + opt_time + finish_time))

    del out, y, loss
@@ -8,4 +8,3 @@ print(a.cpu())
b = a.relu()
print(b.cpu())
assert(np.all(b.cpu().data >= 0))
@@ -229,4 +229,3 @@ class EfficientNet:
        mv.data[:] = vnp
      else:
        print("MISMATCH SHAPE IN %s, %r %r" % (k, mv.shape, vnp.shape))
@@ -23,7 +23,7 @@ def train(model, X_train, Y_train, optim, steps, num_classes=None, BS=128, devic
    out = model.forward(x)

    # NLL loss function
    loss = lossfn(out, y)
    optim.zero_grad()
    loss.backward()
    optim.step()
@@ -48,4 +48,4 @@ def evaluate(model, X_test, Y_test, num_classes=None, device=Device.CPU, BS=128)
  if num_classes is None: num_classes = Y_test.max().astype(int)+1
  accuracy = numpy_eval(num_classes)
  print("test set accuracy is %f" % accuracy)
  return accuracy
@@ -25,4 +25,3 @@ def get_parameters(obj):
    for k,v in obj.__dict__.items():
      parameters.extend(get_parameters(v))
  return parameters
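get_parameters walks an object graph recursively and collects every tensor-like leaf, which is how the optimizers earlier in this commit get their parameter lists. A self-contained sketch of the same pattern, with a hypothetical Param class standing in for tinygrad tensors:

class Param: pass

def collect_params(obj):
  # gather Param leaves from objects, lists/tuples and attribute dicts, recursively
  params = []
  if isinstance(obj, Param):
    params.append(obj)
  elif isinstance(obj, (list, tuple)):
    for v in obj: params.extend(collect_params(v))
  elif hasattr(obj, '__dict__'):
    for v in obj.__dict__.values(): params.extend(collect_params(v))
  return params

class Layer:
  def __init__(self): self.w, self.b = Param(), Param()

class Net:
  def __init__(self): self.layers = [Layer(), Layer()]

assert len(collect_params(Net())) == 4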
@@ -6,7 +6,7 @@ from .config import ANE

def tensors_allocated():
  return sum([isinstance(x, Tensor) for x in gc.get_objects()])

class TestGC(unittest.TestCase):
  device = Device.CPU
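tensors_allocated counts live Tensor instances by scanning gc.get_objects(); the GC tests use it to assert that running ops does not leak tensors. The same trick in isolation, with a throwaway class:

import gc

class Blob: pass

def blobs_allocated():
  return sum(isinstance(x, Blob) for x in gc.get_objects())

before = blobs_allocated()
tmp = [Blob() for _ in range(8)]
assert blobs_allocated() == before + 8
del tmp
gc.collect()
assert blobs_allocated() == before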
@@ -36,7 +36,7 @@ class TestGC(unittest.TestCase):

@unittest.skipUnless(GPU, "Requires GPU")
class TestGCGPU(TestGC):
  device = Device.GPU

@unittest.skipUnless(ANE, "Requires ANE")
class TestGCANE(TestGC):
@@ -105,4 +105,3 @@ class TestConvSpeedANE(TestConvSpeed):

if __name__ == '__main__':
  unittest.main()
@@ -164,4 +164,3 @@ class TestOpsANE(TestOps):

if __name__ == '__main__':
  unittest.main(verbosity=2)
@@ -28,4 +28,3 @@ class BatchNorm2D:
  def normalize(self, x, mean, var):
    x = (x - mean.reshape(shape=[1, -1, 1, 1])) * self.weight.reshape(shape=[1, -1, 1, 1])
    return x.div(var.add(self.eps).reshape(shape=[1, -1, 1, 1])**0.5) + self.bias.reshape(shape=[1, -1, 1, 1])
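The normalize step above is the standard batch-norm affine transform, y = weight * (x - mean) / sqrt(var + eps) + bias, broadcast over the channel axis. A plain-numpy sketch of the same arithmetic, with broadcasting in place of the tinygrad reshape/div calls:

import numpy as np

def bn_normalize(x, mean, var, weight, bias, eps=1e-5):
  # x: (N, C, H, W); mean, var, weight, bias: (C,)
  mean, var = mean.reshape(1, -1, 1, 1), var.reshape(1, -1, 1, 1)
  weight, bias = weight.reshape(1, -1, 1, 1), bias.reshape(1, -1, 1, 1)
  return (x - mean) * weight / np.sqrt(var + eps) + bias

x = np.random.randn(2, 3, 4, 4).astype(np.float32)
mean, var = x.mean(axis=(0, 2, 3)), x.var(axis=(0, 2, 3))
y = bn_normalize(x, mean, var, np.ones(3, np.float32), np.zeros(3, np.float32))
# with weight=1 and bias=0 each channel comes out standardized
assert np.allclose(y.mean(axis=(0, 2, 3)), 0, atol=1e-3)
assert np.allclose(y.var(axis=(0, 2, 3)), 1, atol=1e-2)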
@@ -34,4 +34,3 @@ class ReLU(Function):
    ctx.ane.run(compile_relu(ctx.ane, input.sz), input, ret)
    return ret
register('relu', ReLU, device=Tensor.ANE)
@@ -129,8 +129,8 @@ class ReLU(Function):
register('relu', ReLU)

def _exp_normalize(x, axis=None):
  y = np.exp(x - x.max(axis=axis, keepdims=True))
  return y / y.sum(axis=axis, keepdims=True)

class Sigmoid(Function):
  @staticmethod
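_exp_normalize subtracts the per-row max before exponentiating. Softmax is shift-invariant, so this changes nothing mathematically, but it keeps exp() from overflowing on large logits; LogSoftmax below then just takes the log of the result. A small numpy check:

import numpy as np

def softmax_naive(x):
  y = np.exp(x)
  return y / y.sum()

def softmax_stable(x):
  y = np.exp(x - x.max())   # same trick as _exp_normalize
  return y / y.sum()

x = np.array([1.0, 2.0, 3.0])
assert np.allclose(softmax_naive(x), softmax_stable(x))

big = np.array([1000.0, 1001.0, 1002.0])
print(softmax_naive(big))   # overflows: inf/inf -> nan
print(softmax_stable(big))  # finite, same values as the small example (shift invariance)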
@@ -155,7 +155,7 @@ class LogSoftmax(Function):
  def forward(ctx, input):
    softmax = _exp_normalize(input, axis=1)
    ctx.save_for_backward(softmax)
    return np.log(softmax)

  @staticmethod
  def backward(ctx, grad_output):
@@ -268,4 +268,3 @@ class AvgPool2D(Function):
    py, px = ctx.kernel_size
    return unstack_for_pool(lambda idx: grad_output/py/px, s, py, px)
register('avg_pool2d', AvgPool2D)
@@ -67,13 +67,13 @@ def get_binop_prg(cl_ctx, code, complist):
  ndims = len(complist)
  args = "".join([", int d%d" % i for i in range(ndims)]) + "".join([", int p%d" % i for i in range(ndims-1)])
  compute_idx_rets = ["\n int idx_ret"+str(i)+" = (gid0 / "+("p%d"%i if i < ndims-1 else "1")+") % d"+str(i)+";" for i in range(ndims)]

  idx_exprs = ["0", "0"] # [idx_x, idx_y]
  for i in range(ndims):
    for j in range(2):
      if complist[i][j]:
        idx_exprs[j] = "idx_ret%d + d%d*(%s)" % (i, i, idx_exprs[j])

  return cl.Program(cl_ctx, """__kernel void binop(__global const float *x_g, __global const float *y_g, __global float *res_g"""+args+""") {
int gid0 = get_global_id(0);"""+"".join(compute_idx_rets)+"""
float a = x_g["""+idx_exprs[0]+"""];
@@ -88,7 +88,7 @@ def binary_op(ctx, code, x, y):
  if not np.all((shape_x == 1) | (shape_y == 1) | (shape_x == shape_y)):
    raise Exception(f"binary op unbroadcastable shape mismatch: {x.shape} vs {y.shape}")
  shape_ret = np.maximum(shape_x, shape_y)

  dimlist, complist = [], [] # note: len(dimlist) may be less than n_dims
  def push(dim, comp):
    if len(complist) > 0 and complist[-1] == comp:
@@ -97,7 +97,7 @@ def binary_op(ctx, code, x, y):
    dimlist.append(dim); complist.append(comp)
  for i in range(n_dims): # group together any adjacent dimensions that we can to simplify broadcasting
    push(i32(max(shape_x[i], shape_y[i])), (shape_x[i] > 1, shape_y[i] > 1))

  prg = get_binop_prg(ctx.cl_ctx, code, tuple(complist))
  ret = buffer_new(ctx, shape_ret, zero=True)
  prod_list = np.array(dimlist, dtype=i32)[-1::-1].cumprod(dtype=i32)[-1::-1] # take cumprod from back to front
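The push/complist logic above fuses adjacent axes that share the same broadcast pattern, so the generated OpenCL kernel only has to index the fused dimensions. A standalone sketch of that grouping, with a hypothetical group_dims helper mirroring the loop above:

def group_dims(shape_x, shape_y):
  dimlist, complist = [], []   # fused sizes and (x needs indexing, y needs indexing) flags
  def push(dim, comp):
    if complist and complist[-1] == comp:
      dimlist[-1] *= dim       # same pattern as the previous axis: fuse them
    elif comp != (False, False):
      dimlist.append(dim); complist.append(comp)
  for sx, sy in zip(shape_x, shape_y):
    push(max(sx, sy), (sx > 1, sy > 1))
  return dimlist, complist

# (4,5,6,3) broadcast against (4,1,1,3): the two middle axes share the pattern
# "present in x only", so they fuse into a single axis of size 30.
print(group_dims((4, 5, 6, 3), (4, 1, 1, 3)))
# -> ([4, 30, 3], [(True, True), (True, False), (True, True)])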
@@ -121,7 +121,7 @@ def reduce_op(ctx, code, code2, inp, axis=None):
    osize = [1]*len(inp.shape)
  else:
    osize = np.array(inp.shape)
    osize[list(axis)] = 1
  ret = buffer_new(ctx, osize)
  if axis is None:
    ret.shape = (1,)
@@ -154,15 +154,15 @@ class Tensor:
      old = data
      data = np.empty(old.shape, dtype=np.float32)
      with ProfileOp("toCPU", [data]):
        cl.enqueue_copy(cl_queue, data, old.cl, is_blocking=True)

    elif "ANETensor" in str(type(data)):
      if device == Device.ANE: return data
      with ProfileOp("toCPU", [data]):
        data = data.data().astype(np.float32)

    if not isinstance(data, np.ndarray):
      data = np.array(data, dtype=np.float32)

    if data.dtype != np.float32 and not Tensor.did_float_warning:
      # warning? float64 is actually needed for numerical jacobian
@@ -285,4 +285,3 @@ try:
except ImportError:
  # no GPU support
  GPU = False