# MP-SPDZ/Compiler/ml.py
import mpc_math, math
# explicit imports for names used below (operator.mul, util.tree_reduce)
import operator
from Compiler import util
from Compiler.types import *
from Compiler.types import _unreduced_squant
from Compiler.library import *
from functools import reduce

def log_e(x):
    return mpc_math.log_fx(x, math.e)

def exp(x):
    return mpc_math.pow_fx(math.e, x)
def sanitize(x, raw, lower, upper):
    exp_limit = 2 ** (x.k - x.f - 1)
    limit = math.log(exp_limit)
    if get_program().options.ring:
        res = raw
    else:
        res = (x > limit).if_else(upper, raw)
    return (x < -limit).if_else(lower, res)

def sigmoid(x):
    return sigmoid_from_e_x(x, exp(-x))

def sigmoid_from_e_x(x, e_x):
    return sanitize(x, 1 / (1 + e_x), 0, 1)

def sigmoid_prime(x):
    sx = sigmoid(x)
    return sx * (1 - sx)

def lse_0_from_e_x(x, e_x):
    return sanitize(-x, log_e(1 + e_x), x + 2 ** -x.f, 0)

def lse_0(x):
    return lse_0_from_e_x(x, exp(x))

def relu_prime(x):
    return (0 <= x)

def relu(x):
    return (0 < x).if_else(x, 0)
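# sanitize() clamps the exp-based activations to avoid fixed-point overflow:
# once |x| exceeds limit = ln(2 ** (k - f - 1)), the given saturation values
# are returned instead.  For example, assuming the default sfix precision of
# f = 16 and k = 31 (an assumption; the actual values depend on
# sfix.set_precision), limit = ln(2 ** 14) ~= 9.7, so sigmoid() returns
# exactly 0 below -9.7 and exactly 1 above 9.7.  Under the ring option only
# the lower clamp is applied.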
def progress(x):
    return
    print_ln(x)
    time()

def set_n_threads(n_threads):
    Layer.n_threads = n_threads
    Optimizer.n_threads = n_threads
class Layer:
    n_threads = 1

class Output(Layer):
    def __init__(self, N, debug=False):
        self.N = N
        self.X = sfix.Array(N)
        self.Y = sfix.Array(N)
        self.nabla_X = sfix.Array(N)
        self.l = MemValue(sfix(-1))
        self.e_x = sfix.Array(N)
        self.debug = debug
        self.weights = cint.Array(N)
        self.weights.assign_all(1)
        self.weight_total = N

    nablas = lambda self: ()
    thetas = lambda self: ()
    reset = lambda self: None

    def divisor(self, divisor, size):
        return cfix(1.0 / divisor, size=size)

    def forward(self, N=None):
        N = N or self.N
        lse = sfix.Array(N)
        @multithread(self.n_threads, N)
        def _(base, size):
            x = self.X.get_vector(base, size)
            y = self.Y.get_vector(base, size)
            e_x = exp(-x)
            self.e_x.assign(e_x, base)
            lse.assign(lse_0_from_e_x(-x, e_x) + x * (1 - y), base)
        e_x = self.e_x.get_vector(0, N)
        self.l.write(sum(lse) * self.divisor(self.N, 1))

    def backward(self):
        @multithread(self.n_threads, self.N)
        def _(base, size):
            diff = sigmoid_from_e_x(self.X.get_vector(base, size),
                                    self.e_x.get_vector(base, size)) - \
                   self.Y.get_vector(base, size)
            assert sfix.f == cfix.f
            diff *= self.weights.get_vector(base, size)
            self.nabla_X.assign(diff * self.divisor(self.weight_total, size),
                                base)
            # @for_range_opt(len(diff))
            # def _(i):
            #     self.nabla_X[i] = self.nabla_X[i] * self.weights[i]
            if self.debug:
                a = cfix.Array(len(diff))
                a.assign(diff.reveal())
                @for_range(len(diff))
                def _(i):
                    x = a[i]
                    print_ln_if((x < -1.001) + (x > 1.001), 'sigmoid')
                    #print_ln('%s', x)

    def set_weights(self, weights):
        self.weights.assign(weights)
        self.weight_total = sum(weights)
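# Output implements binary cross-entropy on logits: forward() computes per
# sample
#     lse_0(-x) + x * (1 - y) = log(1 + e^(-x)) + x - x * y
#                             = log(1 + e^x) - x * y,
# the logistic loss for logit x and label y in {0, 1}, averaged over the N
# samples, and backward() uses its gradient sigmoid(x) - y, scaled by the
# per-sample weights.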
class DenseBase(Layer):
    thetas = lambda self: (self.W, self.b)
    nablas = lambda self: (self.nabla_W, self.nabla_b)

    def backward_params(self, f_schur_Y):
        N = self.N
        tmp = Matrix(self.d_in, self.d_out, unreduced_sfix)
        @for_range_opt_multithread(self.n_threads, [self.d_in, self.d_out])
        def _(j, k):
            assert self.d == 1
            a = [f_schur_Y[i][0][k] for i in range(N)]
            b = [self.X[i][0][j] for i in range(N)]
            tmp[j][k] = sfix.unreduced_dot_product(a, b)
        if self.d_in * self.d_out < 100000:
            print('reduce at once')
            @multithread(self.n_threads, self.d_in * self.d_out)
            def _(base, size):
                self.nabla_W.assign_vector(
                    tmp.get_vector(base, size).reduce_after_mul(), base=base)
        else:
            @for_range_opt(self.d_in)
            def _(i):
                self.nabla_W[i] = tmp[i].get_vector().reduce_after_mul()
        self.nabla_b.assign(sum(sum(f_schur_Y[k][j][i] for k in range(N))
                                for j in range(self.d)) for i in range(self.d_out))
        progress('nabla W/b')
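# With d == 1, backward_params computes
#     nabla_W[j][k] = sum_i X[i][0][j] * f_schur_Y[i][0][k]
#     nabla_b[k]    = sum_i f_schur_Y[i][0][k],
# i.e. nabla_W = X^T * f_schur_Y and nabla_b the column sums of f_schur_Y.
# The dot products are accumulated unreduced and truncated only once per
# entry to save rounds.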
class Dense(DenseBase):
    def __init__(self, N, d_in, d_out, d=1, activation='id'):
        self.activation = activation
        if activation == 'id':
            self.f = lambda x: x
        elif activation == 'relu':
            self.f = relu
            self.f_prime = relu_prime
        elif activation == 'sigmoid':
            self.f = sigmoid
            self.f_prime = sigmoid_prime

        self.N = N
        self.d_in = d_in
        self.d_out = d_out
        self.d = d

        self.X = MultiArray([N, d, d_in], sfix)
        self.Y = MultiArray([N, d, d_out], sfix)
        self.W = sfix.Matrix(d_in, d_out)
        self.b = sfix.Array(d_out)
        self.reset()

        self.nabla_Y = MultiArray([N, d, d_out], sfix)
        self.nabla_X = MultiArray([N, d, d_in], sfix)
        self.nabla_W = sfix.Matrix(d_in, d_out)
        self.nabla_W.assign_all(0)
        self.nabla_b = sfix.Array(d_out)

        self.f_input = MultiArray([N, d, d_out], sfix)

    def reset(self):
        d_in = self.d_in
        d_out = self.d_out
        r = math.sqrt(6.0 / (d_in + d_out))
        @for_range(d_in)
        def _(i):
            @for_range(d_out)
            def _(j):
                self.W[i][j] = sfix.get_random(-r, r)
        self.b.assign_all(0)

    def compute_f_input(self):
        prod = MultiArray([self.N, self.d, self.d_out], sfix)
        @for_range_opt_multithread(self.n_threads, self.N)
        def _(i):
            self.X[i].plain_mul(self.W, res=prod[i])
        @for_range_opt_multithread(self.n_threads, self.N)
        def _(i):
            @for_range_opt(self.d)
            def _(j):
                v = prod[i][j].get_vector() + self.b.get_vector()
                self.f_input[i][j].assign(v)
        progress('f input')

    def forward(self):
        self.compute_f_input()
        self.Y.assign_vector(self.f(self.f_input.get_vector()))

    def backward(self, compute_nabla_X=True):
        N = self.N
        d = self.d
        d_out = self.d_out
        X = self.X
        Y = self.Y
        W = self.W
        b = self.b
        nabla_X = self.nabla_X
        nabla_Y = self.nabla_Y
        nabla_W = self.nabla_W
        nabla_b = self.nabla_b

        if self.activation == 'id':
            f_schur_Y = nabla_Y
        else:
            f_prime_bit = MultiArray([N, d, d_out], sint)
            f_schur_Y = MultiArray([N, d, d_out], sfix)
            self.compute_f_input()
            f_prime_bit.assign_vector(self.f_prime(self.f_input.get_vector()))
            progress('f prime')
            @for_range_opt(N)
            def _(i):
                f_schur_Y[i] = nabla_Y[i].schur(f_prime_bit[i])
            progress('f prime schur Y')

        if compute_nabla_X:
            @for_range_opt(N)
            def _(i):
                if self.activation == 'id':
                    nabla_X[i] = nabla_Y[i].mul_trans(W)
                else:
                    nabla_X[i] = nabla_Y[i].schur(f_prime_bit[i]).mul_trans(W)
            progress('nabla X')

        self.backward_params(f_schur_Y)
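# A Dense layer computes Y = f(X * W + b) per sample, with f one of 'id',
# 'relu' or 'sigmoid'.  On the backward pass it forms
# f_schur_Y = nabla_Y (*) f'(X * W + b) (element-wise; f' = 1 for 'id'),
# propagates nabla_X = f_schur_Y * W^T to the previous layer, and hands
# f_schur_Y to DenseBase.backward_params for the weight and bias gradients.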
class QuantizedDense(DenseBase):
    def __init__(self, N, d_in, d_out):
        self.N = N
        self.d_in = d_in
        self.d_out = d_out
        self.d = 1
        self.H = math.sqrt(1.5 / (d_in + d_out))

        self.W = sfix.Matrix(d_in, d_out)
        self.nabla_W = self.W.same_shape()
        self.T = sint.Matrix(d_in, d_out)
        self.b = sfix.Array(d_out)
        self.nabla_b = self.b.same_shape()

        self.X = MultiArray([N, 1, d_in], sfix)
        self.Y = MultiArray([N, 1, d_out], sfix)
        self.nabla_Y = self.Y.same_shape()

    def reset(self):
        @for_range(self.d_in)
        def _(i):
            @for_range(self.d_out)
            def _(j):
                self.W[i][j] = sfix.get_random(-1, 1)
        self.b.assign_all(0)

    def forward(self):
        @for_range_opt(self.d_in)
        def _(i):
            @for_range_opt(self.d_out)
            def _(j):
                over = self.W[i][j] > 0.5
                under = self.W[i][j] < -0.5
                self.T[i][j] = over.if_else(1, under.if_else(-1, 0))
                over = self.W[i][j] > 1
                under = self.W[i][j] < -1
                self.W[i][j] = over.if_else(1, under.if_else(-1, self.W[i][j]))
        @for_range_opt(self.N)
        def _(i):
            assert self.d_out == 1
            self.Y[i][0][0] = self.b[0] + self.H * sfix._new(
                sint.dot_product([self.T[j][0] for j in range(self.d_in)],
                                 [self.X[i][0][j].v for j in range(self.d_in)]))

    def backward(self, compute_nabla_X=False):
        assert not compute_nabla_X
        self.backward_params(self.nabla_Y)
class Dropout:
    def __init__(self, N, d1, d2=1):
        self.N = N
        self.d1 = d1
        self.d2 = d2
        self.X = MultiArray([N, d1, d2], sfix)
        self.Y = MultiArray([N, d1, d2], sfix)
        self.nabla_Y = MultiArray([N, d1, d2], sfix)
        self.nabla_X = MultiArray([N, d1, d2], sfix)
        self.alpha = 0.5
        self.B = MultiArray([N, d1, d2], sint)

    def forward(self):
        assert self.alpha == 0.5
        @for_range(self.N)
        def _(i):
            @for_range(self.d1)
            def _(j):
                @for_range(self.d2)
                def _(k):
                    self.B[i][j][k] = sint.get_random_bit()
        self.Y = self.X.schur(self.B)

    def backward(self):
        self.nabla_X = self.nabla_Y.schur(self.B)
class QuantBase(object):
    n_threads = 1

    @staticmethod
    def new_squant():
        class _(squant):
            @classmethod
            def get_input_from(cls, player, size=None):
                return cls._new(sint.get_input_from(player, size=size))
        return _

    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape

        self.input_squant = self.new_squant()
        self.output_squant = self.new_squant()

        self.X = MultiArray(input_shape, self.input_squant)
        self.Y = MultiArray(output_shape, self.output_squant)

    def temp_shape(self):
        return [0]
class QuantConvBase(QuantBase):
    fewer_rounds = True
    temp_weights = None
    temp_inputs = None

    @classmethod
    def init_temp(cls, layers):
        size = 0
        for layer in layers:
            size = max(size, reduce(operator.mul, layer.temp_shape()))
        cls.temp_weights = sfix.Array(size)
        cls.temp_inputs = sfix.Array(size)

    def __init__(self, input_shape, weight_shape, bias_shape, output_shape, stride):
        super(QuantConvBase, self).__init__(input_shape, output_shape)

        self.weight_shape = weight_shape
        self.bias_shape = bias_shape
        self.stride = stride

        self.weight_squant = self.new_squant()
        self.bias_squant = self.new_squant()

        self.weights = MultiArray(weight_shape, self.weight_squant)
        self.bias = Array(output_shape[-1], self.bias_squant)

        self.unreduced = MultiArray(self.output_shape, sint,
                                    address=self.Y.address)

        assert(weight_shape[-1] == input_shape[-1])
        assert(bias_shape[0] == output_shape[-1])
        assert(len(bias_shape) == 1)
        assert(len(input_shape) == 4)
        assert(len(output_shape) == 4)
        assert(len(weight_shape) == 4)

    def input_from(self, player):
        for s in self.input_squant, self.weight_squant, self.bias_squant, self.output_squant:
            s.set_params(sfloat.get_input_from(player), sint.get_input_from(player))
        self.weights.input_from(player, budget=100000)
        self.bias.input_from(player)
        print('WARNING: assuming that bias quantization parameters are correct')
        self.output_squant.params.precompute(self.input_squant.params, self.weight_squant.params)

    def dot_product(self, iv, wv, out_y, out_x, out_c):
        bias = self.bias[out_c]
        acc = squant.unreduced_dot_product(iv, wv)
        acc.v += bias.v
        acc.res_params = self.output_squant.params
        #self.Y[0][out_y][out_x][out_c] = acc.reduce_after_mul()
        self.unreduced[0][out_y][out_x][out_c] = acc.v

    def reduction(self):
        unreduced = self.unreduced
        n_summands = self.n_summands()
        start_timer(2)
        n_outputs = reduce(operator.mul, self.output_shape)
        if n_outputs % self.n_threads == 0:
            n_per_thread = n_outputs // self.n_threads
            @for_range_opt_multithread(self.n_threads, self.n_threads)
            def _(i):
                res = _unreduced_squant(
                    sint.load_mem(unreduced.address + i * n_per_thread,
                                  size=n_per_thread),
                    (self.input_squant.params, self.weight_squant.params),
                    self.output_squant.params,
                    n_summands).reduce_after_mul()
                res.store_in_mem(self.Y.address + i * n_per_thread)
        else:
            @for_range_opt_multithread(self.n_threads, self.output_shape[1])
            def _(out_y):
                self.Y[0][out_y].assign_vector(_unreduced_squant(
                    unreduced[0][out_y].get_vector(),
                    (self.input_squant.params, self.weight_squant.params),
                    self.output_squant.params,
                    n_summands).reduce_after_mul())
        stop_timer(2)

    def temp_shape(self):
        return list(self.output_shape[1:]) + [self.n_summands()]

    def prepare_temp(self):
        shape = self.temp_shape()
        inputs = MultiArray(shape, self.input_squant,
                            address=self.temp_inputs)
        weights = MultiArray(shape, self.weight_squant,
                             address=self.temp_weights)
        return inputs, weights
class QuantConv2d(QuantConvBase):
    def n_summands(self):
        _, weights_h, weights_w, _ = self.weight_shape
        _, inputs_h, inputs_w, n_channels_in = self.input_shape
        return weights_h * weights_w * n_channels_in

    def forward(self, N=1):
        assert(N == 1)
        assert(self.weight_shape[0] == self.output_shape[-1])

        _, weights_h, weights_w, _ = self.weight_shape
        _, inputs_h, inputs_w, n_channels_in = self.input_shape
        _, output_h, output_w, n_channels_out = self.output_shape

        stride_h, stride_w = self.stride
        padding_h, padding_w = (weights_h // 2, weights_w // 2)

        if self.fewer_rounds:
            inputs, weights = self.prepare_temp()

        @for_range_opt_multithread(self.n_threads,
                                   [output_h, output_w, n_channels_out])
        def _(out_y, out_x, out_c):
            in_x_origin = (out_x * stride_w) - padding_w
            in_y_origin = (out_y * stride_h) - padding_h
            iv = []
            wv = []
            for filter_y in range(weights_h):
                in_y = in_y_origin + filter_y
                inside_y = (0 <= in_y) * (in_y < inputs_h)
                for filter_x in range(weights_w):
                    in_x = in_x_origin + filter_x
                    inside_x = (0 <= in_x) * (in_x < inputs_w)
                    inside = inside_y * inside_x
                    if inside is 0:
                        continue
                    for in_c in range(n_channels_in):
                        iv += [self.X[0][in_y * inside_y]
                               [in_x * inside_x][in_c]]
                        wv += [self.weights[out_c][filter_y][filter_x][in_c]]
                        wv[-1] *= inside
            if self.fewer_rounds:
                inputs[out_y][out_x][out_c].assign(iv)
                weights[out_y][out_x][out_c].assign(wv)
            else:
                self.dot_product(iv, wv, out_y, out_x, out_c)

        if self.fewer_rounds:
            @for_range_opt_multithread(self.n_threads,
                                       list(self.output_shape[1:]))
            def _(out_y, out_x, out_c):
                self.dot_product(inputs[out_y][out_x][out_c],
                                 weights[out_y][out_x][out_c],
                                 out_y, out_x, out_c)

        self.reduction()
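# With fewer_rounds set, the convolution runs in two phases: the first
# multithreaded loop only gathers, for every output position, the overlapping
# input window and the matching filter weights into the shared temp arrays
# (out-of-window taps are redirected to index 0 and their weights zeroed via
# `inside`); the second loop then issues all dot products at once, and
# reduction() re-quantises the accumulated sums in a single pass.  This
# batches the secure multiplications into fewer communication rounds than
# evaluating each output pixel end to end.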
class QuantDepthwiseConv2d(QuantConvBase):
    def n_summands(self):
        _, weights_h, weights_w, _ = self.weight_shape
        return weights_h * weights_w

    def forward(self, N=1):
        assert(N == 1)
        assert(self.weight_shape[-1] == self.output_shape[-1])
        assert(self.input_shape[-1] == self.output_shape[-1])

        _, weights_h, weights_w, _ = self.weight_shape
        _, inputs_h, inputs_w, n_channels_in = self.input_shape
        _, output_h, output_w, n_channels_out = self.output_shape

        stride_h, stride_w = self.stride
        padding_h, padding_w = (weights_h // 2, weights_w // 2)
        depth_multiplier = 1

        if self.fewer_rounds:
            inputs, weights = self.prepare_temp()

        @for_range_opt_multithread(self.n_threads,
                                   [output_h, output_w, n_channels_in])
        def _(out_y, out_x, in_c):
            for m in range(depth_multiplier):
                oc = m + in_c * depth_multiplier
                in_x_origin = (out_x * stride_w) - padding_w
                in_y_origin = (out_y * stride_h) - padding_h
                iv = []
                wv = []
                for filter_y in range(weights_h):
                    for filter_x in range(weights_w):
                        in_x = in_x_origin + filter_x
                        in_y = in_y_origin + filter_y
                        inside = (0 <= in_x) * (in_x < inputs_w) * \
                                 (0 <= in_y) * (in_y < inputs_h)
                        if inside is 0:
                            continue
                        iv += [self.X[0][in_y][in_x][in_c]]
                        wv += [self.weights[0][filter_y][filter_x][oc]]
                        wv[-1] *= inside
                if self.fewer_rounds:
                    inputs[out_y][out_x][oc].assign(iv)
                    weights[out_y][out_x][oc].assign(wv)
                else:
                    self.dot_product(iv, wv, out_y, out_x, oc)

        if self.fewer_rounds:
            @for_range_opt_multithread(self.n_threads,
                                       list(self.output_shape[1:]))
            def _(out_y, out_x, out_c):
                self.dot_product(inputs[out_y][out_x][out_c],
                                 weights[out_y][out_x][out_c],
                                 out_y, out_x, out_c)

        self.reduction()
class QuantAveragePool2d(QuantBase):
    def __init__(self, input_shape, output_shape, filter_size):
        super(QuantAveragePool2d, self).__init__(input_shape, output_shape)
        self.filter_size = filter_size

    def input_from(self, player):
        print('WARNING: assuming that input and output quantization parameters are the same')
        for s in self.input_squant, self.output_squant:
            s.set_params(sfloat.get_input_from(player), sint.get_input_from(player))

    def forward(self, N=1):
        assert(N == 1)
        _, input_h, input_w, n_channels_in = self.input_shape
        _, output_h, output_w, n_channels_out = self.output_shape

        n = input_h * input_w
        print('divisor: ', n)

        assert output_h == output_w == 1
        assert n_channels_in == n_channels_out

        padding_h, padding_w = (0, 0)
        stride_h, stride_w = (2, 2)
        filter_h, filter_w = self.filter_size

        @for_range_opt(output_h)
        def _(out_y):
            @for_range_opt(output_w)
            def _(out_x):
                @for_range_opt(n_channels_in)
                def _(c):
                    in_x_origin = (out_x * stride_w) - padding_w
                    in_y_origin = (out_y * stride_h) - padding_h
                    fxs = (-in_x_origin).max(0)
                    #fxe = min(filter_w, input_w - in_x_origin)
                    fys = (-in_y_origin).max(0)
                    #fye = min(filter_h, input_h - in_y_origin)
                    acc = 0
                    #fc = 0
                    for i in range(filter_h):
                        filter_y = fys + i
                        for j in range(filter_w):
                            filter_x = fxs + j
                            in_x = in_x_origin + filter_x
                            in_y = in_y_origin + filter_y
                            acc += self.X[0][in_y][in_x][c].v
                            #fc += 1
                    logn = int(math.log(n, 2))
                    acc = (acc + n // 2)
                    if 2 ** logn == n:
                        acc = acc.round(self.output_squant.params.k + logn,
                                        logn, nearest=True)
                    else:
                        acc = acc.int_div(sint(n),
                                          self.output_squant.params.k + logn)
                    #acc = min(255, max(0, acc))
                    self.Y[0][out_y][out_x][c] = self.output_squant._new(acc)
class QuantReshape(QuantBase):
    def __init__(self, input_shape, _, output_shape):
        super(QuantReshape, self).__init__(input_shape, output_shape)

    def input_from(self, player):
        print('WARNING: assuming that input and output quantization parameters are the same')
        _ = self.new_squant()
        for s in self.input_squant, _, self.output_squant:
            s.set_params(sfloat.get_input_from(player), sint.get_input_from(player))
        for i in range(2):
            sint.get_input_from(player)

    def forward(self, N=1):
        assert(N == 1)
        # reshaping is implicit
        self.Y.assign(self.X)
class QuantSoftmax(QuantBase):
    def input_from(self, player):
        print('WARNING: assuming that input and output quantization parameters are the same')
        for s in self.input_squant, self.output_squant:
            s.set_params(sfloat.get_input_from(player), sint.get_input_from(player))

    def forward(self, N=1):
        assert(N == 1)
        assert(len(self.input_shape) == 2)
        # just print the best
        def comp(left, right):
            c = left[1].v.greater_than(right[1].v, self.input_squant.params.k)
            #print_ln('comp %s %s %s', c.reveal(), left[1].v.reveal(), right[1].v.reveal())
            return [c.if_else(x, y) for x, y in zip(left, right)]
        print_ln('guess: %s',
                 util.tree_reduce(comp, list(enumerate(self.X[0])))[0].reveal())
class Optimizer:
    n_threads = Layer.n_threads

    def forward(self, N):
        for j in range(len(self.layers) - 1):
            self.layers[j].forward()
            self.layers[j + 1].X.assign(self.layers[j].Y)
        self.layers[-1].forward(N)

    def backward(self):
        for j in range(1, len(self.layers)):
            self.layers[-j].backward()
            self.layers[-j - 1].nabla_Y.assign(self.layers[-j].nabla_X)
        self.layers[0].backward(compute_nabla_X=False)

    def run(self):
        i = MemValue(0)
        @do_while
        def _():
            if self.X_by_label is not None:
                N = self.layers[0].N
                assert self.layers[-1].N == N
                assert N % 2 == 0
                n = N // 2
                @for_range(n)
                def _(i):
                    self.layers[-1].Y[i] = 0
                    self.layers[-1].Y[i + n] = 1
                n_per_epoch = int(math.ceil(1. * max(len(X) for X in
                                                     self.X_by_label) / n))
                print('%d runs per epoch' % n_per_epoch)
                indices_by_label = []
                for label, X in enumerate(self.X_by_label):
                    indices = regint.Array(n * n_per_epoch)
                    indices_by_label.append(indices)
                    indices.assign(i % len(X) for i in range(len(indices)))
                    indices.shuffle()
                @for_range(n_per_epoch)
                def _(j):
                    j = MemValue(j)
                    for label, X in enumerate(self.X_by_label):
                        indices = indices_by_label[label]
                        @for_range_multithread(self.n_threads, 1, n)
                        def _(i):
                            idx = indices[i + j * n_per_epoch]
                            self.layers[0].X[i + label * n] = X[idx]
                    self.forward(None)
                    self.backward()
                    self.update(i)
            else:
                self.forward(None)
                self.backward()
                self.update(i)
            loss = self.layers[-1].l
            if self.report_loss:
                print_ln('loss after epoch %s: %s', i, loss.reveal())
            else:
                print_ln('done with epoch %s', i)
            time()
            i.iadd(1)
            res = (i < self.n_epochs)
            if self.tol > 0:
                res *= (1 - (loss >= 0) * (loss < self.tol)).reveal()
            return res
        print_ln('finished after %s epochs', i)
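# Optimizer chains the layers by copying each layer's Y into the next
# layer's X on the forward pass and each layer's nabla_X into the previous
# layer's nabla_Y on the backward pass.  run() performs one
# forward/backward/update sweep per iteration of the do_while loop; when
# X_by_label is set (see SGD.reset), each sweep first loads a balanced,
# shuffled half-batch of samples of each label into the first layer.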
class Adam(Optimizer):
    def __init__(self, layers, n_epochs):
        self.alpha = .001
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.epsilon = 10 ** -8
        self.n_epochs = n_epochs

        self.layers = layers
        self.ms = []
        self.vs = []
        self.gs = []
        self.thetas = []
        for layer in layers:
            for nabla in layer.nablas():
                self.gs.append(nabla)
                for x in self.ms, self.vs:
                    x.append(nabla.same_shape())
            for theta in layer.thetas():
                self.thetas.append(theta)

        self.mhat_factors = Array(n_epochs, sfix)
        self.vhat_factors = Array(n_epochs, sfix)
        for i in range(n_epochs):
            for factors, beta in ((self.mhat_factors, self.beta1),
                                  (self.vhat_factors, self.beta2)):
                factors[i] = 1. / (1 - beta ** (i + 1))

    def update(self, i_epoch):
        for m, v, g, theta in zip(self.ms, self.vs, self.gs, self.thetas):
            @for_range_opt(len(m))
            def _(k):
                m[k] = self.beta1 * m[k] + (1 - self.beta1) * g[k]
                v[k] = self.beta2 * v[k] + (1 - self.beta2) * g[k] ** 2
                mhat = m[k] * self.mhat_factors[i_epoch]
                vhat = v[k] * self.vhat_factors[i_epoch]
                # standard Adam step: theta -= alpha * mhat / (sqrt(vhat) + epsilon);
                # the parentheses keep epsilon inside the denominator
                theta[k] = theta[k] - self.alpha * mhat / \
                           (mpc_math.sqrt(vhat) + self.epsilon)
class SGD(Optimizer):
    def __init__(self, layers, n_epochs, debug=False, report_loss=False):
        self.momentum = 0.9
        self.layers = layers
        self.n_epochs = n_epochs
        self.thetas = []
        self.nablas = []
        self.delta_thetas = []
        for layer in layers:
            self.nablas.extend(layer.nablas())
            self.thetas.extend(layer.thetas())
            for theta in layer.thetas():
                self.delta_thetas.append(theta.same_shape())
        self.gamma = MemValue(sfix(0.01))
        self.debug = debug
        self.report_loss = report_loss
        self.tol = 0.000
        self.X_by_label = None

    def reset(self, X_by_label=None):
        self.X_by_label = X_by_label
        for y in self.delta_thetas:
            y.assign_all(0)
        for layer in self.layers:
            layer.reset()

    def update(self, i_epoch):
        for nabla, theta, delta_theta in zip(self.nablas, self.thetas,
                                             self.delta_thetas):
            @for_range_opt_multithread(self.n_threads, len(nabla))
            def _(k):
                old = delta_theta[k]
                if isinstance(old, Array):
                    old = old.get_vector()
                red_old = self.momentum * old
                new = self.gamma * nabla[k]
                diff = red_old - new
                delta_theta[k] = diff
                theta[k] = theta[k] + delta_theta[k]
                if self.debug:
                    for x, name in (old, 'old'), (red_old, 'red_old'), \
                            (new, 'new'), (diff, 'diff'):
                        x = x.reveal()
                        print_ln_if((x > 1000) + (x < -1000),
                                    name + ': %s %s %s %s',
                                    *[y.v.reveal() for y in (old, red_old,
                                                             new, diff)])
            if self.debug:
                d = delta_theta.get_vector().reveal()
                a = cfix.Array(len(d.v))
                a.assign(d)
                @for_range(len(a))
                def _(i):
                    x = a[i]
                    print_ln_if((x > 1000) + (x < -1000),
                                'update len=%d' % len(nabla))
                a.assign(nabla.get_vector().reveal())
                @for_range(len(a))
                def _(i):
                    x = a[i]
                    print_ln_if((x > 1000) + (x < -1000),
                                'nabla len=%d' % len(nabla))
        self.gamma.imul(1 - 10 ** -6)
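# Usage sketch (hypothetical, not part of this module): assembling a small
# logistic-regression-style model and training it with SGD from an .mpc
# program.  `my_X` and `my_Y` stand for secret-shared inputs supplied by the
# program, e.g. read via sfix.get_input_from.
#
#     N, n_features = 128, 10
#     layers = [Dense(N, n_features, 1), Output(N)]
#     layers[0].X.assign(my_X)      # shape N x 1 x n_features
#     layers[1].Y.assign(my_Y)      # N labels in {0, 1}
#     set_n_threads(4)
#     sgd = SGD(layers, n_epochs=10, report_loss=True)
#     sgd.reset()
#     sgd.run()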