transformer is training

George Hotz
2020-12-27 18:46:32 -05:00
parent a361ef6861
commit d864e1c71a
2 changed files with 9 additions and 9 deletions


@@ -55,6 +55,7 @@ class TransformerBlock:
     value = value.transpose(order=(0,2,1,3)) # (bs, num_heads, T, head_size)
     score = query.dot(key) * (1 / np.sqrt(self.head_size))
+    # TODO: this should be a normal softmax
     weights = score.logsoftmax() # (bs, num_heads, T, T)
     attention = weights.dot(value).transpose(order=(0,2,1,3))
     x = inputs + attention.reshape(shape=(-1, self.num_heads * self.head_size)).dot(self.final)
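
Note on the TODO above: logsoftmax returns log-probabilities, so weights holds negative numbers and weights.dot(value) is not a convex combination of the value rows; the model evidently trains anyway per the commit message, but this is not standard scaled dot-product attention. For reference, a numerically stable softmax in plain NumPy, a sketch of what the TODO asks for rather than the tinygrad API of this commit:

    import numpy as np

    def softmax(score, axis=-1):
      # subtract the row max first so exp cannot overflow
      shifted = score - score.max(axis=axis, keepdims=True)
      e = np.exp(shifted)
      return e / e.sum(axis=axis, keepdims=True)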
@@ -77,19 +78,18 @@ class Transformer:
   def forward(self, x):
     bs = x.shape[0]
     xnp = x.cpu().data
-    onehot = np.zeros((bs, x.shape[1], self.maxlen+self.syms), dtype=np.float32)
-    print(onehot.shape)
+    onehot = np.zeros((bs*x.shape[1], self.maxlen+self.syms), dtype=np.float32)
     for i in range(x.shape[1]):
-      onehot[range(bs), i, i] = 1
-      onehot[range(bs), i, self.maxlen + xnp[:, i]] = 1
-    x = Tensor(onehot, device=x.device).dot(self.embed)
-    print(x.shape)
+      onehot[range(bs*i, bs*(i+1)), i] = 1
+      onehot[range(bs*i, bs*(i+1)), self.maxlen + xnp[:, i]] = 1
+    x = Tensor(onehot, device=x.device).dot(self.embed).reshape(shape=(bs, x.shape[1], -1))
     for t in self.tbs:
       x = t(x)
-    return x.dot(self.final).logsoftmax()
+    x = x.reshape(shape=(-1, x.shape[-1])).dot(self.final).logsoftmax()
+    return x.reshape(shape=(bs, -1, x.shape[-1]))
 
 from tinygrad.optim import Adam
 
 if __name__ == "__main__":
   model = Transformer(10, 6, 2, 128, 4)
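
For context on the forward-pass change: the one-hot buffer is flattened from (bs, T, maxlen+syms) to (bs*T, maxlen+syms) so the embedding lookup becomes a single 2-D matrix product, with rows bs*i through bs*(i+1)-1 holding sequence position i for the whole batch; the trailing reshape then restores a 3-D activation for the attention blocks. Each row carries two ones, a position indicator in the first maxlen columns and a symbol indicator in the remaining syms columns, so one dot with self.embed sums a learned positional embedding and a learned token embedding. A standalone NumPy sketch of that encoding, with made-up sizes:

    import numpy as np

    bs, T, maxlen, syms, embed_dim = 4, 6, 6, 10, 8
    tokens = np.random.randint(0, syms, size=(bs, T))
    embed = np.random.randn(maxlen + syms, embed_dim).astype(np.float32)

    onehot = np.zeros((bs * T, maxlen + syms), dtype=np.float32)
    for i in range(T):
      onehot[range(bs*i, bs*(i+1)), i] = 1                      # position indicator
      onehot[range(bs*i, bs*(i+1)), maxlen + tokens[:, i]] = 1  # symbol indicator

    x = onehot.dot(embed)
    # each row is the sum of a position embedding and a token embedding
    assert np.allclose(x[0], embed[0] + embed[maxlen + tokens[0, 0]])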


@@ -33,7 +33,7 @@ def train(model, X_train, Y_train, optim, steps, BS=128, device=Device.CPU, loss
     loss.backward()
     optim.step()
-    cat = np.argmax(out.cpu().data, axis=1)
+    cat = np.argmax(out.cpu().data, axis=-1)
     accuracy = (cat == y).mean()
 
     # printing
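
The axis fix matters because the transformer's output is now 3-D: per the forward pass above, out has shape (BS, T, num_classes) rather than (BS, num_classes). With axis=1, argmax would reduce over the sequence dimension; axis=-1 selects the class dimension in both the 2-D and 3-D cases. A small NumPy sketch with assumed shapes:

    import numpy as np

    out = np.random.randn(4, 6, 10)            # (BS, T, num_classes)
    y = np.random.randint(0, 10, size=(4, 6))  # one target symbol per position

    cat = np.argmax(out, axis=-1)  # shape (4, 6): predicted symbol per position
    accuracy = (cat == y).mean()
    # axis=1 would instead give shape (4, 10), which cannot be compared to y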