diff --git a/tinygrad/nn/__init__.py b/tinygrad/nn/__init__.py
index 729c16051b..d949fd2940 100644
--- a/tinygrad/nn/__init__.py
+++ b/tinygrad/nn/__init__.py
@@ -385,5 +385,4 @@ class LSTMCell:
     i, f, g, o = gates.chunk(4, dim=1)
     i, f, g, o = i.sigmoid(), f.sigmoid(), g.tanh(), o.sigmoid()
     new_c = f * hc[1] + i * g
-    new_h = o * new_c.tanh()
-    return (new_h.contiguous(), new_c.contiguous())
+    return o * new_c.tanh(), new_c
diff --git a/tinygrad/nn/onnx.py b/tinygrad/nn/onnx.py
index 164a04c85a..90d98d7e85 100644
--- a/tinygrad/nn/onnx.py
+++ b/tinygrad/nn/onnx.py
@@ -1170,7 +1170,6 @@ def get_onnx_ops() -> dict[str, types.FunctionType|dict[OpSetId, types.FunctionT
     return ret.reshape(*x_shape[:batch_dims], *i_shape[batch_dims:-1], *ret.shape[indices.ndim-1:])
   def ScatterND(x:Tensor, indices:Tensor, updates:Tensor, reduction:Literal["none", "add", "mul", "max", "min"]='none'):
     assert updates.shape == indices.shape[:-1] + x.shape[cast(int, indices.shape[-1]):]
-    x = x.contiguous()
     for index, u in zip(indices.split(1, 0), updates.split(1, 0)):
       i = tuple(idx.squeeze(-1) for idx in index.squeeze(0).split(1, -1))
       u = u.squeeze(0)
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 0b5733166b..1ed16b48a5 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -3656,9 +3656,9 @@ class Tensor(OpMixin):
     assert self.ndim > 1, f"expected two or more dimensions, got {self.ndim}"
     b_shape, m, n = self.shape[:-2], int(self.shape[-2]), int(self.shape[-1])
     R = self.clone()
-    Q = Tensor.eye(m, dtype=self.dtype).reshape((1,) * len(b_shape) + (m, m)).expand(b_shape + (m, m)).contiguous()
+    Q = Tensor.eye(m, dtype=self.dtype).reshape((1,) * len(b_shape) + (m, m)).expand(b_shape + (m, m))
     for i in range(min(m, n)):
-      x = R[..., i:m, i].contiguous() # TODO: without contigous this can silently be wrong, should at least assert
+      x = R[..., i:m, i]
       norm = x.square().sum(-1).sqrt()
       s = (x[..., 0] != 0).where(-x[..., 0].sign(), -1)
       u1 = x[..., 0] - s * norm
@@ -3677,10 +3677,10 @@ class Tensor(OpMixin):
     #preprocess the matrix
     Q, R = (self.qr() if m >= n else self.transpose(-2, -1).qr())
     num, q_num = min(m, n), max(m, n)
-    U = R.shrink(tuple([None] * len(b_shape) + [(0, num), (0, num)])).contiguous()
-    V = Tensor.eye(num, dtype=self.dtype).reshape((1,) * len(b_shape) + (num, num)).expand(b_shape + (num, num)).contiguous()
+    U = R.shrink(tuple([None] * len(b_shape) + [(0, num), (0, num)]))
+    V = Tensor.eye(num, dtype=self.dtype).reshape((1,) * len(b_shape) + (num, num)).expand(b_shape + (num, num))
     #prepare round robin pairing
-    permute, inverse_permute = Tensor.arange(0, num, dtype=dtypes.int), Tensor.zeros(num, dtype=dtypes.int).contiguous()
+    permute, inverse_permute = Tensor.arange(0, num, dtype=dtypes.int), Tensor.zeros(num, dtype=dtypes.int)
     permute[num//2:num] = permute[num//2:num].flip(0)
     inverse_permute[permute] = Tensor.arange(num, dtype=dtypes.int)
     def one_round_jacobi(U, V,permute,inverse_permute):
@@ -3716,7 +3716,7 @@ class Tensor(OpMixin):
       U = U.gather(-1, new_indices) / (S != 0).where(S, 1).unsqueeze(-2)
       V = V.gather(-1, new_indices)
 
-    padded_u = Tensor.eye(q_num, dtype=U.dtype).reshape((1,) * len(b_shape) + (q_num, q_num)).expand(b_shape + (q_num, q_num)).contiguous()
+    padded_u = Tensor.eye(q_num, dtype=U.dtype).reshape((1,) * len(b_shape) + (q_num, q_num)).expand(b_shape + (q_num, q_num))
     padded_u[..., 0:num, 0:num] = U
     U = Q @ padded_u
     if not full_matrices: U, V = U[..., 0:num], V[..., 0:num]
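
Not part of the patch: a quick sanity-check sketch for the paths that lose their .contiguous() calls. It assumes the patch is applied on a working tinygrad checkout with numpy available for reference values, and that LSTMCell keeps its (input_size, hidden_size) constructor; the tolerances are illustrative. Dropping .contiguous() only changes when buffers get realized, not the math, so outputs should be numerically unchanged.

# sanity-check sketch (not part of the diff); assumptions as stated above
import numpy as np
from tinygrad import Tensor
from tinygrad.nn import LSTMCell

# LSTMCell: the rewrite inlines new_h and drops the realize-forcing
# .contiguous(); shapes and values are expected to be unchanged.
cell = LSTMCell(8, 16)                 # input_size=8, hidden_size=16 (assumed ctor)
x = Tensor.randn(4, 8)                 # batch of 4
h, c = cell(x, (Tensor.zeros(4, 16), Tensor.zeros(4, 16)))
assert h.shape == (4, 16) and c.shape == (4, 16)

# qr: with x = R[..., i:m, i] no longer forced contiguous, the factorization
# must still reproduce the input and keep Q orthonormal.
a = Tensor.randn(5, 3)
q, r = a.qr()
np.testing.assert_allclose((q @ r).numpy(), a.numpy(), atol=1e-5)
np.testing.assert_allclose((q.transpose(-2, -1) @ q).numpy(), np.eye(5), atol=1e-5)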