From 39b4d72687f853fdbc42eff4c960676aa38cba19 Mon Sep 17 00:00:00 2001
From: chenyu
Date: Fri, 4 Jul 2025 15:15:27 -0400
Subject: [PATCH] remove flatten and reshape in sparse_categorical_crossentropy [pr] (#11093)

not needed, directly operating on the classes dim is fine
---
 tinygrad/tensor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 55e8cf6735..b0cbd875a3 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -3920,9 +3920,9 @@ class Tensor(MathTrait):
     """
     assert 0.0 <= label_smoothing <= 1.0, "label_smoothing must be in [0.0, 1.0]"
     assert reduction in get_args(ReductionStr), f"reduction must be one of {get_args(ReductionStr)}"
-    log_probs, loss_mask = self.log_softmax(), (Y != ignore_index) if ignore_index != -1 else Y.ones_like(dtype=dtypes.bool)
-    y_counted = Y.to(self.device).flatten().reshape(-1, 1)._one_hot_along_dim(self.shape[-1])
-    y = (y_counted * loss_mask.reshape(-1, 1)).reshape(*Y.shape, self.shape[-1])
+    log_probs = self.log_softmax()
+    loss_mask = (Y != ignore_index) if ignore_index != -1 else Y.ones_like(dtype=dtypes.bool)
+    y = Y.to(self.device).unsqueeze(-1)._one_hot_along_dim(self.shape[-1], dim=-1) * loss_mask.unsqueeze(-1)
     smoothing = label_smoothing * (log_probs.mean(-1) * loss_mask)
     unreduced = ((1 - label_smoothing) * (log_probs * y).sum(-1) + smoothing)
     # NOTE: because of ignore_index, we can't use Tensor.mean (so can't use `_do_reduction` here)