fix max op, less lines
@@ -105,12 +105,12 @@ Warning: do not rely on the ANE port. It segfaults sometimes. So if you were doi
 
 ### Adding an accelerator
 
-You need to support 14 basic ops:
+You need to support 15 basic ops:
 
 ```
 Add, Sub, Mul, Pow            # binary ops
 Relu, Log, Exp                # unary ops
-Sum                           # reduce op
+Sum, Max                      # reduce ops
 Dot                           # matrix multiplication
 Conv2D, MaxPool2D             # 2D ops
 Pad2D, Reshape, Transpose     # moving things around ops
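For context on what "supporting an op" means here: each backend implements these ops as Function subclasses with a forward and a backward, then hooks them up with register(), exactly as ops_cpu.py does with register('max', Max) at the bottom of this diff. Below is a rough sketch of that pattern for a numpy-style backend. The ctx.save_for_backward / ctx.saved_tensors calls mirror the CPU code later in this commit, but the import path and the exact behaviour of register() are assumptions, not taken verbatim from this change.

```python
# Sketch only: the shape of a backend op, modeled on the Max / register('max', Max)
# pattern visible in this diff. Import path and register() semantics are assumptions.
import numpy as np
from tinygrad.tensor import Function, register  # assumed import path

class Relu(Function):
  @staticmethod
  def forward(ctx, input):
    ctx.save_for_backward(input)        # keep the input so backward knows where x was positive
    return np.maximum(input, 0)

  @staticmethod
  def backward(ctx, grad_output):
    input, = ctx.saved_tensors
    return grad_output * (input >= 0)   # gradient passes only where the input survived the relu

register('relu', Relu)                  # assumed: makes the op callable as a Tensor method
```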
@@ -83,9 +83,11 @@ class TestOps(unittest.TestCase):
   @cpu_only
   def test_max(self):
     helper_test_op([(45,3)], lambda x: x.max(), Tensor.max, device=self.device)
+    helper_test_op([(45,3)], lambda x: x.max().mul(0.5), lambda x: Tensor.max(x).mul(0.5), device=self.device)
   @cpu_only
   def test_max_axis(self):
     helper_test_op([(3,4,5,6)], lambda x: x.max(axis=1)[0], lambda x: Tensor.max(x, axis=1), device=self.device)
+    helper_test_op([(3,4,5,6)], lambda x: x.max(axis=1)[0].mul(0.5), lambda x: Tensor.max(x, axis=1).mul(0.5), device=self.device)
   def test_sum_axis(self):
     helper_test_op([(3,4,5,6)], lambda x: x.sum(axis=(1,2)), lambda x: Tensor.sum(x, axis=(1,2)), device=self.device)
   def test_mean_axis(self):
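The new .mul(0.5) variants are what actually exercise the bug this commit fixes: the old Max.backward (see the last hunk) ignored grad_output, so a max followed by any other op still received a constant gradient. helper_test_op compares the tinygrad op against a torch reference, which is why the torch side is written as x.max(axis=1)[0] (torch.max with a dim returns values and indices). A standalone torch snippet showing the behaviour the scaled test pins down, using values chosen so the maximum is unique:

```python
# Torch reference behaviour for the .mul(0.5) case: the upstream 0.5 must
# show up in the input gradient, only at the argmax position.
import torch

x = torch.tensor([[1., 5., 3.],
                  [2., 0., 4.]], requires_grad=True)
x.max().mul(0.5).backward()
print(x.grad)
# [[0.0, 0.5, 0.0],
#  [0.0, 0.0, 0.0]]   (0.5 at the argmax, zeros elsewhere)
```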
@@ -75,10 +75,7 @@ class Max(Function):
   @staticmethod
   def forward(ctx, input, axis=None):
     am = input.argmax(axis=axis)
-    if axis is not None:
-      am = np.expand_dims(am, axis=axis)
-    else:
-      am = np.array([am])
+    am = np.expand_dims(am, axis=axis) if axis is not None else np.array([am])
     ctx.save_for_backward(input.shape, am, axis)
     return np.take_along_axis(input, am, axis=axis).squeeze(axis=axis)
 
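The forward change only collapses the if/else into a conditional expression; the mechanics are unchanged. argmax records which element won along the axis, expand_dims keeps that index array broadcastable, and take_along_axis pulls out the max values while the saved indices let backward route the gradient. A pure-numpy walk-through of those three steps for axis=1:

```python
# Pure numpy trace of Max.forward with axis=1 (no tinygrad required).
import numpy as np

x = np.array([[1., 5., 3.],
              [2., 0., 4.]], dtype=np.float32)
axis = 1

am = x.argmax(axis=axis)                                                     # [1, 2]
am = np.expand_dims(am, axis=axis) if axis is not None else np.array([am])  # [[1], [2]]
out = np.take_along_axis(x, am, axis=axis).squeeze(axis=axis)               # [5., 4.]

print(am)   # winning indices, saved for backward
print(out)  # the per-row maxima returned to the caller
```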
@@ -86,7 +83,7 @@ class Max(Function):
   def backward(ctx, grad_output):
     shape, am, axis = ctx.saved_tensors
     ret = np.zeros(shape, dtype=np.float32)
-    np.put_along_axis(ret, am, 1/np.prod(am.shape), axis=axis)
+    np.put_along_axis(ret, am, grad_output.reshape(am.shape), axis=axis)
     return ret
 register('max', Max)
 
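This line is the actual fix. The old code scattered the constant 1/np.prod(am.shape) into the argmax slots and ignored grad_output entirely, which is exactly what the new .mul(0.5) tests catch; the new code scatters the real upstream gradient into the same positions and leaves everything else at zero. Continuing the numpy trace from the forward hunk:

```python
# Numpy trace of the fixed Max.backward, continuing the forward example above.
import numpy as np

x = np.array([[1., 5., 3.],
              [2., 0., 4.]], dtype=np.float32)
axis = 1
am = np.expand_dims(x.argmax(axis=axis), axis=axis)      # [[1], [2]], as saved by forward

grad_output = np.array([0.5, 0.5], dtype=np.float32)     # whatever gradient arrives from later ops
ret = np.zeros(x.shape, dtype=np.float32)
np.put_along_axis(ret, am, grad_output.reshape(am.shape), axis=axis)
print(ret)
# [[0.  0.5 0. ]
#  [0.  0.  0.5]]
```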