Onnx Layer/Group/RMS/Batch-Norm ReduceL2 fp32 intermediates for fp16 (#12109)

* match onnx spec

* use least_upper_dtype

* promote the square

* just cast before the square
This commit is contained in:
Sieds Lykles
2025-10-24 12:26:11 +02:00
committed by GitHub
parent 0bde87d8d7
commit e1f8c82938
2 changed files with 20 additions and 12 deletions

View File

@@ -272,6 +272,10 @@ class TestMainOnnxOps(TestOnnxOps):
def test_qlinearmatmul_2D_int8_float32(self):
    # QLinearMatMul over rank-2 operands: int8 quantized values, float32 scales.
    self._run_qlinearmatmul_test(np.int8, np.float32, 2)
def test_qlinearmatmul_3D_int8_float32(self):
    # QLinearMatMul over rank-3 (batched) operands: int8 quantized values, float32 scales.
    self._run_qlinearmatmul_test(np.int8, np.float32, 3)
def test_reduce_l2_half(self):
    # Random fp16 data scaled by 100: squaring such values overflows half precision,
    # so this exercises ReduceL2's use of wider intermediates for fp16 inputs
    # (presumably the regression target of the commit above — confirm against the issue).
    data = np.random.randn(1, 1, 32, 32, 32).astype(np.half) * 100
    self.helper_test_single_op("ReduceL2", {"data": data}, {}, ["reduced"])
class TestTrainingOnnxOps(TestOnnxOps):
# NOTE: ORT doesn't actually support training ops on cpu so we test using functions provided by onnx
DOMAIN = AI_ONNX_PREVIEW_TRAINING_DOMAIN
@@ -487,4 +491,4 @@ class TestContribOnnxOps(TestOnnxOps):
self.helper_test_single_op("QLinearGlobalAveragePool", inputs, attributes, outputs)
# Script entry point: run the test suite when executed directly.
if __name__ == "__main__": unittest.main()