mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
ROCM IFU: Extend input to 32-bit when necessary
Note: we'll need to check later whether we can use i8 for some reduction operations.
This commit is contained in:
committed by
Jason Furmanek
parent
92edee723b
commit
a41f13adcd
@@ -1377,6 +1377,11 @@ def reduce(input, axis, combine_fn, _builder=None, _generator=None):
|
||||
@builtin
|
||||
def _promote_reduction_input(t, _builder=None):
|
||||
scalar_ty = t.type.scalar
|
||||
# input is extended to 32-bits if necessary
|
||||
# this increases numerical accuracy and can be done pretty much for free
|
||||
# on GPUs
|
||||
if scalar_ty.is_int() and scalar_ty.int_bitwidth < 32:
|
||||
return t.to(int32, _builder=_builder)
|
||||
|
||||
# hardware doesn't support FMAX, FMIN, CMP for bfloat16
|
||||
if scalar_ty is bfloat16:
|
||||
|
||||
Reference in New Issue
Block a user