If feasible, do not truncate float64 down to float32 in cstyle renderer (#3420)

* do not truncate float64 precision

* use l suffix to try to avoid overload confusion

* long line, ruff bloats the function otherwise

* fmt

* remove long double suffix (l); it is sufficient to have the float32 (f) suffix to avoid function overload ambiguity (the renderer-side idea is sketched after these notes); add a test showcasing the rtol=1e-12 precision increase, which fails without the renderer changes

* use more reasonable test values, same as test_int_to_float_unary_func

* disable test for CUDACPU; it does not support half and segfaults on some operations, per the dtypes_alu test

* disable test for HIP; its renderer does not support f64 precision

* do not use noqa E501; break up the condition instead
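
A hedged sketch of the renderer-side idea described in the notes above: float32 constants keep an f suffix so overloaded math calls in C-style kernel languages resolve unambiguously, while float64 constants are emitted at full precision, with no truncation to float32 and no l suffix. The helper below is illustrative only; render_float_const and its dtype strings are made up and are not tinygrad's actual renderer API.

# Hedged sketch only: render_float_const and the dtype strings are hypothetical
# and do not reflect tinygrad's real renderer code.
def render_float_const(x: float, dtype: str) -> str:
  if dtype == "float32":
    # The trailing f makes the literal single precision, so overloaded kernel
    # math functions (e.g. sin) resolve to the float variant without ambiguity.
    return f"{x}f"
  if dtype == "float64":
    # Emit the full double-precision value: no truncation to float32 and no
    # l (long double) suffix, since float32 literals already carry f.
    return f"{x:.17g}"
  raise ValueError(f"unsupported dtype: {dtype}")

# The f-suffixed literal gets rounded to float32 by the kernel compiler,
# while the float64 literal keeps its full precision.
print(render_float_const(0.1, "float32"))  # 0.1f
print(render_float_const(0.1, "float64"))  # 0.10000000000000001
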
Author: zku
Date: 2024-02-16 10:08:59 +01:00
Committed by: GitHub
Parent: 30f26279c5
Commit: 2d702ca073
2 changed files with 20 additions and 1 deletion

@@ -161,7 +161,25 @@ class TestHalfDtype(TestDType): DTYPE = dtypes.half

 class TestFloatDType(TestDType): DTYPE = dtypes.float

-class TestDoubleDtype(TestDType): DTYPE = dtypes.double
+class TestDoubleDtype(TestDType):
+  DTYPE = dtypes.double
+  @unittest.skipIf(getenv("CUDACPU",0)==1, "conversion not supported on CUDACPU")
+  @unittest.skipIf(getenv("HIP",0)==1, "HIP renderer does not support f64 precision")
+  def test_float64_increased_precision(self):
+    for func in [
+      lambda t: t.exp(),
+      lambda t: t.exp2(),
+      lambda t: t.log(),
+      lambda t: t.log2(),
+      lambda t: t.sqrt(),
+      lambda t: t.rsqrt(),
+      lambda t: t.sin(),
+      lambda t: t.cos(),
+      lambda t: t.tan(),
+      lambda t: t.sigmoid(),
+    ]:
+      a = [2, 3, 4]
+      np.testing.assert_allclose(func(Tensor(a, dtype=self.DTYPE)).numpy(), func(torch.tensor(a, dtype=torch.float64)), rtol=1e-12, atol=1e-12)

 class TestInt8Dtype(TestDType):
   DTYPE = dtypes.int8
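
As a standalone illustration (not part of the commit) of why rtol=1e-12 can only pass with genuine float64 arithmetic: float32 carries roughly seven significant decimal digits, so routing any step of the computation through float32 leaves errors many orders of magnitude above that tolerance.

import numpy as np

# exp(2.0) in full float64 versus the same value routed through float32,
# mimicking a renderer that truncates double-precision constants or math.
x = 2.0
full = np.exp(np.float64(x))
trunc = np.float64(np.exp(np.float32(x)))
rel_err = abs(full - trunc) / abs(full)
print(rel_err)  # on the order of 1e-8 (float32 precision), far above rtol=1e-12
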