mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[TEST] use numpy for reference results in test_core.py (#409)
Since NumPy supports unsigned integers and PyTorch doesn't, this will make it easier to test unsigned integer support. This adds an explicit requirement for NumPy in the tests, but since we already required SciPy, it was already an implicit dependency.
This commit is contained in:
committed by
GitHub
parent
03f1256f60
commit
d8db0308cb
@@ -85,31 +85,6 @@ def allclose(x, y, tol=1e-2):
|
||||
return err <= tol
|
||||
|
||||
|
||||
def assert_allclose(x, y, tol=1e-2):
    """Assert that `x` and `y` share a dtype and are element-wise close within `tol`.

    Relies on the module-level `allclose` helper for the tolerance check.
    """
    assert y.dtype == x.dtype
    assert allclose(x, y, tol=tol)
|
||||
|
||||
|
||||
def random(shape, dtype, device, seed=0):
    """Return a seeded random tensor of the given `shape` and `dtype` on `device`.

    Override the seed in tests if you're calling this function twice and don't
    want the same result for both calls.

    :param shape: int or tuple of ints describing the output shape.
    :param dtype: a torch dtype (bool, signed integer, or float).
    :param device: device string/object passed through to torch.
    :param seed: manual seed applied before sampling (default 0).
    :raises RuntimeError: if `dtype` is not one of the supported kinds.
    """
    torch.manual_seed(seed)
    # Normalize a bare int into a 1-element shape tuple.
    if isinstance(shape, int):
        shape = (shape, )
    if dtype == torch.bool:
        return torch.randint(0, 2, shape, dtype=dtype, device=device)
    if dtype in (torch.int8, torch.int16, torch.int32, torch.int64):
        info = torch.iinfo(dtype)
        low, high = info.min, info.max
        out = torch.randint(low, high, shape, dtype=dtype, device=device)
        # Hack. Never return zero so tests of division don't error out.
        out[out == 0] = 1
        return out
    if dtype in (torch.float16, torch.float32, torch.float64):
        return torch.normal(0, 1, shape, dtype=dtype, device=device)
    raise RuntimeError(f'Unknown dtype {dtype}')
|
||||
|
||||
|
||||
def nvsmi(attrs):
|
||||
attrs = ','.join(attrs)
|
||||
cmd = ['nvidia-smi', '-i', '0', '--query-gpu=' + attrs, '--format=csv,noheader,nounits']
|
||||
@@ -203,7 +178,7 @@ class Benchmark:
|
||||
styles=None,
|
||||
):
|
||||
"""
|
||||
Constructor
|
||||
Constructor
|
||||
|
||||
:param x_names: Name of the arguments that should appear on the x axis of the plot. If the list contains more than one element, all the arguments are assumed to have the same value.
|
||||
:type x_names: List[str]
|
||||
@@ -344,4 +319,4 @@ def get_max_tensorcore_tflops(backend, device):
|
||||
else:
|
||||
ops_per_sub_core = 512
|
||||
tflops = num_subcores * clock_rate * ops_per_sub_core / (1024*1024*1024)
|
||||
return tflops
|
||||
return tflops
|
||||
|
||||
Reference in New Issue
Block a user