Files
tinygrad/test/test_dtype.py
cheeetoo a0965ee198 CI < 5 minutes (#1252)
* models matrix

* fix typo and install gpu deps

* install llvm deps if needed

* fix

* testops with cuda

* remove pip cache since not work

* cuda env

* install cuda deps

* maybe it will work now

* i can't read

* all tests in matrix

* trim down more

* opencl stuff in matrix

* opencl pip cache

* test split

* change cuda test exclusion

* test

* fix cuda maybe

* add models

* add more n=auto

* third thing

* fix bug

* cache pip more

* change name

* update tests

* try again cause why not

* balance

* try again...

* try apt cache for cuda

* try on gpu:

* try cuda again

* update packages step

* replace libz-dev with zlib1g-dev

* only cache cuda

* why error

* fix gpuocelot bug

* apt cache err

* apt cache to slow?

* opt and image in single runner

* add a couple n=autos

* remove test matrix

* try cuda apt cache again

* libz-dev -> zlib1g-dev

* remove -s since not supported by xdist

* the cache takes too long and doesn't work

* combine webgpu and metal tests

* combine imagenet to c and cpu tests

* torch tests with linters

* torch back by itself

* small windows clang test with torch tests

* fix a goofy windows bug

* im dumb

* bro

* clang with linters

* fix pylint error

* linter not work on windows

* try with clang again

* clang and imagenet?

* install deps

* fix

* fix quote

* clang by itself (windows too slow)

* env vars for imagenet

* cache pip for metal and webgpu tests

* try torch with metal and webgpu

* doesn't work, too long

* remove -v

* try -n=logical

* don't use logical

* revert accidental thing

* remove some prints unless CI

* fix print unless CI

* ignore speed tests for slow tests

* clang windows in matrix (ubuntu being tested in imagenet->c test)

* try manual pip cache

* fix windows pip cache path

* all manual pip cache

* fix pip cache dir for macos

* print_ci function in helpers

* CI as variable, no print_ci

* missed one

* cuda tests with docker image

* remove setup-python action for cuda

* python->python3?

* remove -s -v

* try fix pip cache

* maybe fix

* try to fix pip cache

* is this the path?

* maybe cache pip

* try again

* create wheels dir

* ?

* cuda pip deps in dockerfile

* disable pip cache for clang

* image from ghcr instead of docker hub

* why is clang like this

* fast deps

* try use different caches

* remove the fast thing

* try with lighter image

* remove setup python for cuda

* small docker and cuda fast deps

* ignore a few more tests

* cool docker thing (maybe)

* oops

* quotes

* fix docker command

* fix bug

* ignore train efficientnet test

* remove dockerfile (docker stuff takes too long)

* remove docker stuff and normal cuda

* oops

* ignore the tests for cuda

* does this work

* ignore test_train on slow backends

* add space

* llvm ignore same tests as cuda

* nvm

* ignore lr scheduler tests

* get some stats

* fix ignore bug

* remove extra '

* remove and

* ignore test for llvm

* change ignored tests and durationon all backends

* fix

* and -> or

* ignore some more cuda tests

* finally?

* does this fix it

* remove durations=0

* add some more tests to llvm

* make last pytest more readable

* fix

* don't train efficientnet on cpu

* try w/out pip cache

* pip cache seems to be generally better

* pytest file markers

* try apt fast for cuda

* use quick install for apt-fast

* apt-fast not worth

* apt-get to apt

* fix typo

* suppress warnings

* register markers

* disable debug on fuzz tests

* change marker names

* apt update and apt install in one command

* update marker names in test.yml

* webgpu pytest marker
2023-07-23 13:00:56 -07:00

125 lines
7.1 KiB
Python

import unittest
import numpy as np
from tinygrad.helpers import getenv, DType, DEBUG, CI
from tinygrad.lazy import Device
from tinygrad.tensor import Tensor, dtypes
from typing import List, Optional
from extra.utils import OSX, temp
import copy
def _test_to_np(a:Tensor, np_dtype, target):
if DEBUG >= 2: print(a)
na = a.numpy()
if DEBUG >= 2: print(na, na.dtype, a.lazydata.realized)
try:
assert na.dtype == np_dtype
np.testing.assert_allclose(na, target)
except AssertionError as e:
raise AssertionError(f"\ntensor {a.numpy()} does not match target {target} with np_dtype {np_dtype}") from e
def _assert_eq(tensor:Tensor, target_dtype:DType, target):
if DEBUG >= 2: print(tensor.numpy())
try:
assert tensor.dtype == target_dtype
np.testing.assert_allclose(tensor.numpy(), target)
except AssertionError as e:
raise AssertionError(f"\ntensor {tensor.numpy()} dtype {tensor.dtype} does not match target {target} with dtype {target_dtype}") from e
def _test_op(fxn, target_dtype:DType, target): _assert_eq(fxn(), target_dtype, target)
def _test_cast(a:Tensor, target_dtype:DType, target): _test_op(lambda: a.cast(target_dtype), target_dtype, target)
# tests no-op casts from source_dtype to target_dtypes
def _test_casts_from(tensor_contents:List, source_dtype:DType, target_dtypes:List[DType], target_contents:Optional[List]=None):
if target_contents is None: target_contents = copy.deepcopy(tensor_contents)
list(map(
lambda t_dtype: _test_cast(Tensor(tensor_contents, dtype=source_dtype), t_dtype, target_contents),
target_dtypes
))
# tests no-op casts from source_dtypes to target_dtype
def _test_casts_to(tensor_contents:List, source_dtypes:List[DType], target_dtype:DType, target_contents:Optional[List]=None):
if target_contents is None: target_contents = copy.deepcopy(tensor_contents)
list(map(
lambda s_dtype: _test_cast(Tensor(tensor_contents, dtype=s_dtype), target_dtype, target_contents),
source_dtypes
))
def _test_ops(a_dtype:DType, b_dtype:DType, target_dtype:DType):
_assert_eq(Tensor([1,2,3,4], dtype=a_dtype)+Tensor([1,2,3,4], dtype=b_dtype), target_dtype, [2,4,6,8])
_assert_eq(Tensor([1,2,3,4], dtype=a_dtype)*Tensor([1,2,3,4], dtype=b_dtype), target_dtype, [1,4,9,16])
_assert_eq(Tensor([[1,2],[3,4]], dtype=a_dtype)@Tensor.eye(2, dtype=b_dtype), target_dtype, [[1,2],[3,4]])
class TestBFloat16DType(unittest.TestCase):
def test_bf16_to_float(self):
with self.assertRaises(AssertionError):
_test_cast(Tensor([100000], dtype=dtypes.bfloat16), dtypes.float32, [100000])
def test_float_to_bf16(self):
with self.assertRaises(AssertionError):
_test_cast(Tensor([100000], dtype=dtypes.float32), dtypes.bfloat16, [100000])
# torch.tensor([10000, -1, -1000, -10000, 20]).type(torch.bfloat16)
@unittest.skipIf(Device.DEFAULT not in ["LLVM"], "bf16 only on LLVM")
def test_bf16(self):
t = Tensor([10000, -1, -1000, -10000, 20]).cast(dtypes.bfloat16)
t.realize()
back = t.cast(dtypes.float32)
assert tuple(back.numpy().tolist()) == (9984., -1, -1000, -9984, 20)
@unittest.skipIf(Device.DEFAULT not in ["LLVM"], "bf16 only on LLVM")
def test_bf16_disk_write_read(self):
t = Tensor([10000, -1, -1000, -10000, 20]).cast(dtypes.float32)
t.to(f"disk:{temp('f32')}").realize()
# hack to "cast" f32 -> bf16
dat = open(temp('f32'), "rb").read()
adat = b''.join([dat[i+2:i+4] for i in range(0, len(dat), 4)])
with open(temp('bf16'), "wb") as f: f.write(adat)
t = Tensor.empty(5, dtype=dtypes.bfloat16, device=f"disk:{temp('bf16')}").llvm().realize()
back = t.cast(dtypes.float32)
assert tuple(back.numpy().tolist()) == (9984., -1, -1000, -9984, 20)
# for GPU, cl_khr_fp16 isn't supported (except now we don't need it!)
# for LLVM, it segfaults because it can't link to the casting function
@unittest.skipIf((getenv("CI", "") != "" and Device.DEFAULT in ["LLVM"]) or Device.DEFAULT == "WEBGPU", "float16 broken in some CI backends")
class TestHalfDtype(unittest.TestCase):
def test_float16_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.float16), np.float16, [1,2,3,4])
def test_casts_to_half(self): _test_casts_to([1,2,3,4], source_dtypes=[dtypes.float32, dtypes.int8, dtypes.uint8], target_dtype=dtypes.float16)
def test_casts_from_half(self): _test_casts_from([1,2,3,4], source_dtype=dtypes.float16, target_dtypes=[dtypes.int8, dtypes.uint8, dtypes.float32, dtypes.int32, dtypes.int64])
def test_half_upcast_ops(self): _test_ops(a_dtype=dtypes.float16, b_dtype=dtypes.float32, target_dtype=dtypes.float32)
def test_upcast_to_half_ops(self): _test_ops(a_dtype=dtypes.int8, b_dtype=dtypes.float16, target_dtype=dtypes.float16)
@unittest.skipIf(Device.DEFAULT == "WEBGPU", "webgpu does not support int8")
class TestInt8Dtype(unittest.TestCase):
def test_int8_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.int8), np.int8, [1,2,3,4])
def test_uint8_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.uint8), np.uint8, [1,2,3,4])
def test_int64_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.int64), np.int64, [1,2,3,4])
def test_casts_to_int8(self): _test_casts_from([1,2,3,4], source_dtype=dtypes.float32, target_dtypes=[dtypes.int8, dtypes.uint8, dtypes.int32, dtypes.int64])
def test_casts_from_int8(self): _test_casts_from([1,2,3,4], source_dtype=dtypes.int8, target_dtypes=[dtypes.float32, dtypes.uint8, dtypes.int32, dtypes.int64])
def test_casts_from_uint8(self): _test_casts_from([1,2,3,4], source_dtype=dtypes.uint8, target_dtypes=[dtypes.float32, dtypes.int8, dtypes.int32, dtypes.int64])
def test_int8_ops(self): _test_ops(a_dtype=dtypes.int8, b_dtype=dtypes.int8, target_dtype=dtypes.int8)
def test_int64_ops(self): _test_ops(a_dtype=dtypes.int64, b_dtype=dtypes.int64, target_dtype=dtypes.int64)
def test_int8_upcast_float(self): _test_ops(a_dtype=dtypes.int8, b_dtype=dtypes.float32, target_dtype=dtypes.float32)
def test_int8_upcast_int64(self): _test_ops(a_dtype=dtypes.int8, b_dtype=dtypes.int64, target_dtype=dtypes.int64)
@unittest.skipIf(getenv("CUDA",0)==1, "cuda saturation works differently")
def test_int8_to_uint8_negative(self): _test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint8), dtypes.uint8, [255, 254, 253, 252])
def test_uint8_to_int8_overflow(self): _test_op(lambda: Tensor([255, 254, 253, 252], dtype=dtypes.uint8).cast(dtypes.int8), dtypes.int8, [-1, -2, -3, -4])
class TestInt32Dtype(unittest.TestCase):
def test_int32_to_np(self): _test_to_np(Tensor([1,2,3,4], dtype=dtypes.int32), np.int32, [1,2,3,4])
def test_casts_to_int32(self): _test_casts_to([1,2,3,4], source_dtypes=[dtypes.float32, dtypes.int64], target_dtype=dtypes.int32)
def test_casts_from_int32(self): _test_casts_from([1,2,3,4], source_dtype=dtypes.int32, target_dtypes=[dtypes.float32, dtypes.int64])
def test_int32_ops(self): _test_ops(a_dtype=dtypes.int32, b_dtype=dtypes.int32, target_dtype=dtypes.int32)
def test_int32_upcast_float32(self): _test_ops(a_dtype=dtypes.int32, b_dtype=dtypes.float32, target_dtype=dtypes.float32)
def test_int32_upcast_int64(self): _test_ops(a_dtype=dtypes.int32, b_dtype=dtypes.int64, target_dtype=dtypes.int64)
if __name__ == '__main__':
unittest.main()