Merge branch 'master' into retinanet_mlperf

This commit is contained in:
Francis Lata
2024-12-03 06:06:38 -05:00
4 changed files with 20 additions and 14 deletions

View File

@@ -500,14 +500,14 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 2 # NOTE: this fetches the HEAD commit of the PR
- name: Set up Python 3.11
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: 3.11
python-version: 3.12
- name: Cache python packages
uses: actions/cache@v4
with:
path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
path: ${{ env.Python3_ROOT_DIR }}/lib/python3.12/site-packages
key: ${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }}
- name: Cache downloads
uses: actions/cache@v4

View File

@@ -7,11 +7,17 @@ from tinygrad.helpers import prod, unwrap
@unittest.skipIf(Device.DEFAULT not in ("QCOM", "GPU"), "only images on GPU")
class TestImageCopy(unittest.TestCase):
def test_image_copyout_1x1(self):
it = Tensor.arange(4).cast(dtypes.imagef((1,1,4))).realize()
def test_image_copyout_1x1(self, img_type=dtypes.imagef):
it = Tensor.arange(4).cast(img_type((1,1,4))).realize()
buf = it.lazydata.buffer
out = buf.as_buffer()
np.testing.assert_equal(out.cast('f').tolist(), np.arange(4))
np.testing.assert_equal(out.cast(it.dtype.fmt).tolist(), np.arange(4))
def test_imageh_copyout_1x1(self): self.test_image_copyout_1x1(img_type=dtypes.imageh)
def test_image_numpy_1x1(self, img_type=dtypes.imagef):
it = Tensor.arange(4).cast(img_type((1,1,4))).realize()
np.testing.assert_equal(it.numpy(), np.arange(4))
def test_imageh_numpy_1x1(self): self.test_image_numpy_1x1(img_type=dtypes.imageh)
def test_image_copyout_2x3(self):
it = Tensor.arange(2*3*4).cast(dtypes.imagef((2,3,4))).realize()

View File

@@ -56,13 +56,13 @@ def to_uop(buf:LazyBuffer, ctx:ScheduleContext, buffers:Dict[UOp, Buffer], cache
return ret
assert buf.op is not None, f"base must be base itself {buf}"
# make things that can't be images not images
dtype = buf.dtype
dtype = buf.buffer.dtype
if isinstance(dtype, ImageDType) and (prod(buf.shape) != prod(dtype.shape) or not any(buf.shape[x]%4 == 0 for x in buf.st.unit_stride_axes())):
assert buf.realized is None, "can't fixup allocated buffer"
if DEBUG >= 2: print(f"forcing image {dtype} with shape {buf.shape} to {dtype.base}")
dtype = buf.dtype.base
# hack the underlying buffer too
buf.buffer.dtype = buf.dtype = dtype
buf.buffer.dtype = dtype
buf.buffer.options = None
if buf.is_realized:
ubuf = UOp.new_buffer(buf.device, buf.size, dtype)

View File

@@ -278,10 +278,10 @@ class Tensor(SimpleMathTrait):
print(np.frombuffer(t.data(), dtype=np.int32))
```
"""
assert self.dtype.fmt is not None, f"no fmt dtype for {self.dtype}"
assert self.dtype.base.fmt is not None, f"no fmt dtype for {self.dtype.base}"
assert all_int(self.shape), f"no data if shape is symbolic, {self.shape=}"
if TYPE_CHECKING or sys.version_info < (3, 12): assert self.dtype.fmt != "e"
return self._data().cast(self.dtype.fmt) if 0 in self.shape else self._data().cast(self.dtype.fmt, self.shape)
if TYPE_CHECKING or sys.version_info < (3, 12): assert self.dtype.base.fmt != "e"
return self._data().cast(self.dtype.base.fmt) if 0 in self.shape else self._data().cast(self.dtype.base.fmt, self.shape)
def item(self) -> ConstType:
"""
@@ -318,10 +318,10 @@ class Tensor(SimpleMathTrait):
```
"""
import numpy as np
if self.dtype == dtypes.bfloat16: return self.float().numpy()
assert _to_np_dtype(self.dtype) is not None, f"no np dtype for {self.dtype}"
if self.dtype.base == dtypes.bfloat16: return self.float().numpy()
assert _to_np_dtype(self.dtype.base) is not None, f"no np dtype for {self.dtype.base}"
assert all_int(self.shape), f"no data if shape is symbolic, {self.shape=}"
return np.frombuffer(self._data(), dtype=_to_np_dtype(self.dtype)).reshape(self.shape)
return np.frombuffer(self._data(), dtype=_to_np_dtype(self.dtype.base)).reshape(self.shape)
def clone(self) -> Tensor:
"""