mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-07 22:23:55 -05:00)
u32 to f16 in tinygrad (#8074)

* f16 decompression in tinygrad
* Typing and cleanup
.github/workflows/test.yml (vendored, 2 lines changed)
@@ -387,7 +387,7 @@ jobs:
         WEBGPU=1 WGPU_BACKEND_TYPE=Vulkan python3 -m pytest -n=auto test/test_assign.py test/test_arange.py test/test_const_folding.py test/test_dtype.py \
           test/test_dtype_alu.py test/test_conv.py test/test_conv_shapetracker.py test/test_nn.py test/test_ops.py test/test_optim.py \
           test/test_jit.py test/test_randomness.py test/test_symbolic_ops.py test/test_symbolic_jit.py test/test_uops_stats.py test/test_uops.py \
-          test/testextra/test_export_model.py --durations=20
+          test/testextra/test_export_model.py test/testextra/test_f16_decompress.py --durations=20
     - name: Run process replay tests
       run: |
         export PR_TITLE=$(jq -r .pull_request.title "$GITHUB_EVENT_PATH")
extra/f16_decompress.py (new file, 16 lines added)
@@ -0,0 +1,16 @@
+from tinygrad import Tensor
+
+def bit_extract(x: Tensor, e: int, s: int) -> Tensor:
+  mask = (1 << (e - s + 1)) - 1
+  return (x >> s) & mask
+
+def u16_to_f16(x: Tensor) -> Tensor:
+  sign = bit_extract(x, 15, 15).float()
+  exponent = bit_extract(x, 14, 10).float()
+  fraction = bit_extract(x, 9, 0).float()
+  return sign.where(-1, 1) * exponent.where((exponent - 15.0).exp2() * (1 + fraction / 1024.0), 6.103515625e-5 * (fraction / 1024.0))
+
+def u32_to_f16(oo: Tensor) -> Tensor:
+  f1 = u16_to_f16(oo>>16)
+  f2 = u16_to_f16(oo&0xFFFF)
+  return Tensor.cat(f2.reshape(-1, 1), f1.reshape(-1, 1), dim=1).flatten()
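For reference, the decode above follows the IEEE 754 binary16 layout: 1 sign bit, 5 exponent bits, 10 fraction bits. Normal values decode as (-1)^sign * 2^(exponent-15) * (1 + fraction/1024); a zero exponent field means a subnormal, 2^-14 * fraction/1024, which is where the 6.103515625e-5 constant (exactly 2^-14) comes from. A minimal pure-Python sketch of the same decode (a hypothetical helper; like the kernel above, it ignores the inf/NaN case at exponent 31):

import struct

def decode_u16(x: int) -> float:
  # IEEE 754 binary16: 1 sign bit, 5 exponent bits, 10 fraction bits
  sign, exponent, fraction = (x >> 15) & 0x1, (x >> 10) & 0x1F, x & 0x3FF
  # zero exponent field -> subnormal; 2**-14 == 6.103515625e-5
  mag = 6.103515625e-5 * (fraction / 1024.0) if exponent == 0 else 2.0 ** (exponent - 15) * (1 + fraction / 1024.0)
  return -mag if sign else mag

# cross-check a few bit patterns against Python's native binary16 decode
for bits in (0x3C00, 0xC000, 0x0001, 0x7BFF):  # 1.0, -2.0, smallest subnormal, 65504.0
  assert decode_u16(bits) == struct.unpack('<e', bits.to_bytes(2, 'little'))[0]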
@@ -1,40 +0,0 @@
-import numpy as np
-from tinygrad import Device, dtypes, Tensor
-
-# TODO: will be better when tinygrad does math in the target dtype, can remove the floor and use a mul
-def bit_extract(x, s, e) -> Tensor:
-  # extract the top bits we don't want
-  top_bits = (x / (1<<(s+1))).floor() * (1<<(s+1))
-  x = (x - top_bits) / (1<<e)
-  return x.contiguous()
-
-def u16_to_f16(x):
-  sign = bit_extract(x, 15, 15).float()
-  exponent = bit_extract(x, 14, 10).float()
-  fraction = bit_extract(x, 9, 0).float()
-  return sign.where(-1, 1) * exponent.where((exponent - 15).exp2() * (1 + fraction / 0x400), 6.103515625e-5 * (fraction / 0x400))
-
-def u32_to_f16(oo):
-  oo1 = (oo/0x10000).floor().contiguous()
-  # TODO: this is wrong and unextractable until we do this math in u32
-  oo2 = (oo-(oo1*0x10000)).floor().contiguous()
-  f1 = u16_to_f16(oo1)
-  f2 = u16_to_f16(oo2)
-  return Tensor.cat(f2.reshape(-1, 1), f1.reshape(-1, 1), dim=1).flatten()
-
-if __name__ == "__main__":
-  # random float16
-  Tensor.manual_seed(2)
-  a = Tensor.randn(100, dtype=dtypes.float16)
-
-  # this converts it to u32 on disk
-  oo = a.to("disk:/tmp/f16").cast(dtypes.uint32)[:50].to(Device.DEFAULT).realize()
-
-  # convert to 2xf16 using tinygrad math ops
-  f16 = u32_to_f16(oo)
-
-  ref = a.numpy()
-  out = f16.numpy().astype(np.float16)
-  print(ref-out)
-
-  np.testing.assert_allclose(ref, out)
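The deleted implementation above emulated each bit-field extraction with float floor-division because the math was not yet done in uint32 (hence both TODOs), and splitting the full 32-bit word that way is flagged as wrong presumably because a float32 mantissa holds only 24 bits. The new bit_extract replaces the whole dance with a shift and a mask. A quick sketch of the equivalence over plain Python ints (hypothetical names, hi/lo for the code's e/s):

def bit_extract_float(x: int, hi: int, lo: int) -> int:
  # floor-division emulation, as the deleted bit_extract did with float ops
  top_bits = (x // (1 << (hi + 1))) * (1 << (hi + 1))  # clear bits above hi
  return (x - top_bits) // (1 << lo)                   # then drop bits below lo

def bit_extract_int(x: int, hi: int, lo: int) -> int:
  # shift-and-mask version, as in the new extra/f16_decompress.py
  return (x >> lo) & ((1 << (hi - lo + 1)) - 1)

# identical on every 16-bit pattern for the exponent field (bits 14..10)
assert all(bit_extract_float(x, 14, 10) == bit_extract_int(x, 14, 10) for x in range(1 << 16))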
test/testextra/test_f16_decompress.py (new file, 15 lines added)
@@ -0,0 +1,15 @@
+import unittest
+from extra.f16_decompress import u32_to_f16
+from tinygrad.tensor import Tensor
+from tinygrad.device import Device, is_dtype_supported
+from tinygrad import dtypes
+import numpy as np
+
+class TestF16Decompression(unittest.TestCase):
+  def test_u32_to_f16(self):
+    a = Tensor.randn(50, dtype=dtypes.float16, device=None if is_dtype_supported(dtypes.float16) else "CLANG:0")
+    f16_as_u32 = a.bitcast(dtypes.uint32) if is_dtype_supported(dtypes.float16) else a.bitcast(dtypes.uint32).to(Device.DEFAULT)
+    f16 = u32_to_f16(f16_as_u32)
+    ref = a.numpy()
+    out = f16.numpy().astype(np.float16)
+    np.testing.assert_allclose(out, ref)
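A quick way to exercise the new helper outside the test suite, as a sketch: it assumes a little-endian host, that Tensor accepts a NumPy uint32 array, and that the default backend supports uint32 shifts.

import numpy as np
from tinygrad import Tensor
from extra.f16_decompress import u32_to_f16

# pack four known halves into two uint32 words; on a little-endian host,
# element 0 lands in the low 16 bits, the order u32_to_f16 restores
ref = np.array([1.0, -2.0, 0.5, 65504.0], dtype=np.float16)
out = u32_to_f16(Tensor(ref.view(np.uint32))).numpy().astype(np.float16)
np.testing.assert_allclose(out, ref)

The test itself runs locally with python -m pytest test/testextra/test_f16_decompress.py; the workflow change above wires the same file into the WebGPU CI job.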