mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
add Q1_0 gguf type (#15683)
* add Q1_0 * better description * fix trailing whitespace --------- Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
This commit is contained in:
@@ -81,6 +81,13 @@ class TestGGUF(unittest.TestCase):
|
||||
out = ggml_data_to_tensor(Tensor(block), 32, GGMLQuantizationType.MXFP4.value)
|
||||
np.testing.assert_equal(out.numpy(), expected)
|
||||
|
||||
def test_dequantization_q1_0(self):
    """Dequantize one Q1_0 block and compare against a NumPy reference.

    Q1_0 block layout: 2 bytes fp16 scale + 16 bytes packing 128 1-bit values.
    Each bit b dequantizes to scale * (2*b - 1), i.e. {-scale, +scale}.
    """
    # Seeded generator instead of bare np.random: the exercised bit pattern is
    # reproducible, so a failure can be rerun and debugged deterministically.
    rng = np.random.default_rng(42)
    bits = rng.integers(0, 2, size=128, dtype=np.uint8)
    block = np.frombuffer(np.float16(2.0).tobytes() + np.packbits(bits).tobytes(), dtype=np.uint8).copy()
    # expected is derived from the packed block itself (little-endian bit order),
    # so it is independent of how `bits` above happened to be packed.
    expected = np.float16(2.0) * (np.unpackbits(block[2:], bitorder="little").astype(np.int8) * 2 - 1)
    # TODO: replace 41 with GGMLQuantizationType.Q1_0.value on next gguf-py release
    np.testing.assert_equal(ggml_data_to_tensor(Tensor(block), 128, 41).numpy().flatten(), expected)
|
||||
def test_expected_failure_unknown_type(self):
    """An unknown GGML quantization type id must raise ValueError."""
    # 1337 is not a registered GGMLQuantizationType id.
    bogus = Tensor.empty(512, dtype=dtypes.uint8)
    self.assertRaises(ValueError, ggml_data_to_tensor, bogus, 256, 1337)
||||
@@ -301,7 +301,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
|
||||
int8 (id: 16), int16 (id: 17), int32 (id: 18)
|
||||
Supported quantized types: Q4_0 (id: 2), Q4_1 (id: 3), Q5_0 (id: 6),
|
||||
Q5_1 (id: 7), Q8_0 (id: 8), Q4_K (id: 12), Q5_K (id: 13),
|
||||
Q6_K (id: 14), MXFP4 (id: 39)
|
||||
Q6_K (id: 14), MXFP4 (id: 39), Q1_0 (id: 41)
|
||||
"""
|
||||
# https://github.com/ggerganov/ggml/blob/323951f1bdcdfbd5b5ff3a9a7c3770e63b1a560e/include/ggml.h#L356
|
||||
|
||||
@@ -321,6 +321,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
|
||||
if (nelements_nbytes := {
|
||||
2:(32,18), 3:(32,20), 6:(32,22), 7:(32,24), 8:(32,34),
|
||||
12:(256,144), 13:(256,176), 14:(256,210), 39:(32,17),
|
||||
41:(128,18)
|
||||
}.get(ggml_type)) is not None:
|
||||
blocks = t[:(n//nelements_nbytes[0])*nelements_nbytes[1]].reshape((-1, nelements_nbytes[1])).contiguous()
|
||||
if ggml_type == 2: return (q_to_uint8(blocks[:,2:], 4).bitcast(dtypes.int8) - 8) * blocks[:,:2].bitcast(dtypes.float16).cast(dtypes.float32)
|
||||
@@ -360,6 +361,10 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
|
||||
dtype=dtypes.float32, device=t.device)
|
||||
fp4_val = fp4_lut[codes]
|
||||
return (fp4_val * d).flatten(-2)[:n]
|
||||
if ggml_type == 41:
|
||||
d = blocks[:,:2].bitcast(dtypes.float16)
|
||||
bits = q_to_uint8(blocks[:,2:], 1).reshape(-1, 8, 16).transpose(-1, -2).flatten(-2).bitcast(dtypes.int8)
|
||||
return d * (bits * 2 - 1)
|
||||
raise ValueError(f"GGML type '{ggml_type}' is not supported!")
|
||||
|
||||
@accept_filename
|
||||
|
||||
Reference in New Issue
Block a user