From 4ca844e96b775329660371dc972351fd39371c9b Mon Sep 17 00:00:00 2001 From: Graham Robbins <167053539+graham-ro@users.noreply.github.com> Date: Sat, 11 Apr 2026 05:17:24 -0500 Subject: [PATCH] add Q1_0 gguf type (#15683) * add Q1_0 * better description * fix trailing whitespace --------- Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com> --- test/unit/test_gguf.py | 7 +++++++ tinygrad/nn/state.py | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/test/unit/test_gguf.py b/test/unit/test_gguf.py index ee042b72e4..dfd587f29c 100644 --- a/test/unit/test_gguf.py +++ b/test/unit/test_gguf.py @@ -81,6 +81,13 @@ class TestGGUF(unittest.TestCase): out = ggml_data_to_tensor(Tensor(block), 32, GGMLQuantizationType.MXFP4.value) np.testing.assert_equal(out.numpy(), expected) + def test_dequantization_q1_0(self): + # Q1_0: 2 bytes fp16 scale + 16 bytes (128 1-bit values) + block = np.frombuffer(np.float16(2.0).tobytes() + np.packbits(np.random.choice([0, 1], size=128)).tobytes(), dtype=np.uint8).copy() + expected = np.float16(2.0) * (np.unpackbits(block[2:], bitorder="little").astype(np.int8) * 2 - 1) + # TODO: replace 41 with GGMLQuantizationType.Q1_0.value on next gguf-py release + np.testing.assert_equal(ggml_data_to_tensor(Tensor(block), 128, 41).numpy().flatten(), expected) + def test_expected_failure_unknown_type(self): with self.assertRaises(ValueError): ggml_data_to_tensor(Tensor.empty(512, dtype=dtypes.uint8), 256, 1337) diff --git a/tinygrad/nn/state.py b/tinygrad/nn/state.py index fe8736c117..b5bd1e2f81 100644 --- a/tinygrad/nn/state.py +++ b/tinygrad/nn/state.py @@ -301,7 +301,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor: int8 (id: 16), int16 (id: 17), int32 (id: 18) Supported quantized types: Q4_0 (id: 2), Q4_1 (id: 3), Q5_0 (id: 6), Q5_1 (id: 7), Q8_0 (id: 8), Q4_K (id: 12), Q5_K (id: 13), - Q6_K (id: 14), MXFP4 (id: 39) + Q6_K (id: 14), MXFP4 (id: 39), Q1_0 (id: 41) """ # https://github.com/ggerganov/ggml/blob/323951f1bdcdfbd5b5ff3a9a7c3770e63b1a560e/include/ggml.h#L356 @@ -321,6 +321,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor: if (nelements_nbytes := { 2:(32,18), 3:(32,20), 6:(32,22), 7:(32,24), 8:(32,34), 12:(256,144), 13:(256,176), 14:(256,210), 39:(32,17), + 41:(128,18) }.get(ggml_type)) is not None: blocks = t[:(n//nelements_nbytes[0])*nelements_nbytes[1]].reshape((-1, nelements_nbytes[1])).contiguous() if ggml_type == 2: return (q_to_uint8(blocks[:,2:], 4).bitcast(dtypes.int8) - 8) * blocks[:,:2].bitcast(dtypes.float16).cast(dtypes.float32) @@ -360,6 +361,10 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor: dtype=dtypes.float32, device=t.device) fp4_val = fp4_lut[codes] return (fp4_val * d).flatten(-2)[:n] + if ggml_type == 41: + d = blocks[:,:2].bitcast(dtypes.float16) + bits = q_to_uint8(blocks[:,2:], 1).reshape(-1, 8, 16).transpose(-1, -2).flatten(-2).bitcast(dtypes.int8) + return d * (bits * 2 - 1) raise ValueError(f"GGML type '{ggml_type}' is not supported!") @accept_filename