From 4ca844e96b775329660371dc972351fd39371c9b Mon Sep 17 00:00:00 2001
From: Graham Robbins <167053539+graham-ro@users.noreply.github.com>
Date: Sat, 11 Apr 2026 05:17:24 -0500
Subject: [PATCH] add Q1_0 gguf type (#15683)

* add Q1_0

* better description

* fix trailing whitespace

---------

Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
---
 test/unit/test_gguf.py | 7 +++++++
 tinygrad/nn/state.py   | 7 ++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/test/unit/test_gguf.py b/test/unit/test_gguf.py
index ee042b72e4..dfd587f29c 100644
--- a/test/unit/test_gguf.py
+++ b/test/unit/test_gguf.py
@@ -81,6 +81,13 @@ class TestGGUF(unittest.TestCase):
     out = ggml_data_to_tensor(Tensor(block), 32, GGMLQuantizationType.MXFP4.value)
     np.testing.assert_equal(out.numpy(), expected)
 
+  def test_dequantization_q1_0(self):
+    # Q1_0 block (18 bytes): 2-byte fp16 scale, then 16 bytes packing 128 sign bits LSB-first (bit 0 -> -scale, bit 1 -> +scale)
+    block = np.frombuffer(np.float16(2.0).tobytes() + np.packbits(np.random.choice([0, 1], size=128)).tobytes(), dtype=np.uint8).copy()
+    expected = np.float16(2.0) * (np.unpackbits(block[2:], bitorder="little").astype(np.int8) * 2 - 1)
+    # TODO: 41 is the Q1_0 ggml type id; replace it with GGMLQuantizationType.Q1_0.value on next gguf-py release
+    np.testing.assert_equal(ggml_data_to_tensor(Tensor(block), 128, 41).numpy().flatten(), expected)
+
   def test_expected_failure_unknown_type(self):
     with self.assertRaises(ValueError):
       ggml_data_to_tensor(Tensor.empty(512, dtype=dtypes.uint8), 256, 1337)
diff --git a/tinygrad/nn/state.py b/tinygrad/nn/state.py
index fe8736c117..b5bd1e2f81 100644
--- a/tinygrad/nn/state.py
+++ b/tinygrad/nn/state.py
@@ -301,7 +301,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
   int8 (id: 16), int16 (id: 17), int32 (id: 18)
   Supported quantized types: Q4_0 (id: 2), Q4_1 (id: 3), Q5_0 (id: 6),
   Q5_1 (id: 7), Q8_0 (id: 8), Q4_K (id: 12), Q5_K (id: 13),
-  Q6_K (id: 14), MXFP4 (id: 39)
+  Q6_K (id: 14), MXFP4 (id: 39), Q1_0 (id: 41)
   """
   # https://github.com/ggerganov/ggml/blob/323951f1bdcdfbd5b5ff3a9a7c3770e63b1a560e/include/ggml.h#L356
 
@@ -321,6 +321,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
   if (nelements_nbytes := {
     2:(32,18), 3:(32,20), 6:(32,22), 7:(32,24), 8:(32,34),
     12:(256,144), 13:(256,176), 14:(256,210), 39:(32,17),
+    41:(128,18)
   }.get(ggml_type)) is not None:
     blocks = t[:(n//nelements_nbytes[0])*nelements_nbytes[1]].reshape((-1, nelements_nbytes[1])).contiguous()
     if ggml_type == 2: return (q_to_uint8(blocks[:,2:], 4).bitcast(dtypes.int8) - 8) * blocks[:,:2].bitcast(dtypes.float16).cast(dtypes.float32)
@@ -360,6 +361,10 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
                        dtype=dtypes.float32, device=t.device)
       fp4_val = fp4_lut[codes]
       return (fp4_val * d).flatten(-2)[:n]
+    if ggml_type == 41:
+      d = blocks[:,:2].bitcast(dtypes.float16)
+      bits = q_to_uint8(blocks[:,2:], 1).reshape(-1, 8, 16).transpose(-1, -2).flatten(-2).bitcast(dtypes.int8)
+      return d * (bits * 2 - 1)
   raise ValueError(f"GGML type '{ggml_type}' is not supported!")
 
 @accept_filename