feat: add partial blob reconstruction (#8567)

**Motivation**

When retrieving blobs via `GET /eth/v1/beacon/blobs/{block_id}`, a consumer can specify the
versioned hashes of the blobs it is interested in, and commonly (e.g. in the case of L2s)
there is no need to get all the blobs.

Reconstructing all blobs versus just a subset makes little difference when we have the full
set of data columns (128) as a supernode, since no cell recovery is needed. But when the node
only has 64 columns (["semi supernode"](https://github.com/ChainSafe/lodestar/pull/8568)),
cells must be recovered, which is very expensive when done for all blob rows. With this
change we only recover cells for the requested blobs, which can make a huge difference
(see benchmarks below).

**Description**

Add partial blob reconstruction:
- update `reconstructBlobs` to only reconstruct the blobs at the given `indices`, if provided (see the usage sketch after this list)
- add a new `recoverBlobCells` helper, similar to `dataColumnMatrixRecovery` but supporting partial recovery
- only call `asyncRecoverCellsAndKzgProofs` for the blob rows that actually need to be recovered
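
For illustration, here is a minimal sketch of the new call shape. The `reconstructBlobs` signature and return-order behavior match the diff below; the wrapper function and import path are hypothetical:

```ts
import type {deneb, fulu} from "@lodestar/types";
import {reconstructBlobs} from "./util/blobs.ts"; // hypothetical relative path

// Reconstruct only the blobs a consumer asked for (e.g. an L2 reading its own data).
// `sidecars` must hold at least 64 of the 128 data columns for the block.
async function getRequestedBlobs(
  sidecars: fulu.DataColumnSidecars,
  requestedIndices: number[] // e.g. [0, 2], derived from the consumer's versioned hashes
): Promise<deneb.Blobs> {
  // Cells are recovered only for the requested rows; the result at position i
  // corresponds to requestedIndices[i]. Omitting the second argument
  // reconstructs all blobs, as before.
  return reconstructBlobs(sidecars, requestedIndices);
}
```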

Results from running the benchmark locally. Note how with half columns the cost scales with the number of requested blobs: at 72 blobs, recovering a single blob (~74 ms) is roughly 38x faster than recovering all of them (~2.8 s).
```
reconstructBlobs
    Reconstruct blobs - 6 blobs
      ✔ Full columns - reconstruct all 6 blobs                              7492.994 ops/s    133.4580 us/op   x0.952       1416 runs  0.381 s
      ✔ Full columns - reconstruct half of the blobs out of 6               14825.80 ops/s    67.45000 us/op   x0.989       3062 runs  0.304 s
      ✔ Full columns - reconstruct single blob out of 6                     34335.94 ops/s    29.12400 us/op   x1.009       8595 runs  0.306 s
      ✔ Half columns - reconstruct all 6 blobs                              3.992937 ops/s    250.4422 ms/op   x0.968         10 runs   4.83 s
      ✔ Half columns - reconstruct half of the blobs out of 6               7.329427 ops/s    136.4363 ms/op   x0.980         10 runs   1.90 s
      ✔ Half columns - reconstruct single blob out of 6                     13.21413 ops/s    75.67658 ms/op   x1.003         11 runs   1.36 s
    Reconstruct blobs - 10 blobs
      ✔ Full columns - reconstruct all 10 blobs                             4763.833 ops/s    209.9150 us/op   x0.908       1324 runs  0.536 s
      ✔ Full columns - reconstruct half of the blobs out of 10              9749.439 ops/s    102.5700 us/op   x0.935       1818 runs  0.319 s
      ✔ Full columns - reconstruct single blob out of 10                    36794.47 ops/s    27.17800 us/op   x0.923       9087 runs  0.307 s
      ✔ Half columns - reconstruct all 10 blobs                             2.346124 ops/s    426.2349 ms/op   x1.033         10 runs   5.09 s
      ✔ Half columns - reconstruct half of the blobs out of 10              4.509997 ops/s    221.7296 ms/op   x1.022         10 runs   2.84 s
      ✔ Half columns - reconstruct single blob out of 10                    13.73414 ops/s    72.81126 ms/op   x0.910         11 runs   1.30 s
    Reconstruct blobs - 20 blobs
      ✔ Full columns - reconstruct all 20 blobs                             2601.524 ops/s    384.3900 us/op   x0.982        723 runs  0.727 s
      ✔ Full columns - reconstruct half of the blobs out of 20              5049.306 ops/s    198.0470 us/op   x0.961        933 runs  0.421 s
      ✔ Full columns - reconstruct single blob out of 20                    34156.51 ops/s    29.27700 us/op   x0.980       8441 runs  0.306 s
      ✔ Half columns - reconstruct all 20 blobs                             1.211887 ops/s    825.1593 ms/op   x1.010         10 runs   9.10 s
      ✔ Half columns - reconstruct half of the blobs out of 20              2.350099 ops/s    425.5140 ms/op   x0.977         10 runs   5.13 s
      ✔ Half columns - reconstruct single blob out of 20                    13.93751 ops/s    71.74882 ms/op   x0.915         11 runs   1.31 s
    Reconstruct blobs - 48 blobs
      ✔ Full columns - reconstruct all 48 blobs                             1031.150 ops/s    969.7910 us/op   x0.853        286 runs  0.805 s
      ✔ Full columns - reconstruct half of the blobs out of 48              2042.254 ops/s    489.6550 us/op   x0.933        581 runs  0.805 s
      ✔ Full columns - reconstruct single blob out of 48                    33946.64 ops/s    29.45800 us/op   x0.961       7685 runs  0.306 s
      ✔ Half columns - reconstruct all 48 blobs                            0.5274713 ops/s    1.895838  s/op   x0.940         10 runs   21.0 s
      ✔ Half columns - reconstruct half of the blobs out of 48              1.033691 ops/s    967.4067 ms/op   x0.951         10 runs   10.7 s
      ✔ Half columns - reconstruct single blob out of 48                    12.54519 ops/s    79.71183 ms/op   x1.072         11 runs   1.44 s
    Reconstruct blobs - 72 blobs
      ✔ Full columns - reconstruct all 72 blobs                             586.0658 ops/s    1.706293 ms/op   x0.985        178 runs  0.806 s
      ✔ Full columns - reconstruct half of the blobs out of 72              1390.803 ops/s    719.0090 us/op   x0.959        386 runs  0.804 s
      ✔ Full columns - reconstruct single blob out of 72                    34457.81 ops/s    29.02100 us/op   x0.995       8437 runs  0.306 s
      ✔ Half columns - reconstruct all 72 blobs                            0.3519770 ops/s    2.841095  s/op   x0.972         10 runs   31.4 s
      ✔ Half columns - reconstruct half of the blobs out of 72             0.6779473 ops/s    1.475041  s/op   x1.027         10 runs   16.2 s
      ✔ Half columns - reconstruct single blob out of 72                    13.59862 ops/s    73.53685 ms/op   x0.927         11 runs   1.38 s
```
Author: Nico Flaig
Date: 2025-10-27 12:38:13 +00:00
Committed by: GitHub
Parent: 88fbac9fcf
Commit: 9fe2e4d0bd

4 changed files with 246 additions and 51 deletions

View File

@@ -654,18 +654,21 @@ export function getBeaconBlockApi({
         );
       }
-      const blobs = await reconstructBlobs(dataColumnSidecars);
+      for (const index of indices ?? []) {
+        if (index < 0 || index >= blobCount) {
+          throw new ApiError(400, `Invalid blob index ${index}, must be between 0 and ${blobCount - 1}`);
+        }
+      }
+      const indicesToReconstruct = indices ?? Array.from({length: blobCount}, (_, i) => i);
+      const blobs = await reconstructBlobs(dataColumnSidecars, indicesToReconstruct);
       const signedBlockHeader = signedBlockToSignedHeader(config, block);
-      const requestedIndices = indices ?? Array.from({length: blobKzgCommitments.length}, (_, i) => i);
       data = await Promise.all(
-        requestedIndices.map(async (index) => {
+        indicesToReconstruct.map(async (index, i) => {
           // Reconstruct blob sidecar from blob
           const kzgCommitment = blobKzgCommitments[index];
           if (kzgCommitment === undefined) {
             throw new ApiError(400, `Blob index ${index} not found in block`);
           }
-          const blob = blobs[index];
+          const blob = blobs[i]; // Use i since blobs only contains requested indices
           const kzgProof = await kzg.asyncComputeBlobKzgProof(blob, kzgCommitment);
           const kzgCommitmentInclusionProof = computePreFuluKzgCommitmentsInclusionProof(
             fork,

@@ -724,7 +727,8 @@ export function getBeaconBlockApi({
         );
       }
-      const blobCount = (block.message.body as deneb.BeaconBlockBody).blobKzgCommitments.length;
+      const blobKzgCommitments = (block.message.body as deneb.BeaconBlockBody).blobKzgCommitments;
+      const blobCount = blobKzgCommitments.length;
       if (blobCount > 0) {
         let dataColumnSidecars = await fromAsync(db.dataColumnSidecar.valuesStream(blockRoot));

@@ -739,7 +743,25 @@ export function getBeaconBlockApi({
           );
         }
-        blobs = await reconstructBlobs(dataColumnSidecars);
+        let indicesToReconstruct: number[];
+        if (versionedHashes) {
+          const blockVersionedHashes = blobKzgCommitments.map((commitment) =>
+            toHex(kzgCommitmentToVersionedHash(commitment))
+          );
+          indicesToReconstruct = [];
+          for (const requestedHash of versionedHashes) {
+            const index = blockVersionedHashes.findIndex((hash) => hash === requestedHash);
+            if (index === -1) {
+              throw new ApiError(400, `Versioned hash ${requestedHash} not found in block`);
+            }
+            indicesToReconstruct.push(index);
+          }
+          indicesToReconstruct.sort((a, b) => a - b);
+        } else {
+          indicesToReconstruct = Array.from({length: blobCount}, (_, i) => i);
+        }
+        blobs = await reconstructBlobs(dataColumnSidecars, indicesToReconstruct);
       } else {
         blobs = [];
       }

@@ -757,29 +779,29 @@ export function getBeaconBlockApi({
         }
         blobs = blobSidecars.sort((a, b) => a.index - b.index).map(({blob}) => blob);
-        if (blobs.length && versionedHashes) {
-          const kzgCommitments = (block as deneb.SignedBeaconBlock).message.body.blobKzgCommitments;
-          const blockVersionedHashes = kzgCommitments.map((commitment) =>
-            toHex(kzgCommitmentToVersionedHash(commitment))
-          );
-          const requestedIndices: number[] = [];
-          for (const requestedHash of versionedHashes) {
-            const index = blockVersionedHashes.findIndex((hash) => hash === requestedHash);
-            if (index === -1) {
-              throw new ApiError(400, `Versioned hash ${requestedHash} not found in block`);
-            }
-            requestedIndices.push(index);
-          }
-          blobs = requestedIndices.sort((a, b) => a - b).map((index) => blobs[index]);
-        }
       } else {
         blobs = [];
       }
+      if (blobs.length && versionedHashes?.length) {
+        const kzgCommitments = (block as deneb.SignedBeaconBlock).message.body.blobKzgCommitments;
+        const blockVersionedHashes = kzgCommitments.map((commitment) =>
+          toHex(kzgCommitmentToVersionedHash(commitment))
+        );
+        const requestedIndices: number[] = [];
+        for (const requestedHash of versionedHashes) {
+          const index = blockVersionedHashes.findIndex((hash) => hash === requestedHash);
+          if (index === -1) {
+            throw new ApiError(400, `Versioned hash ${requestedHash} not found in block`);
+          }
+          requestedIndices.push(index);
+        }
+        blobs = requestedIndices.sort((a, b) => a - b).map((index) => blobs[index]);
+      }
       return {
         data: blobs,
         meta: {
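
For reference, a consumer-side request that benefits from this change might look as follows. This is a hypothetical sketch: the `versioned_hashes` query parameter name is an assumption inferred from the handler's `versionedHashes` variable and is not confirmed by this diff:

```ts
// Hypothetical consumer-side call against a local beacon node (assumed port 9596).
const beaconUrl = "http://localhost:9596";
const hashes: string[] = [
  /* versioned hashes of the blobs the consumer needs */
];

const res = await fetch(`${beaconUrl}/eth/v1/beacon/blobs/head?versioned_hashes=${hashes.join(",")}`);
if (!res.ok) throw new Error(`Request failed with status ${res.status}`);
const {data: blobs} = await res.json(); // only the requested blobs are reconstructed server-side
```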

View File

@@ -149,41 +149,85 @@ export async function dataColumnMatrixRecovery(
  * Reconstruct blobs from a set of data columns, at least 50%+ of all the columns
  * must be provided to allow to reconstruct the full data matrix
  */
-export async function reconstructBlobs(sidecars: fulu.DataColumnSidecars): Promise<deneb.Blobs> {
+export async function reconstructBlobs(sidecars: fulu.DataColumnSidecars, indices?: number[]): Promise<deneb.Blobs> {
   if (sidecars.length < NUMBER_OF_COLUMNS / 2) {
     throw Error(
       `Expected at least ${NUMBER_OF_COLUMNS / 2} data columns to reconstruct blobs, received ${sidecars.length}`
     );
   }
+  const blobCount = sidecars[0].column.length;
-  let fullSidecars: fulu.DataColumnSidecars;
-  if (sidecars.length === NUMBER_OF_COLUMNS) {
-    // Full columns, no need to recover
-    fullSidecars = sidecars;
-  } else {
-    const sidecarsByIndex = new Map<number, fulu.DataColumnSidecar>(sidecars.map((sc) => [sc.index, sc]));
-    const recoveredSidecars = await dataColumnMatrixRecovery(sidecarsByIndex);
-    if (recoveredSidecars === null) {
-      // Should not happen because we check the column count above
-      throw Error("Failed to reconstruct the full data matrix");
-    }
-    fullSidecars = recoveredSidecars;
-  }
+  for (const index of indices ?? []) {
+    if (index < 0 || index >= blobCount) {
+      throw Error(`Invalid blob index ${index}, must be between 0 and ${blobCount - 1}`);
+    }
+  }
+  const indicesToReconstruct = indices ?? Array.from({length: blobCount}, (_, i) => i);
+  const recoveredCells = await recoverBlobCells(sidecars, indicesToReconstruct);
+  if (recoveredCells === null) {
+    // Should not happen because we check the column count above
+    throw Error("Failed to recover cells to reconstruct blobs");
+  }
-  const blobCount = fullSidecars[0].column.length;
-  const blobs: deneb.Blobs = new Array(blobCount);
-  const ordered = fullSidecars.slice().sort((a, b) => a.index - b.index);
-  for (let row = 0; row < blobCount; row++) {
-    // 128 cells that make up one "extended blob" row
-    const cells = ordered.map((col) => col.column[row]);
-    blobs[row] = cellsToBlob(cells);
-  }
+  const blobs: deneb.Blobs = new Array(indicesToReconstruct.length);
+  for (let i = 0; i < indicesToReconstruct.length; i++) {
+    const blobIndex = indicesToReconstruct[i];
+    const cells = recoveredCells.get(blobIndex);
+    if (!cells) {
+      throw Error(`Failed to get recovered cells for blob index ${blobIndex}`);
+    }
+    blobs[i] = cellsToBlob(cells);
+  }
   return blobs;
 }

+/**
+ * Recover cells for specific blob indices from a set of data columns
+ */
+async function recoverBlobCells(
+  partialSidecars: fulu.DataColumnSidecar[],
+  blobIndices: number[]
+): Promise<Map<number, fulu.Cell[]> | null> {
+  const columnCount = partialSidecars.length;
+  if (columnCount < NUMBER_OF_COLUMNS / 2) {
+    // We don't have enough columns to recover
+    return null;
+  }
+  const recoveredCells = new Map<number, fulu.Cell[]>();
+  // Sort data columns by index in ascending order
+  const partialSidecarsSorted = partialSidecars.slice().sort((a, b) => a.index - b.index);
+  if (columnCount === NUMBER_OF_COLUMNS) {
+    // Full columns, no need to recover
+    for (const blobIndex of blobIndices) {
+      // 128 cells that make up one "extended blob" row
+      const cells = partialSidecarsSorted.map((col) => col.column[blobIndex]);
+      recoveredCells.set(blobIndex, cells);
+    }
+    return recoveredCells;
+  }
+  await Promise.all(
+    blobIndices.map(async (blobIndex) => {
+      const cellIndices: number[] = [];
+      const cells: fulu.Cell[] = [];
+      for (const dataColumn of partialSidecarsSorted) {
+        cellIndices.push(dataColumn.index);
+        cells.push(dataColumn.column[blobIndex]);
+      }
+      // Recover cells for this specific blob row
+      const recovered = await kzg.asyncRecoverCellsAndKzgProofs(cellIndices, cells);
+      recoveredCells.set(blobIndex, recovered.cells);
+    })
+  );
+  return recoveredCells;
+}

 /**
  * Concatenate the systematic half (columns 0–63) of a row of cells into
  * the original 131072-byte blob. The parity half (64–127) is ignored as
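
The `cellsToBlob` helper whose doc comment is truncated above concatenates the systematic half of a recovered row back into the original blob. A minimal sketch of that idea, assuming the EIP-7594 layout (64 systematic cells of 2048 bytes each, i.e. a 131072-byte blob); this is not the repo's exact implementation:

```ts
// Sketch only: reassemble a blob from a full row of 128 cells.
// Cells 0-63 are the systematic half and hold the original blob data;
// cells 64-127 are Reed-Solomon parity and are ignored here.
const CELLS_PER_BLOB = 64; // systematic half of the 128-cell extended row
const BYTES_PER_CELL = 2048; // 131072-byte blob / 64 systematic cells

function cellsToBlobSketch(cells: Uint8Array[]): Uint8Array {
  const blob = new Uint8Array(CELLS_PER_BLOB * BYTES_PER_CELL);
  for (let i = 0; i < CELLS_PER_BLOB; i++) {
    blob.set(cells[i], i * BYTES_PER_CELL);
  }
  return blob;
}
```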

View File

@@ -0,0 +1,71 @@
+import {bench, describe} from "@chainsafe/benchmark";
+import {createChainForkConfig} from "@lodestar/config";
+import {NUMBER_OF_COLUMNS} from "@lodestar/params";
+import {ssz} from "@lodestar/types";
+import {reconstructBlobs} from "../../../src/util/blobs.ts";
+import {getDataColumnSidecarsFromBlock} from "../../../src/util/dataColumns.ts";
+import {kzg} from "../../../src/util/kzg.ts";
+import {generateRandomBlob} from "../../utils/kzg.ts";
+
+describe("reconstructBlobs", () => {
+  const config = createChainForkConfig({
+    ALTAIR_FORK_EPOCH: 0,
+    BELLATRIX_FORK_EPOCH: 0,
+    CAPELLA_FORK_EPOCH: 0,
+    DENEB_FORK_EPOCH: 0,
+    ELECTRA_FORK_EPOCH: 0,
+    FULU_FORK_EPOCH: 0,
+  });
+
+  const testCases = [
+    {blobCount: 6, name: "6 blobs"},
+    {blobCount: 10, name: "10 blobs"},
+    {blobCount: 20, name: "20 blobs"},
+    // Disabled as those take too long to run
+    // {blobCount: 48, name: "48 blobs"},
+    // {blobCount: 72, name: "72 blobs"},
+  ];
+
+  for (const {blobCount, name} of testCases) {
+    describe(`Reconstruct blobs - ${name}`, () => {
+      const blobs = Array.from({length: blobCount}, (_) => generateRandomBlob());
+      const kzgCommitments = blobs.map((blob) => kzg.blobToKzgCommitment(blob));
+      const cellsAndProofs = blobs.map((blob) => kzg.computeCellsAndKzgProofs(blob));
+
+      const signedBeaconBlock = ssz.fulu.SignedBeaconBlock.defaultValue();
+      signedBeaconBlock.message.body.blobKzgCommitments = kzgCommitments;
+
+      const allSidecars = getDataColumnSidecarsFromBlock(config, signedBeaconBlock, cellsAndProofs);
+      const halfSidecars = allSidecars.sort(() => Math.random() - 0.5).slice(0, NUMBER_OF_COLUMNS / 2);
+
+      const scenarios = [
+        {sidecars: allSidecars, name: "Full columns"},
+        {sidecars: halfSidecars, name: "Half columns"},
+      ];
+
+      for (const {sidecars, name} of scenarios) {
+        bench({
+          id: `${name} - reconstruct all ${blobCount} blobs`,
+          fn: async () => {
+            await reconstructBlobs(sidecars);
+          },
+        });
+
+        bench({
+          id: `${name} - reconstruct half of the blobs out of ${blobCount}`,
+          fn: async () => {
+            const indices = Array.from({length: blobCount / 2}, (_, i) => i);
+            await reconstructBlobs(sidecars, indices);
+          },
+        });
+
+        bench({
+          id: `${name} - reconstruct single blob out of ${blobCount}`,
+          fn: async () => {
+            await reconstructBlobs(sidecars, [0]);
+          },
+        });
+      }
+    });
+  }
+});

View File

@@ -18,7 +18,7 @@ describe("reconstructBlobs", () => {
   });

   // Generate test data
-  const blobs = [generateRandomBlob(), generateRandomBlob()];
+  const blobs = [generateRandomBlob(), generateRandomBlob(), generateRandomBlob()];

   // Compute commitments, cells, and proofs for each blob
   const kzgCommitments = blobs.map((blob) => kzg.blobToKzgCommitment(blob));

@@ -30,11 +30,11 @@ describe("reconstructBlobs", () => {
   const sidecars = getDataColumnSidecarsFromBlock(config, signedBeaconBlock, cellsAndProofs);

-  it("should reconstruct blobs from a complete set of data columns", async () => {
+  it("should reconstruct all blobs from a complete set of data columns", async () => {
     expect(await reconstructBlobs(sidecars)).toEqual(blobs);
   });

-  it("should reconstruct blobs from at least half of the data columns", async () => {
+  it("should reconstruct all blobs from at least half of the data columns", async () => {
     // random shuffle + take first 64
     const randomHalf = sidecars
       .slice()

@@ -44,6 +44,64 @@ describe("reconstructBlobs", () => {
     expect(await reconstructBlobs(randomHalf)).toEqual(blobs);
   });

+  it("should reconstruct only specified blobs from a complete set of data columns", async () => {
+    // only first blob
+    const firstBlobOnly = await reconstructBlobs(sidecars, [0]);
+    expect(firstBlobOnly).toHaveLength(1);
+    expect(firstBlobOnly[0]).toEqual(blobs[0]);
+
+    // only last blob
+    const lastBlobOnly = await reconstructBlobs(sidecars, [2]);
+    expect(lastBlobOnly).toHaveLength(1);
+    expect(lastBlobOnly[0]).toEqual(blobs[2]);
+
+    // first and last blobs
+    const firstAndLast = await reconstructBlobs(sidecars, [0, 2]);
+    expect(firstAndLast).toHaveLength(2);
+    expect(firstAndLast[0]).toEqual(blobs[0]);
+    expect(firstAndLast[1]).toEqual(blobs[2]);
+
+    // all blobs in different order
+    const reversedOrder = await reconstructBlobs(sidecars, [2, 1, 0]);
+    expect(reversedOrder).toHaveLength(3);
+    expect(reversedOrder[0]).toEqual(blobs[2]);
+    expect(reversedOrder[1]).toEqual(blobs[1]);
+    expect(reversedOrder[2]).toEqual(blobs[0]);
+  });
+
+  it("should reconstruct only specified blobs from at least half of the data columns", async () => {
+    // random shuffle + take first 64
+    const randomHalf = sidecars
+      .slice()
+      .sort(() => Math.random() - 0.5)
+      .slice(0, NUMBER_OF_COLUMNS / 2);
+
+    // only single blob
+    const firstBlobOnly = await reconstructBlobs(randomHalf, [0]);
+    expect(firstBlobOnly).toHaveLength(1);
+    expect(firstBlobOnly[0]).toEqual(blobs[0]);
+
+    // multiple specific blobs
+    const multipleBlobs = await reconstructBlobs(randomHalf, [0, 2]);
+    expect(multipleBlobs).toHaveLength(2);
+    expect(multipleBlobs[0]).toEqual(blobs[0]);
+    expect(multipleBlobs[1]).toEqual(blobs[2]);
+
+    // all blobs in sequential order
+    expect(await reconstructBlobs(randomHalf, [0, 1, 2])).toEqual(blobs);
+  });
+
+  it("should throw for invalid blob indices", async () => {
+    // negative index
+    await expect(reconstructBlobs(sidecars, [-1])).rejects.toThrow("Invalid blob index -1");
+    // out of range index
+    await expect(reconstructBlobs(sidecars, [3])).rejects.toThrow("Invalid blob index 3");
+    // valid and invalid
+    await expect(reconstructBlobs(sidecars, [0, 5])).rejects.toThrow("Invalid blob index 5");
+  });
+
   it("should throw if less than half of the data columns are provided", async () => {
     const lessThanHalf = sidecars.slice(0, NUMBER_OF_COLUMNS / 2 - 10);