From c47d5f3578d975aba0a8e673a105fbc451078a49 Mon Sep 17 00:00:00 2001
From: Hsiao-Wei Wang <hsiaowei.eth@gmail.com>
Date: Fri, 2 Feb 2024 01:45:02 +0800
Subject: [PATCH] Add `recover_matrix` and remove unused `FlatExtendedMatrix`
 type

---
 specs/_features/eip7594/das-core.md           | 34 ++++++++++----
 specs/deneb/polynomial-commitments.md         |  7 +++
 .../test_polynomial_commitments.py            |  8 +---
 .../test/eip7594/unittests/das/__init__.py    |  0
 .../test/eip7594/unittests/das/test_das.py    | 44 +++++++++++++++++++
 .../test_polynomial_commitments.py            | 14 +++---
 .../test/eip7594/unittests/test_custody.py    |  2 -
 7 files changed, 83 insertions(+), 26 deletions(-)
 create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/das/__init__.py
 create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py

diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md
index 0096322d1..a1565f4f7 100644
--- a/specs/_features/eip7594/das-core.md
+++ b/specs/_features/eip7594/das-core.md
@@ -18,7 +18,7 @@
   - [Helper functions](#helper-functions)
     - [`get_custody_columns`](#get_custody_columns)
     - [`compute_extended_data`](#compute_extended_data)
-    - [`compute_extended_matrix`](#compute_extended_matrix)
+    - [`recover_matrix`](#recover_matrix)
     - [`get_data_column_sidecars`](#get_data_column_sidecars)
 - [Custody](#custody)
   - [Custody requirement](#custody-requirement)
@@ -47,7 +47,6 @@ We define the following Python custom types for type hinting and readability:
 | - | - | - |
 | `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP-7594 |
 | `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data of one-dimensional erasure coding extended blobs (in row major format) |
-| `FlatExtendedMatrix` | `List[BLSFieldElement, FIELD_ELEMENTS_PER_CELL * MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` |
 
 ## Configuration
 
@@ -122,12 +121,29 @@ def compute_extended_data(data: Sequence[BLSFieldElement]) -> Sequence[BLSFieldE
     ...
 ```
 
-#### `compute_extended_matrix`
+#### `recover_matrix`
 
 ```python
-def compute_extended_matrix(blobs: Sequence[Blob]) -> FlatExtendedMatrix:
-    matrix = [compute_extended_data(blob) for blob in blobs]
-    return FlatExtendedMatrix(matrix)
+def recover_matrix(cells_dict: Dict[Tuple[BlobIndex, CellID], Cell], blob_count: uint64) -> ExtendedMatrix:
+    """
+    Return the recovered ``ExtendedMatrix``.
+
+    This helper demonstrates how to apply ``recover_polynomial``.
+    The data structure for storing cells is implementation-dependent.
+    """
+    extended_matrix = []
+    for blob_index in range(blob_count):
+        cell_ids = [cell_id for b_index, cell_id in cells_dict.keys() if b_index == blob_index]
+        cells = [cells_dict[(blob_index, cell_id)] for cell_id in cell_ids]
+        cells_bytes = [[bls_field_to_bytes(element) for element in cell] for cell in cells]
+
+        full_polynomial = recover_polynomial(cell_ids, cells_bytes)
+        cells_from_full_polynomial = [
+            full_polynomial[i * FIELD_ELEMENTS_PER_CELL:(i + 1) * FIELD_ELEMENTS_PER_CELL]
+            for i in range(CELLS_PER_BLOB)
+        ]
+        extended_matrix.extend(cells_from_full_polynomial)
+    return ExtendedMatrix(extended_matrix)
 ```
 
 #### `get_data_column_sidecars`
@@ -204,7 +220,7 @@ To custody a particular column, a node joins the respective gossip subnet. Verif
 
 ### Reconstruction and cross-seeding
 
-If the node obtains 50%+ of all the columns, they can reconstruct the full data matrix via `recover_samples_impl` helper.
+If the node obtains 50%+ of all the columns, it can reconstruct the full data matrix via the `recover_matrix` helper.
 
 If a node fails to sample a peer or fails to get a column on the column subnet, a node can utilize the Req/Resp message to query the missing column from other peers.
 
@@ -218,7 +234,7 @@ Once the node obtain the column, the node should send the missing columns to the
 
 ## Peer sampling
 
-At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success.
+At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from its peers via `DataColumnSidecarsByRoot` requests. A node utilizes the `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success.
 
 ## Peer scoring
 
@@ -240,7 +256,7 @@ The fork choice rule (essentially a DA filter) is *orthogonal to a given DAS des
 
 In any DAS design, there are probably a few degrees of freedom around timing, acceptability of short-term re-orgs, etc. 
 
-For example, the fork choice rule might require validators to do successful DAS on slot N to be able to include block of slot `N` in its fork choice. That's the tightest DA filter. But trailing filters are also probably acceptable, knowing that there might be some failures/short re-orgs but that they don't hurt the aggregate security. For example, the rule could be — DAS must be completed for slot N-1 for a child block in N to be included in the fork choice.
+For example, the fork choice rule might require validators to do successful DAS on slot `N` to be able to include the block of slot `N` in their fork choice. That's the tightest DA filter. But trailing filters are also probably acceptable, knowing that there might be some failures/short re-orgs but that they don't hurt the aggregate security. For example, the rule could be — DAS must be completed for slot `N-1` for a child block in `N` to be included in the fork choice.
 
 Such trailing techniques and their analysis will be valuable for any DAS construction. The question is — can you relax how quickly you need to do DA and in the worst case not confirm unavailable data via attestations/finality, and what impact does it have on short-term re-orgs and fast confirmation rules.
 
diff --git a/specs/deneb/polynomial-commitments.md b/specs/deneb/polynomial-commitments.md
index 33945d249..818bee643 100644
--- a/specs/deneb/polynomial-commitments.md
+++ b/specs/deneb/polynomial-commitments.md
@@ -20,6 +20,7 @@
   - [BLS12-381 helpers](#bls12-381-helpers)
     - [`hash_to_bls_field`](#hash_to_bls_field)
     - [`bytes_to_bls_field`](#bytes_to_bls_field)
+    - [`bls_field_to_bytes`](#bls_field_to_bytes)
     - [`validate_kzg_g1`](#validate_kzg_g1)
     - [`bytes_to_kzg_commitment`](#bytes_to_kzg_commitment)
     - [`bytes_to_kzg_proof`](#bytes_to_kzg_proof)
@@ -170,6 +171,12 @@ def bytes_to_bls_field(b: Bytes32) -> BLSFieldElement:
     return BLSFieldElement(field_element)
 ```
 
+#### `bls_field_to_bytes`
+
+```python
+def bls_field_to_bytes(x: BLSFieldElement) -> Bytes32:
+    return int.to_bytes(x % BLS_MODULUS, 32, KZG_ENDIANNESS)
+```
 
 #### `validate_kzg_g1`
 
diff --git a/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py b/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py
index 7ee7168d9..1d43d07ca 100644
--- a/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py
+++ b/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py
@@ -32,10 +32,6 @@ def bls_add_one(x):
     )
 
 
-def field_element_bytes(x):
-    return int.to_bytes(x % BLS_MODULUS, 32, "big")
-
-
 @with_deneb_and_later
 @spec_test
 @single_phase
@@ -43,7 +39,7 @@ def test_verify_kzg_proof(spec):
     """
     Test the wrapper functions (taking bytes arguments) for computing and verifying KZG proofs.
     """
-    x = field_element_bytes(3)
+    x = spec.bls_field_to_bytes(3)
     blob = get_sample_blob(spec)
     commitment = spec.blob_to_kzg_commitment(blob)
     proof, y = spec.compute_kzg_proof(blob, x)
@@ -58,7 +54,7 @@ def test_verify_kzg_proof_incorrect_proof(spec):
     """
     Test the wrapper function `verify_kzg_proof` fails on an incorrect proof.
     """
-    x = field_element_bytes(3465)
+    x = spec.bls_field_to_bytes(3465)
     blob = get_sample_blob(spec)
     commitment = spec.blob_to_kzg_commitment(blob)
     proof, y = spec.compute_kzg_proof(blob, x)
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/__init__.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py
new file mode 100644
index 000000000..15ed6536f
--- /dev/null
+++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py
@@ -0,0 +1,44 @@
+import random
+from eth2spec.test.context import (
+    spec_test,
+    single_phase,
+    with_eip7594_and_later,
+)
+from eth2spec.test.helpers.sharding import (
+    get_sample_blob,
+)
+
+
+@with_eip7594_and_later
+@spec_test
+@single_phase
+def test_recover_matrix(spec):
+    rng = random.Random(5566)
+
+    # Number of samples we will be recovering from
+    N_SAMPLES = spec.CELLS_PER_BLOB // 2
+
+    blob_count = 2
+    cells_dict = {}
+    original_cells = []
+    for blob_index in range(blob_count):
+        # Get the data we will be working with
+        blob = get_sample_blob(spec, rng=rng)
+        # Extend data with Reed-Solomon and split the extended data in cells
+        cells = spec.compute_cells(blob)
+        original_cells.append(cells)
+        cell_ids = []
+        # First figure out just the indices of the cells
+        for _ in range(N_SAMPLES):
+            cell_id = rng.randint(0, spec.CELLS_PER_BLOB - 1)
+            while cell_id in cell_ids:
+                cell_id = rng.randint(0, spec.CELLS_PER_BLOB - 1)
+            cell_ids.append(cell_id)
+            cell = cells[cell_id]
+            cells_dict[(blob_index, cell_id)] = cell
+        assert len(cell_ids) == N_SAMPLES
+
+    # Recover the matrix
+    recovered_matrix = spec.recover_matrix(cells_dict, blob_count)
+    flatten_original_cells = [cell for cells in original_cells for cell in cells]
+    assert recovered_matrix == flatten_original_cells
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py
index 9cdb9912c..4a080488b 100644
--- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py
+++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py
@@ -10,10 +10,6 @@ from eth2spec.test.helpers.sharding import (
 from eth2spec.utils.bls import BLS_MODULUS
 
 
-def field_element_bytes(x):
-    return int.to_bytes(x % BLS_MODULUS, 32, "big")
-
-
 @with_eip7594_and_later
 @spec_test
 @single_phase
@@ -39,7 +35,7 @@ def test_verify_cell_proof(spec):
     commitment = spec.blob_to_kzg_commitment(blob)
     cells, proofs = spec.compute_cells_and_proofs(blob)
 
-    cells_bytes = [[field_element_bytes(element) for element in cell] for cell in cells]
+    cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells]
 
     cell_id = 0
     assert spec.verify_cell_proof(commitment, cell_id, cells_bytes[cell_id], proofs[cell_id])
@@ -54,7 +50,7 @@ def test_verify_cell_proof_batch(spec):
     blob = get_sample_blob(spec)
     commitment = spec.blob_to_kzg_commitment(blob)
     cells, proofs = spec.compute_cells_and_proofs(blob)
-    cells_bytes = [[field_element_bytes(element) for element in cell] for cell in cells]
+    cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells]
 
     assert len(cells) == len(proofs)
 
@@ -83,15 +79,15 @@ def test_recover_polynomial(spec):
 
     # Extend data with Reed-Solomon and split the extended data in cells
     cells = spec.compute_cells(blob)
-    cells_bytes = [[field_element_bytes(element) for element in cell] for cell in cells]
+    cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells]
 
     # Compute the cells we will be recovering from
     cell_ids = []
     # First figure out just the indices of the cells
     for i in range(N_SAMPLES):
-        j = rng.randint(0, spec.CELLS_PER_BLOB)
+        j = rng.randint(0, spec.CELLS_PER_BLOB - 1)
         while j in cell_ids:
-            j = rng.randint(0, spec.CELLS_PER_BLOB)
+            j = rng.randint(0, spec.CELLS_PER_BLOB - 1)
         cell_ids.append(j)
     # Now the cells themselves
     known_cells_bytes = [cells_bytes[cell_id] for cell_id in cell_ids]
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py
index 9c8168b33..e1ab136c4 100644
--- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py
+++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py
@@ -12,8 +12,6 @@ def run_get_custody_columns(spec, peer_count, custody_subnet_count):
     columns_per_subnet = spec.NUMBER_OF_COLUMNS // spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT
     for assignment in assignments:
         assert len(assignment) == custody_subnet_count * columns_per_subnet
-        print('assignment', assignment)
-        print('set(assignment)', set(assignment))
         assert len(assignment) == len(set(assignment))