From 8df5d119ba9ed547c826d29bcb1783eb8c86474f Mon Sep 17 00:00:00 2001 From: Agnish Ghosh <80243668+agnxsh@users.noreply.github.com> Date: Fri, 27 Jun 2025 18:06:13 +0530 Subject: [PATCH] Remove peer sampling in Fulu (#4393) --- specs/fulu/p2p-interface.md | 25 +++ specs/fulu/peer-sampling.md | 173 ------------------ .../test/fulu/unittests/das/test_das.py | 84 --------- 3 files changed, 25 insertions(+), 257 deletions(-) delete mode 100644 specs/fulu/peer-sampling.md diff --git a/specs/fulu/p2p-interface.md b/specs/fulu/p2p-interface.md index f65cc279f..97b629463 100644 --- a/specs/fulu/p2p-interface.md +++ b/specs/fulu/p2p-interface.md @@ -36,6 +36,8 @@ - [ENR structure](#enr-structure) - [Custody group count](#custody-group-count) - [Next fork digest](#next-fork-digest) +- [Peer Scoring](#peer-scoring) +- [DAS providers](#das-providers) @@ -545,3 +547,26 @@ disconnect from such peers at the fork boundary, but not sooner. Nodes unprepared to follow the Fulu fork will be unaware of `nfd` entries. However, their existing comparison of `eth2` entries (concretely `next_fork_epoch`) is sufficient to detect upcoming divergence. + +## Peer Scoring + +Due to the deterministic custody functions, a node knows exactly what a peer +should be able to respond to. In the event that a peer does not respond to +samples of their custodied rows/columns, a node may downscore or disconnect from +a peer. + +## DAS providers + +A DAS provider is a consistently-available-for-DAS-queries, super-full (or high +capacity) node. To the p2p, these look just like other nodes but with high +advertised capacity, and they should generally be able to be latently found via +normal discovery. + +DAS providers can also be found out-of-band and configured into a node to +connect to directly and prioritize. Nodes can add some set of these to their +local configuration for persistent connection to bolster their DAS quality of +service. + +Such direct peering utilizes a feature supported out of the box today on all +nodes and can complement (and reduce attackability and increase +quality-of-service) alternative peer discovery mechanisms. diff --git a/specs/fulu/peer-sampling.md b/specs/fulu/peer-sampling.md deleted file mode 100644 index a501d2f78..000000000 --- a/specs/fulu/peer-sampling.md +++ /dev/null @@ -1,173 +0,0 @@ -# Fulu -- Peer Sampling - -*Note*: This document is a work-in-progress for researchers and implementers. - - - -- [Introduction](#introduction) -- [Helper functions](#helper-functions) - - [`get_extended_sample_count`](#get_extended_sample_count) -- [Peer discovery](#peer-discovery) -- [Peer sampling](#peer-sampling) - - [Sample selection](#sample-selection) - - [Sample queries](#sample-queries) -- [Peer scoring](#peer-scoring) -- [DAS providers](#das-providers) - - - -## Introduction - -The purpose of this document is to complement -[Fulu -- Data Availability Sampling Core](das-core.md) by specifying the peer -sampling functionality of the full PeerDAS protocol. Initially, this -functionality may not be implemented by all clients. In such cases, it is -replaced by [subnet sampling](das-core.md#subnet-sampling), which is an -extension of the custody component of the protocol. - -## Helper functions - -### `get_extended_sample_count` - -```python -def get_extended_sample_count(allowed_failures: uint64) -> uint64: - assert 0 <= allowed_failures <= NUMBER_OF_COLUMNS // 2 - """ - Return the sample count if allowing failures. - - This helper demonstrates how to calculate the number of columns to query per slot when - allowing given number of failures, assuming uniform random selection without replacement. - Nested functions are direct replacements of Python library functions math.comb and - scipy.stats.hypergeom.cdf, with the same signatures. - """ - - def math_comb(n: int, k: int) -> int: - if not 0 <= k <= n: - return 0 - r = 1 - for i in range(min(k, n - k)): - r = r * (n - i) // (i + 1) - return r - - def hypergeom_cdf(k: uint64, M: uint64, n: uint64, N: uint64) -> float: - # Note: It contains float-point computations. - # Convert uint64 to Python integers before computations. - k = int(k) - M = int(M) - n = int(n) - N = int(N) - return sum( - [math_comb(n, i) * math_comb(M - n, N - i) / math_comb(M, N) for i in range(k + 1)] - ) - - worst_case_missing = NUMBER_OF_COLUMNS // 2 + 1 - false_positive_threshold = hypergeom_cdf( - 0, NUMBER_OF_COLUMNS, worst_case_missing, SAMPLES_PER_SLOT - ) - for sample_count in range(SAMPLES_PER_SLOT, NUMBER_OF_COLUMNS + 1): - if ( - hypergeom_cdf(allowed_failures, NUMBER_OF_COLUMNS, worst_case_missing, sample_count) - <= false_positive_threshold - ): - break - return sample_count -``` - -## Peer discovery - -At each slot, a node needs to be able to readily sample from *any* set of -columns. To this end, a node SHOULD find and maintain a set of diverse and -reliable peers that can regularly satisfy their sampling demands. - -A node runs a background peer discovery process, maintaining peers of various -custody distributions (both `custody_size` and column assignments). The -combination of advertised `custody_size` size and public node-id make this -readily and publicly accessible. The peer set should cover the whole column -space, with some redundancy. The number of peers, or at least the redundancy -implied by the custody distributions over the peer set, should be tuned upward -in the event of failed sampling. - -*Note*: while high-capacity and super-full nodes are high value with respect to -satisfying sampling requirements, a node SHOULD maintain a distribution across -node capacities as to not centralize the p2p graph too much (in the extreme -becomes hub/spoke) and to distribute sampling load better across all nodes. - -*Note*: A DHT-based peer discovery mechanism is expected to be utilized in the -above. The beacon-chain network currently utilizes discv5 in a similar method as -described for finding peers of particular distributions of attestation subnets. -Additional peer discovery methods are valuable to integrate (e.g., latent peer -discovery via libp2p gossipsub) to add a defense in breadth against one of the -discovery methods being attacked. - -## Peer sampling - -### Sample selection - -At each slot, a node SHOULD select at least `SAMPLES_PER_SLOT` column IDs for -sampling. It is recommended to use uniform random selection without replacement -based on local randomness. Sampling is considered successful if the node manages -to retrieve all selected columns. - -Alternatively, a node MAY use a method that selects more than `SAMPLES_PER_SLOT` -columns while allowing some missing, respecting the same target false positive -threshold (the probability of successful sampling of an unavailable block) as -dictated by the `SAMPLES_PER_SLOT` parameter. If using uniform random selection -without replacement, a node can use the -`get_extended_sample_count(allowed_failures) -> sample_count` helper function to -determine the sample count (number of unique column IDs) for any selected number -of allowed failures. Sampling is then considered successful if any -`sample_count - allowed_failures` columns are retrieved successfully. - -For reference, the table below shows the number of samples and the number of -allowed missing columns assuming `NUMBER_OF_COLUMNS = 128` and -`SAMPLES_PER_SLOT = 16`. - -| Allowed missing | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -| --------------- | --- | --- | --- | --- | --- | --- | --- | --- | --- | -| Sample count | 16 | 20 | 24 | 27 | 29 | 32 | 35 | 37 | 40 | - -### Sample queries - -A node SHOULD maintain a diverse set of peers for each column and each slot by -verifying responsiveness to sample queries. - -A node SHOULD query for samples from selected peers via -`DataColumnSidecarsByRoot` request. A node utilizes `get_custody_groups` helper -to determine which peer(s) it could request from, identifying a list of -candidate peers for each selected column. - -If more than one candidate peer is found for a given column, a node SHOULD -randomize its peer selection to distribute sample query load in the network. -Nodes MAY use peer scoring to tune this selection (for example, by using -weighted selection or by using a cut-off threshold). If possible, it is also -recommended to avoid requesting many columns from the same peer in order to -avoid relying on and exposing the sample selection to a single peer. - -If a node already has a column because of custody, it is not required to send -out queries for that column. - -If a node has enough good/honest peers across all columns, and the data is being -made available, the above procedure has a high chance of success. - -## Peer scoring - -Due to the deterministic custody functions, a node knows exactly what a peer -should be able to respond to. In the event that a peer does not respond to -samples of their custodied rows/columns, a node may downscore or disconnect from -a peer. - -## DAS providers - -A DAS provider is a consistently-available-for-DAS-queries, super-full (or high -capacity) node. To the p2p, these look just like other nodes but with high -advertised capacity, and they should generally be able to be latently found via -normal discovery. - -DAS providers can also be found out-of-band and configured into a node to -connect to directly and prioritize. Nodes can add some set of these to their -local configuration for persistent connection to bolster their DAS quality of -service. - -Such direct peering utilizes a feature supported out of the box today on all -nodes and can complement (and reduce attackability and increase -quality-of-service) alternative peer discovery mechanisms. diff --git a/tests/core/pyspec/eth2spec/test/fulu/unittests/das/test_das.py b/tests/core/pyspec/eth2spec/test/fulu/unittests/das/test_das.py index 0f19548ab..767b4cd27 100644 --- a/tests/core/pyspec/eth2spec/test/fulu/unittests/das/test_das.py +++ b/tests/core/pyspec/eth2spec/test/fulu/unittests/das/test_das.py @@ -1,10 +1,8 @@ import random from eth2spec.test.context import ( - expect_assertion_error, single_phase, spec_test, - with_config_overrides, with_fulu_and_later, ) from eth2spec.test.helpers.blob import ( @@ -67,85 +65,3 @@ def test_recover_matrix(spec): # Ensure that the recovered matrix matches the original matrix assert recovered_matrix == matrix - - -@with_fulu_and_later -@spec_test -@single_phase -def test_get_extended_sample_count__1(spec): - rng = random.Random(1111) - allowed_failures = rng.randint(0, spec.config.NUMBER_OF_COLUMNS // 2) - spec.get_extended_sample_count(allowed_failures) - - -@with_fulu_and_later -@spec_test -@single_phase -def test_get_extended_sample_count__2(spec): - rng = random.Random(2222) - allowed_failures = rng.randint(0, spec.config.NUMBER_OF_COLUMNS // 2) - spec.get_extended_sample_count(allowed_failures) - - -@with_fulu_and_later -@spec_test -@single_phase -def test_get_extended_sample_count__3(spec): - rng = random.Random(3333) - allowed_failures = rng.randint(0, spec.config.NUMBER_OF_COLUMNS // 2) - spec.get_extended_sample_count(allowed_failures) - - -@with_fulu_and_later -@spec_test -@single_phase -def test_get_extended_sample_count__lower_bound(spec): - allowed_failures = 0 - spec.get_extended_sample_count(allowed_failures) - - -@with_fulu_and_later -@spec_test -@single_phase -def test_get_extended_sample_count__upper_bound(spec): - allowed_failures = spec.config.NUMBER_OF_COLUMNS // 2 - spec.get_extended_sample_count(allowed_failures) - - -@with_fulu_and_later -@spec_test -@single_phase -def test_get_extended_sample_count__upper_bound_exceed(spec): - allowed_failures = spec.config.NUMBER_OF_COLUMNS // 2 + 1 - expect_assertion_error(lambda: spec.get_extended_sample_count(allowed_failures)) - - -@with_fulu_and_later -@spec_test -@with_config_overrides( - { - "NUMBER_OF_COLUMNS": 128, - "SAMPLES_PER_SLOT": 16, - } -) -@single_phase -def test_get_extended_sample_count__table_in_spec(spec): - table = dict( - # (allowed_failures, expected_extended_sample_count) - { - 0: 16, - 1: 20, - 2: 24, - 3: 27, - 4: 29, - 5: 32, - 6: 35, - 7: 37, - 8: 40, - } - ) - for allowed_failures, expected_extended_sample_count in table.items(): - assert ( - spec.get_extended_sample_count(allowed_failures=allowed_failures) - == expected_extended_sample_count - )