mirror of https://github.com/sigp/gossipsub-testground.git
synced 2026-01-09 21:48:13 -05:00
Gossipsub Simulations (#9)
* episub-sim
* network generation
* mainnet network generation
* no pretty print
* add small network for testing
* upgrade versions, make the thing compile
* checkpoint
* this is ridiculous
* make gen_topology reachable by testground
* add 16 instances for testing
* add running command
* remove params
* pass new participants param
* update upstream reg
* update upstream reg
* put msg generation back in
* put publishing back in
* adding metrics
* record metrics on intervals
* add gossip max limit
* stop fighting testground. For now
* update README
* Revert "stop fghting testground. For now"
This reverts commit a976c5371b.
* use composition files to get the docker build context a layer up
* fix params and logs
* remove unused files
* cache workspace deps
* cache workspace deps _the right_ way
* updates
* some docs
* Update CI and remove root workspace
* Add duplicates, fix clippy, improve dash
* Reduce message sizes, some debugging
* Update dash and logs
* Add scripts folder
* Fix executor lockup
* fmt and clippy
* Add some docs
* Dot to mermaid
Co-authored-by: Diva M <divma@protonmail.com>
This commit changes the following files:

.dockerignore (new file, +3)
@@ -0,0 +1,3 @@
target/
*.data
*.tar.gz
.github/workflows/ci.yml (vendored, 15 changes)
@@ -7,20 +7,25 @@ on:
     - main

 jobs:
   cargo-fmt:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        simulations: ["eth_consensus", "censoring", "smoke"]
     steps:
     - uses: actions/checkout@v2
     - name: Get latest version of stable rust
       run: rustup update stable
     - name: Check formatting with cargofmt
-      run: cargo fmt --all -- --check
+      run: cd "${{ matrix.simulations }}" && cargo fmt --all -- --check

   clippy:
+    needs: cargo-fmt
     name: clippy
     runs-on: ubuntu-latest
-    needs: cargo-fmt
+    strategy:
+      matrix:
+        simulations: ["eth_consensus", "censoring", "smoke"]
     steps:
     - uses: actions/checkout@v1
     - name: Install protoc
@@ -32,6 +37,6 @@ jobs:
     - name: Install Protoc
       uses: arduino/setup-protoc@64c0c85d18e984422218383b81c52f8b077404d3 # v1.1.2
     - name: Lint code for quality and style with Clippy
-      run: cargo clippy --workspace --tests -- -D warnings
+      run: cd "${{ matrix.simulations }}" && cargo clippy --workspace --tests -- -D warnings
     - name: Certify Cargo.lock freshness
-      run: git diff --exit-code Cargo.lock
+      run: cd "${{ matrix.simulations }}" && git diff --exit-code Cargo.lock
Cargo.toml (deleted, -6)
@@ -1,6 +0,0 @@
-[workspace]
-
-members = [
-    "smoke",
-    "censoring",
-]
README.md
@@ -41,6 +41,15 @@ be examined to determine their effectiveness.
 See the [censoring documentation](./censoring/README) for instructions on how to run
 the simulation.
 
+### [Ethereum Consensus](./eth_consensus/README.md)
+
+This is a simulation of the standard gossipsub network for various sizes of the
+Ethereum consensus layer. It can be used in some forms to model network traffic
+for the Ethereum consensus layer.
+
+See the [Ethereum consensus documentation](./eth_consensus/README) for instructions on how to run
+the simulation.
+
 ## Dashboards
 
 Grafana dashboards are provided for some of the simulations. These are provided
censoring/README.md
@@ -7,7 +7,7 @@ parameters to mitigate censoring attacks on gossipsub networks.
 ## Running the simulation
 
 ```shell
-testground run composition -f censoring/compositions/censoring.toml --wait
+testground run composition -f censoring/compositions/composition.toml --wait
 ```
 
 ## How the Simulation Works
docker-compose.yml
@@ -9,6 +9,7 @@ services:
       - ./grafana/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml
       - ./grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml
       - ./censoring/dashboards:/var/lib/grafana/dashboards/censoring
+      - ./eth_consensus/dashboards:/var/lib/grafana/dashboards/eth_consensus
 
 networks:
   # Connect containers to `testground-control` to access Testground-supplied containers.
eth_consensus/.dockerignore (new file, +3)
@@ -0,0 +1,3 @@
target/
*.data
*.tar.gz
Cargo.lock → eth_consensus/Cargo.lock (generated, 1540 changes; diff suppressed because it is too large)
eth_consensus/Cargo.toml (new file, +26)
@@ -0,0 +1,26 @@
[workspace]

members = [
    "simulation",
    "utils/gen_topology",
    "utils/gen_topology_files"
]

[workspace.dependencies]
serde_json = "1.0"
serde = "1.0"
testground = "0.4.0"
tokio = { version = "1.21.2", features = ["macros"] }
tracing = "0.1.35"
tracing-subscriber = { version = "0.3.14", features = ["env-filter"] }

[patch]
[patch.crates-io]
types = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
eth2_ssz = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
eth2_ssz_types = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
eth2_serde_utils = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
tree_hash = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
eth2_hashing = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
eth2_ssz_derive = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
tree_hash_derive = { git = "https://github.com/divagant-martian/lighthouse", branch = "sizes" }
eth_consensus/Dockerfile (new file, +32)
@@ -0,0 +1,32 @@
# Get chef
FROM rust:1.65-bullseye as chef
WORKDIR test-plan
RUN cargo install cargo-chef

# Get chef to create a skeleton workspace
FROM chef AS planner
COPY ./plan .
RUN cargo chef prepare --recipe-path recipe.json

FROM chef as builder
# Build dependencies
RUN apt-get update && apt-get -y upgrade && apt-get install -y protobuf-compiler
COPY --from=planner /test-plan/recipe.json ./recipe.json

# Cache the deps using the fake workspace
RUN cargo chef cook --release --recipe-path recipe.json

# Get the real code
COPY ./plan .

# Enjoy
RUN cargo build --release -p simulation

FROM debian:bullseye-slim

COPY --from=builder /test-plan/target/release/simulation /usr/local/bin/eth_consensus

#ENV RUST_LOG=libp2p_gossipsub=debug,simulation=debug
ENV RUST_LOG=simulation=info

ENTRYPOINT ["eth_consensus"]
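The two cargo-chef stages above are what the "cache workspace deps" commits refer to: `cargo chef prepare` records the workspace's dependency graph as `recipe.json`, and `cargo chef cook` builds only those dependencies, so Docker caches the expensive dependency layer and only the final `cargo build --release -p simulation` re-runs when the plan's source changes.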
eth_consensus/README.md (new file, +80)
@@ -0,0 +1,80 @@
# Ethereum Consensus Simulation

This simulation mimics the timing, frequency and sizes of messages that would usually
occur on a normal Ethereum consensus gossipsub network. The simulation can
specify the number of validators and nodes on the network in an attempt to
model various sizes of Ethereum consensus networks.

## Running the Simulation

This simulation can be run with the following command (from within the repo's
root directory):

```sh
testground run composition -f ./eth_consensus/compositions/composition.toml --wait
```

Various aspects of the simulation can be modified. Please read the `eth_consensus/manifest.toml` to understand test parameters and `eth_consensus/compositions/composition.toml` to modify them.

## InfluxDB Queries

The results of the simulation are stored in an InfluxDB instance. Queries
inside Grafana can be used to build dashboards. An example query is given:

`SELECT derivative("count", 10s) FROM "topic_msg_recv_bytes" WHERE $timeFilter GROUP BY "hash", "instance_name", "run_id"`

`derivative` = calculation of the rate
`hash` = the topic
`instance_name` = the number given to the instance inside the test run, starting from 0
`run_id` = the id of the run
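The simulation also derives a duplicates counter, `topic_msg_recv_duplicates` (received-unfiltered minus received, computed in `record_metrics`), which can be charted the same way. A hypothetical variant of the query above, assuming the derived series carries the same tags:

`SELECT derivative("count", 10s) FROM "topic_msg_recv_duplicates" WHERE $timeFilter GROUP BY "hash", "instance_name", "run_id"`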

## Controlling the Topology

The topology of the network can be created and visualised using the `utils/gen_topology` and `utils/gen_topology_files` crates.

The `gen_topology_files` binary can output json and dot files to visualise and
understand the topology of the network to be created.

There are a number of input parameters required for generating a network
topology; each of these can be customised in the composition file
`compositions/composition.toml`:

- `seed`: int - Seeds the random number generator; changing this number
  produces differing variants of the topology.
- `total_validators`: int - The total number of validators to be used
  within the network.
- `total_node_with_vals`: int - The total number of nodes that will be assigned
  validators.
- `min_peer_per_node`: int - The minimum number of connections per node to
  target when generating the node topology.
- `max_peers_per_node`: int - The maximum number of connections per node to
  target when generating the node topology.

These parameters can be set in the composition in order to generate a specific
topology for the simulation.

To visualise the topology, a dot file and json file can be produced by running:

```
cargo run --bin gen_topology_file <seed> <total_validators>
<total_node_with_vals> <total_nodes_without_vals> <min_peer_per_node>
<max_peer_per_node> <output-dot-file> <output-json-file>
```

An example is:

```
cargo run --bin gen_topology_file 40 200 2 3 2 3 output.dot output.json
```

which produces the following topology:

```mermaid
graph TD;
0-->2;
0-->3;
1-->2;
2-->4;
3-->1;
4-->1;
```
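In this rendering, a directed edge `a-->b` corresponds to node `a` dialing node `b` as one of its generated outbound peers; this reading follows `dial_peers` in `simulation/src/node_run.rs`, which dials exactly the peers listed in the generated `outbound_peers` map.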
eth_consensus/compositions/composition.toml (new file, +20)
@@ -0,0 +1,20 @@
[metadata]
name = "eth_consensus"

[global]
builder = "docker:generic"
plan = "gossipsub-testground/eth_consensus"
case = "eth_consensus"
runner = "local:docker"

[global.build_config]
path = "./"

[[groups]]
id = "main-instances"
instances = { count = 3 }

[groups.run]
# Check the plan (./eth_consensus/manifest.toml) to understand the meaning of each param
test_params = { seed = "40", no_val_percentage = "60", total_validators = "200", min_peers_per_node = "2", max_peers_per_node_inclusive = "3", run = "120" }
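To see how the group size and `no_val_percentage` combine, here is the node-count arithmetic the plan applies, reproduced from `parse_params` in `simulation/src/node_run.rs` (a standalone sketch, not part of the commit):

```rust
fn main() {
    // Values from the composition above: 3 instances, 60% without validators.
    let instance_count: usize = 3;
    let no_val_percentage: usize = 60;
    // Integer division, so the percentage rounds down.
    let total_nodes_without_vals = instance_count * no_val_percentage / 100; // 3 * 60 / 100 = 1
    let total_nodes_with_vals = instance_count - total_nodes_without_vals; // 3 - 1 = 2
    println!("{total_nodes_with_vals} with validators, {total_nodes_without_vals} without");
}
```

With the defaults, the 200 validators are therefore spread across 2 of the 3 nodes.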
eth_consensus/dashboards/BasicGossipsub.json (new file, 1378 lines; diff suppressed because it is too large)
eth_consensus/manifest.toml (new file, +23)
@@ -0,0 +1,23 @@
name = "eth_consensus"

[defaults]
builder = "docker:generic"
runner = "local:docker"
disable_metrics = false

[builders."docker:generic"]
enabled = true

[runners."local:docker"]
enabled = true

[[testcases]]
name = "eth_consensus"
instances = { min = 3, max = 100, default = 3 }
[testcases.params]
seed = { type = "int", desc = "Seed to use for the rng", default = 40 }
no_val_percentage = { type = "int", desc = "% of nodes without vals", default = 60 }
total_validators = { type = "int", desc = "number of validators", default = 200 }
min_peers_per_node = { type = "int", desc = "minimum number of peers to connect to", default = 2 }
max_peers_per_node_inclusive = { type = "int", desc = "maximum number of peers to connect to", default = 3 }
run = { type = "int", desc = "Time to run the emulation", default = 120, unit = "sec" }
eth_consensus/simulation/Cargo.toml (new file, +24)
@@ -0,0 +1,24 @@
[package]
name = "simulation"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
chrono = { version = "0.4.19", features = [ "std" ]}
delay_map = "0.1.1"

# libp2p = { version = "0.48.0", default-features = false, features = ["gossipsub", "dns-tokio", "tcp-tokio", "noise", "mplex", "yamux", "serde"] }
prometheus-client = { git = "https://github.com/prometheus/client_rust.git", rev = "682b24ee8c6c857b76c0683b1dd7df5a97b75c27", features = ["protobuf"] }
libp2p = { git = "https://github.com/ackintosh/rust-libp2p.git", branch = "prometheus-ptotobuf-support", default-features = false, features = ["gossipsub", "dns-tokio", "tcp-tokio", "noise", "mplex", "yamux", "serde"] }
serde_json = "1.0"
serde = "1.0"
testground = "0.4.0"
tokio = { version = "1.21.2", features = ["macros"] }
tracing = "0.1.35"
tracing-subscriber = { version = "0.3.14", features = ["env-filter"] }
gen_topology = { path = "../utils/gen_topology" }
futures = "0.3.24"
npg = { git = "https://github.com/sigp/eth-npg", branch = "timing-change" }
sha2 = "0.10.6"
eth_consensus/simulation/src/main.rs (new file, +60)
@@ -0,0 +1,60 @@
mod node_run;
mod utils;

use crate::utils::publish_and_collect;
use libp2p::identity::Keypair;
use libp2p::multiaddr::Protocol;
use libp2p::{Multiaddr, PeerId};
use serde::{Deserialize, Serialize};
use testground::client::Client;
use tracing::info;

use std::collections::HashMap;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    if let Ok(env_filter) = tracing_subscriber::EnvFilter::try_from_default_env() {
        tracing_subscriber::fmt().with_env_filter(env_filter).init();
    }

    let client = Client::new_and_init().await?;

    let local_key = Keypair::generate_ed25519();
    let peer_id = PeerId::from(local_key.public());
    let multiaddr = {
        let mut multiaddr = Multiaddr::from(
            client
                .run_parameters()
                .data_network_ip()?
                .expect("Should have an IP address for the data network"),
        );
        multiaddr.push(Protocol::Tcp(9000));
        multiaddr
    };

    // The network definition starts at 0 and the testground sequences start at 1, so adjust
    // accordingly.
    let node_id = client.global_seq() as usize - 1;
    info!("THIS IS MY NUMBER {node_id}");
    let instance_info = InstanceInfo { peer_id, multiaddr };

    let participants = {
        let infos =
            publish_and_collect("node_info", &client, (node_id, instance_info.clone())).await?;
        info!("Found {}", infos.len());
        infos
            .into_iter()
            .filter(|(other_node_id, _)| *other_node_id != node_id)
            .collect::<HashMap<usize, InstanceInfo>>()
    };

    node_run::run(client, node_id, instance_info, participants, local_key).await?;

    Ok(())
}

#[derive(Clone, Debug, Serialize, Deserialize)]
struct InstanceInfo {
    peer_id: PeerId,
    multiaddr: Multiaddr,
}
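`publish_and_collect` (defined in `utils.rs` below) publishes this node's `(node_id, InstanceInfo)` pair to the sync service and then reads one entry back per instance, so every participant holds the full peer map before `node_run::run` begins dialing.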
eth_consensus/simulation/src/node_run.rs (new file, +665)
@@ -0,0 +1,665 @@
use crate::utils::{
    queries_for_counter, queries_for_counter_join, queries_for_gauge, queries_for_histogram,
    record_instance_info, BARRIER_LIBP2P_READY, BARRIER_TOPOLOGY_READY,
};
use crate::InstanceInfo;
use chrono::{DateTime, Utc};
use gen_topology::Params;
use libp2p::core::muxing::StreamMuxerBox;
use libp2p::core::upgrade::{SelectUpgrade, Version};
use libp2p::dns::TokioDnsConfig;
use libp2p::futures::StreamExt;
use libp2p::gossipsub::metrics::Config;
use libp2p::gossipsub::subscription_filter::AllowAllSubscriptionFilter;
use libp2p::gossipsub::{
    Gossipsub, GossipsubConfigBuilder, GossipsubEvent, GossipsubMessage, IdentTopic,
    IdentityTransform, MessageAuthenticity, MessageId, PeerScoreParams, PeerScoreThresholds,
    Topic as GossipTopic, ValidationMode,
};
use libp2p::identity::Keypair;
use libp2p::mplex::MplexConfig;
use libp2p::noise::NoiseConfig;
use libp2p::swarm::{SwarmBuilder, SwarmEvent};
use libp2p::tcp::{GenTcpConfig, TokioTcpTransport};
use libp2p::yamux::YamuxConfig;
use libp2p::PeerId;
use libp2p::Swarm;
use libp2p::Transport;
use npg::slot_generator::{Subnet, ValId};
use npg::{Generator, Message};
use prometheus_client::encoding::proto::openmetrics_data_model::MetricSet;
use prometheus_client::encoding::proto::EncodeMetric;
use prometheus_client::registry::Registry;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::{Duration, Instant};
use testground::client::Client;
use tokio::time::{interval, Interval};
use tracing::{debug, error, info};

const ATTESTATION_SUBNETS: u64 = 4;
const SYNC_SUBNETS: u64 = 4;
const SLOTS_PER_EPOCH: u64 = 2;
const SLOT_DURATION: u64 = 12;

#[derive(Serialize, Deserialize, Clone, Debug)]
enum Topic {
    Blocks,
    Attestations(u64),
    Aggregates(u64),
    SyncMessages(u64),
    SignedContributionAndProof(u64),
}

impl From<Topic> for IdentTopic {
    fn from(t: Topic) -> Self {
        let rep = serde_json::to_string(&t).expect("json serialization of topics never fails");
        GossipTopic::new(rep)
    }
}

impl From<IdentTopic> for Topic {
    fn from(t: IdentTopic) -> Self {
        let repr = t.hash().into_string();
        serde_json::from_str(&repr).expect("json deserialization of topics never fails")
    }
}
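
// A minimal round-trip sketch for the two conversions above (illustrative
// only, not part of the original file). It assumes serde's default
// externally tagged enum encoding, e.g. `{"Attestations":3}`, which
// `IdentTopic` then uses verbatim as both the topic string and its hash.
#[cfg(test)]
mod topic_encoding_sketch {
    use super::*;

    #[test]
    fn topic_roundtrips_through_ident_topic() {
        let ident: IdentTopic = Topic::Attestations(3).into();
        assert_eq!(ident.hash().into_string(), r#"{"Attestations":3}"#);
        let back: Topic = ident.into();
        assert!(matches!(back, Topic::Attestations(3)));
    }
}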

pub(crate) fn parse_params(
    instance_count: usize,
    instance_params: HashMap<String, String>,
) -> Result<(Duration, Params), Box<dyn std::error::Error>> {
    let seed = instance_params
        .get("seed")
        .ok_or("seed is not specified.")?
        .parse::<u64>()?;
    let no_val_percentage = instance_params
        .get("no_val_percentage")
        .ok_or("`no_val_percentage` is not specified")?
        .parse::<usize>()?
        .min(100);
    let total_validators = instance_params
        .get("total_validators")
        .ok_or("`total_validators` not specified")?
        .parse::<usize>()
        .map_err(|e| format!("Error reading total_validators {}", e))?;
    let min_peers_per_node = instance_params
        .get("min_peers_per_node")
        .ok_or("`min_peers_per_node` not specified")?
        .parse::<usize>()?;
    let max_peers_per_node_inclusive = instance_params
        .get("max_peers_per_node_inclusive")
        .ok_or("`max_peers_per_node_inclusive` not specified")?
        .parse::<usize>()?;
    let total_nodes_without_vals = instance_count * no_val_percentage / 100;
    let total_nodes_with_vals = instance_count - total_nodes_without_vals;
    let run = instance_params
        .get("run")
        .ok_or("run is not specified.")?
        .parse::<u64>()?;

    let params = Params::new(
        seed,
        total_validators,
        total_nodes_with_vals,
        total_nodes_without_vals,
        min_peers_per_node,
        max_peers_per_node_inclusive,
    )?;

    Ok((Duration::from_secs(run), params))
}

pub(crate) async fn run(
    client: Client,
    node_id: usize,
    instance_info: InstanceInfo,
    participants: HashMap<usize, InstanceInfo>,
    keypair: Keypair,
) -> Result<(), Box<dyn std::error::Error>> {
    let test_instance_count = client.run_parameters().test_instance_count;
    let (run_duration, params) = parse_params(
        test_instance_count as usize,
        client.run_parameters().test_instance_params,
    )?;

    let (params, outbound_peers, validator_assignments) =
        gen_topology::Network::generate(params)?.destructure();

    info!(
        "Running with params {params:?} and {} participants",
        participants.len()
    );
    let validator_set = validator_assignments
        .get(&node_id)
        .cloned()
        .unwrap_or_default();
    info!("[{}] Validators on this node: {:?}", node_id, validator_set);
    let validator_set: HashSet<ValId> =
        validator_set.into_iter().map(|v| ValId(v as u64)).collect();

    record_instance_info(
        &client,
        node_id,
        &instance_info.peer_id,
        &client.run_parameters().test_run,
    )
    .await?;

    let registry: Registry<Box<dyn EncodeMetric>> = Registry::default();
    let mut network = Network::new(
        registry,
        keypair,
        node_id,
        instance_info,
        participants.clone(),
        client.clone(),
        validator_set,
        params,
    );

    client
        .signal_and_wait(BARRIER_TOPOLOGY_READY, test_instance_count)
        .await?;

    // Set up the listening address
    network.start_libp2p().await;

    client
        .signal_and_wait(BARRIER_LIBP2P_READY, test_instance_count)
        .await?;
    // Dial the designated outbound peers
    network.dial_peers(outbound_peers).await;

    client
        .signal_and_wait(
            BARRIER_TOPOLOGY_READY,
            client.run_parameters().test_instance_count,
        )
        .await?;

    if let Err(e) = network.subscribe_topics() {
        error!("[{}] Failed to subscribe to topics {e}", network.node_id);
    };
    network.run_sim(run_duration).await;

    client.record_success().await?;
    Ok(())
}

/// Set up an encrypted TCP transport over the Mplex and Yamux protocols.
fn build_transport(keypair: &Keypair) -> libp2p::core::transport::Boxed<(PeerId, StreamMuxerBox)> {
    let transport = TokioDnsConfig::system(TokioTcpTransport::new(
        GenTcpConfig::default().nodelay(true),
    ))
    .expect("DNS config");

    let noise_keys = libp2p::noise::Keypair::<libp2p::noise::X25519Spec>::new()
        .into_authentic(keypair)
        .expect("Signing libp2p-noise static DH keypair failed.");

    transport
        .upgrade(Version::V1)
        .authenticate(NoiseConfig::xx(noise_keys).into_authenticated())
        .multiplex(SelectUpgrade::new(
            YamuxConfig::default(),
            MplexConfig::default(),
        ))
        .timeout(Duration::from_secs(20))
        .boxed()
}

// A context struct for passing information into the `record_metrics` function that can be spawned
// into its own task.
struct RecordMetricsInfo {
    client: Arc<Client>,
    metrics: MetricSet,
    node_id: usize,
    instance_info: InstanceInfo,
    current: DateTime<Utc>,
}

pub(crate) struct Network {
    /// Libp2p swarm.
    swarm: Swarm<Gossipsub>,
    /// Node id for this node, local to the test run.
    node_id: usize,
    /// This node's contact info.
    instance_info: InstanceInfo,
    /// Metrics registry.
    registry: Registry<Box<dyn EncodeMetric>>,
    /// Information of every other participant in the network, indexed by their (local to the test
    /// run) node_id.
    participants: HashMap<usize, InstanceInfo>,
    /// Testground client.
    client: Arc<Client>,
    /// Chrono time reported by testground as the start of the test run.
    start_time: DateTime<Utc>,
    /// Instant in which the simulation starts running, according to the local time.
    local_start_time: Instant,
    /// How often metrics are recorded.
    metrics_interval: Interval,
    /// Generator of messages per slot.
    messages_gen: Generator,
}

impl Network {
    #[allow(clippy::too_many_arguments)]
    fn new(
        mut registry: Registry<Box<dyn EncodeMetric>>,
        keypair: Keypair,
        node_id: usize,
        instance_info: InstanceInfo,
        participants: HashMap<usize, InstanceInfo>,
        client: Client,
        validator_set: HashSet<ValId>,
        params: Params,
    ) -> Self {
        let gossipsub = {
            let gossip_message_id = move |message: &GossipsubMessage| {
                MessageId::from(
                    &Sha256::digest([message.topic.as_str().as_bytes(), &message.data].concat())
                        [..20],
                )
            };

            let gossipsub_config = GossipsubConfigBuilder::default()
                .max_transmit_size(10 * 1_048_576) // gossip_max_size(true)
                // .heartbeat_interval(Duration::from_secs(1))
                .prune_backoff(Duration::from_secs(60))
                .mesh_n(8)
                .mesh_n_low(4)
                .mesh_n_high(12)
                .gossip_lazy(6)
                .fanout_ttl(Duration::from_secs(60))
                .history_length(12)
                .max_messages_per_rpc(Some(500)) // Responses to IWANT can be quite large
                .history_gossip(3)
                // .validate_messages() // TODO: Reintroduce message validation delays
                .validation_mode(ValidationMode::Anonymous)
                .duplicate_cache_time(Duration::from_secs(SLOT_DURATION * SLOTS_PER_EPOCH + 1))
                .message_id_fn(gossip_message_id)
                .allow_self_origin(true)
                .build()
                .expect("valid gossipsub configuration");

            let mut gs = Gossipsub::new_with_subscription_filter_and_transform(
                MessageAuthenticity::Anonymous,
                gossipsub_config,
                Some((&mut registry, Config::default())),
                AllowAllSubscriptionFilter {},
                IdentityTransform {},
            )
            .expect("Valid configuration");

            // Setup the scoring system.
            let peer_score_params = PeerScoreParams::default();
            gs.with_peer_score(peer_score_params, PeerScoreThresholds::default())
                .expect("Valid score params and thresholds");

            gs
        };

        let swarm = SwarmBuilder::new(
            build_transport(&keypair),
            gossipsub,
            PeerId::from(keypair.public()),
        )
        .executor(Box::new(|future| {
            tokio::spawn(future);
        }))
        .build();

        info!(
            "[{}] running with {} validators",
            node_id,
            validator_set.len()
        );

        let genesis_slot = 0;
        let genesis_duration = Duration::ZERO;
        let slot_duration = Duration::from_secs(SLOT_DURATION);
        let slots_per_epoch = SLOTS_PER_EPOCH;
        let sync_subnet_size = 2;
        let target_aggregators = 14;

        let messages_gen = Generator::builder()
            .slot_clock(genesis_slot, genesis_duration, slot_duration)
            .slots_per_epoch(slots_per_epoch)
            .sync_subnet_size(sync_subnet_size)
            .sync_committee_subnets(SYNC_SUBNETS)
            .total_validators(params.total_validators() as u64)
            .target_aggregators(target_aggregators)
            .attestation_subnets(ATTESTATION_SUBNETS)
            .build(validator_set)
            .expect("need to adjust these params");

        let start_time: DateTime<Utc> =
            DateTime::parse_from_rfc3339(&client.run_parameters().test_start_time)
                .expect("Correct time date format from testground")
                .into();
        let local_start_time = Instant::now();

        Network {
            swarm,
            node_id,
            instance_info,
            participants,
            client: Arc::new(client),
            metrics_interval: interval(slot_duration / 3),
            messages_gen,
            start_time,
            local_start_time,
            registry,
        }
    }

    async fn start_libp2p(&mut self) {
        self.swarm
            .listen_on(self.instance_info.multiaddr.clone())
            .expect("Swarm starts listening");

        match self.swarm.next().await.unwrap() {
            SwarmEvent::NewListenAddr { address, .. } => {
                assert_eq!(address, self.instance_info.multiaddr)
            }
            e => panic!("Unexpected event {:?}", e),
        };
    }

    // Generates the necessary amount of information to record metrics.
    fn record_metrics_info(&self) -> RecordMetricsInfo {
        // Encode the metrics to an instance of the OpenMetrics protobuf format.
        // https://github.com/OpenObservability/OpenMetrics/blob/main/proto/openmetrics_data_model.proto
        let metrics = prometheus_client::encoding::proto::encode(&self.registry);

        let elapsed = chrono::Duration::from_std(self.local_start_time.elapsed())
            .expect("Durations are small");
        let current = self.start_time + elapsed;

        RecordMetricsInfo {
            client: self.client.clone(),
            metrics,
            node_id: self.node_id,
            instance_info: self.instance_info.clone(),
            current,
        }
    }

    pub async fn dial_peers(
        &mut self,
        outbound_peers: std::collections::BTreeMap<usize, Vec<usize>>,
    ) {
        let mut dialed_peers = 0;
        if let Some(outbound_peers) = outbound_peers.get(&self.node_id) {
            for peer_node_id in outbound_peers {
                let InstanceInfo { peer_id, multiaddr } = self
                    .participants
                    .get(peer_node_id)
                    .unwrap_or_else(|| {
                        panic!("[{}] All outbound peers are participants of the network {peer_node_id} {:?}", self.node_id, self.participants.keys().collect::<Vec<_>>())
                    })
                    .clone();
                info!(
                    "[{}] dialing {} on {}",
                    self.node_id, peer_node_id, multiaddr
                );
                if let Err(e) = self.swarm.dial(
                    libp2p::swarm::dial_opts::DialOpts::peer_id(peer_id)
                        .addresses(vec![multiaddr])
                        .build(),
                ) {
                    panic!(
                        "[{}] Dialing -> {} failed {}",
                        self.node_id, peer_node_id, e
                    );
                }
                dialed_peers += 1;
            }
        }
        info!("[{}] dialed {} peers", self.node_id, dialed_peers);
    }

    async fn run_sim(&mut self, run_duration: Duration) {
        let deadline = tokio::time::sleep(run_duration);
        futures::pin_mut!(deadline);

        loop {
            tokio::select! {
                _ = deadline.as_mut() => {
                    // Sim complete
                    break;
                }
                Some(m) = self.messages_gen.next() => {
                    let payload = m.payload();
                    let (topic, val) = match m {
                        Message::BeaconBlock { proposer: ValId(v), slot: _ } => {
                            (Topic::Blocks, v)
                        },
                        Message::AggregateAndProofAttestation { aggregator: ValId(v), subnet: Subnet(s), slot: _ } => {
                            (Topic::Aggregates(s), v)
                        },
                        Message::Attestation { attester: ValId(v), subnet: Subnet(s), slot: _ } => {
                            (Topic::Attestations(s), v)
                        },
                        Message::SignedContributionAndProof { validator: ValId(v), subnet: Subnet(s), slot: _ } => {
                            (Topic::SignedContributionAndProof(s), v)
                        },
                        Message::SyncCommitteeMessage { validator: ValId(v), subnet: Subnet(s), slot: _ } => {
                            (Topic::SyncMessages(s), v)
                        },
                    };
                    if let Err(e) = self.publish(topic.clone(), val, &payload) {
                        error!("Failed to publish message {e} to topic {topic:?}");
                    }
                }
                // Record peer scores
                _ = self.metrics_interval.tick() => {
                    let metrics_info = self.record_metrics_info();
                    // Spawn into its own task
                    tokio::spawn(record_metrics(metrics_info));
                }
                event = self.swarm.select_next_some() => {
                    match event {
                        SwarmEvent::Behaviour(GossipsubEvent::Message {
                            propagation_source,
                            message_id: _,
                            message,
                        }) => {
                            let src_node = self.participants.iter().find(|(_k, v)| v.peer_id == propagation_source).map(|(k, _v)| k);
                            if message.topic.as_str() == "\"Blocks\"" {
                                info!("[{}] Received block from: {:?}, size {}", self.node_id, src_node, message.data.len());
                            }
                        }
                        _ => debug!("SwarmEvent: {:?}", event),
                    }
                }
            }
        }
    }

    fn publish(
        &mut self,
        topic: Topic,
        validator: u64,
        payload: &[u8],
    ) -> Result<libp2p::gossipsub::MessageId, libp2p::gossipsub::error::PublishError> {
        // simple tuples as messages
        let msg =
            serde_json::to_vec(&(validator, payload)).expect("json serialization never fails");
        if let Topic::Blocks = topic {
            info!(
                "[{}] Publishing message topic: {}, size: {}",
                self.node_id,
                IdentTopic::from(topic.clone()),
                msg.len()
            );
        }
        let ident_topic: IdentTopic = topic.into();
        self.swarm.behaviour_mut().publish(ident_topic, msg)
    }

    pub fn subscribe_topics(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        // blocks, attestations and aggregates, sync messages and aggregates
        let blocks_topic: IdentTopic =
            GossipTopic::new(serde_json::to_string(&Topic::Blocks).unwrap());
        self.swarm.behaviour_mut().subscribe(&blocks_topic)?;
        for subnet_n in 0..ATTESTATION_SUBNETS {
            let attestation_subnet: IdentTopic = Topic::Attestations(subnet_n).into();
            let aggregate_subnet: IdentTopic = Topic::Aggregates(subnet_n).into();
            self.swarm.behaviour_mut().subscribe(&attestation_subnet)?;
            self.swarm.behaviour_mut().subscribe(&aggregate_subnet)?;
        }

        for subnet_n in 0..SYNC_SUBNETS {
            let sync_subnet: IdentTopic = Topic::SyncMessages(subnet_n).into();
            let sync_aggregates: IdentTopic = Topic::SignedContributionAndProof(subnet_n).into();
            self.swarm.behaviour_mut().subscribe(&sync_subnet)?;
            self.swarm.behaviour_mut().subscribe(&sync_aggregates)?;
        }
        Ok(())
    }
}

async fn record_metrics(info: RecordMetricsInfo) {
    let run_id = &info.client.run_parameters().test_run;

    // Encode the metrics to an instance of the OpenMetrics protobuf format.
    // https://github.com/OpenObservability/OpenMetrics/blob/main/proto/openmetrics_data_model.proto
    let metric_set = info.metrics;

    let mut queries = vec![];
    let current = info.current;
    let node_id = info.node_id;

    for family in metric_set.metric_families.iter() {
        let q = match family.name.as_str() {
            // ///////////////////////////////////
            // Metrics per known topic
            // ///////////////////////////////////
            "topic_subscription_status" => queries_for_gauge(
                &current,
                family,
                node_id,
                &info.instance_info,
                run_id,
                "status",
            ),
            "topic_peers_counts" => queries_for_gauge(
                &current,
                family,
                node_id,
                &info.instance_info,
                run_id,
                "count",
            ),
            "invalid_messages_per_topic"
            | "accepted_messages_per_topic"
            | "ignored_messages_per_topic"
            | "rejected_messages_per_topic" => {
                queries_for_counter(&current, family, node_id, &info.instance_info, run_id)
            }
            // ///////////////////////////////////
            // Metrics regarding mesh state
            // ///////////////////////////////////
            "mesh_peer_counts" => queries_for_gauge(
                &current,
                family,
                info.node_id,
                &info.instance_info,
                run_id,
                "count",
            ),
            "mesh_peer_inclusion_events" => {
                queries_for_counter(&current, family, info.node_id, &info.instance_info, run_id)
            }
            "mesh_peer_churn_events" => {
                queries_for_counter(&current, family, info.node_id, &info.instance_info, run_id)
            }
            // ///////////////////////////////////
            // Metrics regarding messages sent/received
            // ///////////////////////////////////
            "topic_msg_sent_counts"
            | "topic_msg_published"
            | "topic_msg_sent_bytes"
            | "topic_msg_recv_counts_unfiltered"
            | "topic_msg_recv_counts"
            | "topic_msg_recv_bytes" => {
                queries_for_counter(&current, family, info.node_id, &info.instance_info, run_id)
            }
            // ///////////////////////////////////
            // Metrics related to scoring
            // ///////////////////////////////////
            "score_per_mesh" => {
                queries_for_histogram(&current, family, info.node_id, &info.instance_info, run_id)
            }
            "scoring_penalties" => {
                queries_for_counter(&current, family, info.node_id, &info.instance_info, run_id)
            }
            // ///////////////////////////////////
            // General Metrics
            // ///////////////////////////////////
            "peers_per_protocol" => queries_for_gauge(
                &current,
                family,
                info.node_id,
                &info.instance_info,
                run_id,
                "peers",
            ),
            "heartbeat_duration" => {
                queries_for_histogram(&current, family, info.node_id, &info.instance_info, run_id)
            }
            // ///////////////////////////////////
            // Performance metrics
            // ///////////////////////////////////
            "topic_iwant_msgs" => {
                queries_for_counter(&current, family, info.node_id, &info.instance_info, run_id)
            }
            "memcache_misses" => {
                queries_for_counter(&current, family, info.node_id, &info.instance_info, run_id)
            }
            _ => unreachable!(),
        };

        queries.extend(q);
    }

    // We can't do joins in InfluxDB easily, so do some custom queries here to calculate
    // duplicates.
    let recvd_unfiltered = metric_set
        .metric_families
        .iter()
        .find(|family| family.name.as_str() == "topic_msg_recv_counts_unfiltered");

    if let Some(recvd_unfiltered) = recvd_unfiltered {
        let recvd = metric_set
            .metric_families
            .iter()
            .find(|family| family.name.as_str() == "topic_msg_recv_counts");
        if let Some(recvd) = recvd {
            let q = queries_for_counter_join(
                &current,
                recvd_unfiltered,
                recvd,
                "topic_msg_recv_duplicates",
                info.node_id,
                &info.instance_info,
                run_id,
                |a, b| a.saturating_sub(b),
            );

            queries.extend(q);
        }
    }

    for query in queries {
        if let Err(e) = info.client.record_metric(query).await {
            error!("Failed to record metrics: {:?}", e);
        }
    }
}
eth_consensus/simulation/src/utils.rs (new file, +261)
@@ -0,0 +1,261 @@
use crate::InstanceInfo;
use chrono::{DateTime, Local, Utc};
use libp2p::futures::StreamExt;
use libp2p::PeerId;
use prometheus_client::encoding::proto::openmetrics_data_model::counter_value;
use prometheus_client::encoding::proto::openmetrics_data_model::gauge_value;
use prometheus_client::encoding::proto::openmetrics_data_model::metric_point;
use prometheus_client::encoding::proto::openmetrics_data_model::Metric;
use prometheus_client::encoding::proto::openmetrics_data_model::MetricFamily;
use prometheus_client::encoding::proto::HistogramValue;
use serde::de::DeserializeOwned;
use serde::Serialize;
use std::borrow::Cow;
use testground::client::Client;
use testground::WriteQuery;

// States for `barrier()`
pub(crate) const BARRIER_LIBP2P_READY: &str = "Started libp2p";
pub(crate) const BARRIER_TOPOLOGY_READY: &str = "Topology generated";

// Tags for InfluxDB
const TAG_INSTANCE_PEER_ID: &str = "instance_peer_id";
const TAG_INSTANCE_NAME: &str = "instance_name";
const TAG_RUN_ID: &str = "run_id";

/// Publish info and collect it from the participants. The return value includes the one published
/// by this node itself.
pub(crate) async fn publish_and_collect<T: Serialize + DeserializeOwned>(
    topic: &'static str,
    client: &Client,
    info: T,
) -> Result<Vec<T>, Box<dyn std::error::Error>> {
    let instance_count = client.run_parameters().test_instance_count as usize;
    let serialized = Cow::Owned(serde_json::to_value(&info)?);
    client.publish(topic, serialized).await?;

    let mut stream = client.subscribe(topic, instance_count * 2).await;

    let mut vec: Vec<T> = Vec::with_capacity(instance_count);

    for _ in 0..instance_count {
        match stream.next().await {
            Some(Ok(other)) => {
                let info: T = serde_json::from_value(other)?;
                vec.push(info);
            }
            Some(Err(e)) => return Err(Box::new(e)),
            None => unreachable!(),
        }
    }

    Ok(vec)
}

/// Create InfluxDB queries for Counter metrics.
pub(crate) fn queries_for_counter(
    datetime: &DateTime<Utc>,
    family: &MetricFamily,
    node_id: usize,
    instance_info: &InstanceInfo,
    run_id: &str,
) -> Vec<WriteQuery> {
    let mut queries = vec![];

    for metric in family.metrics.iter() {
        let mut query = WriteQuery::new((*datetime).into(), family.name.clone())
            .add_tag(TAG_INSTANCE_PEER_ID, instance_info.peer_id.to_string())
            .add_tag(TAG_INSTANCE_NAME, node_id.to_string())
            .add_tag(TAG_RUN_ID, run_id.to_owned())
            .add_field(
                "count",
                get_counter_value(metric).0.expect("should have int value"),
            );

        for l in &metric.labels {
            query = query.add_tag(l.name.clone(), l.value.clone());
        }

        queries.push(query);
    }

    queries
}

/// Create InfluxDB queries joining counter metrics
#[allow(clippy::too_many_arguments)]
pub(crate) fn queries_for_counter_join(
    datetime: &DateTime<Utc>,
    family1: &MetricFamily,
    family2: &MetricFamily,
    name: &str,
    node_id: usize,
    instance_info: &InstanceInfo,
    run_id: &str,
    predicate: fn(u64, u64) -> u64,
) -> Vec<WriteQuery> {
    let mut queries = vec![];

    for metric in family1.metrics.iter() {
        // Match on metric values
        let value = {
            let current_val = get_counter_value(metric).0.expect("should have int value");
            let other_val = family2
                .metrics
                .iter()
                .find(|m2| {
                    // match on all labels
                    let mut found = true;
                    for label in &metric.labels {
                        if !m2
                            .labels
                            .iter()
                            .any(|l| l.name == label.name && l.value == label.value)
                        {
                            found = false;
                            break;
                        }
                    }
                    found
                })
                .and_then(|m| get_counter_value(m).0);
            other_val.map(|other| predicate(current_val, other))
        };

        if let Some(val) = value {
            let mut query = WriteQuery::new((*datetime).into(), name)
                .add_tag(TAG_INSTANCE_PEER_ID, instance_info.peer_id.to_string())
                .add_tag(TAG_INSTANCE_NAME, node_id.to_string())
                .add_tag(TAG_RUN_ID, run_id.to_owned())
                .add_field("count", val);

            for l in &metric.labels {
                query = query.add_tag(l.name.clone(), l.value.clone());
            }

            queries.push(query);
        }
    }

    queries
}

/// Create InfluxDB queries for Gauge metrics.
pub(crate) fn queries_for_gauge(
    datetime: &DateTime<Utc>,
    family: &MetricFamily,
    node_id: usize,
    instance_info: &InstanceInfo,
    run_id: &str,
    field_name: &str,
) -> Vec<WriteQuery> {
    let mut queries = vec![];

    for metric in family.metrics.iter() {
        let mut query = WriteQuery::new((*datetime).into(), family.name.clone())
            .add_tag(TAG_INSTANCE_PEER_ID, instance_info.peer_id.to_string())
            .add_tag(TAG_INSTANCE_NAME, node_id.to_string())
            .add_tag(TAG_RUN_ID, run_id.to_owned())
            .add_field(
                field_name,
                get_gauge_value(metric).0.expect("should have int value"),
            );

        for l in &metric.labels {
            query = query.add_tag(l.name.clone(), l.value.clone());
        }

        queries.push(query);
    }

    queries
}

/// Create InfluxDB queries for Histogram metrics.
pub(crate) fn queries_for_histogram(
    datetime: &DateTime<Utc>,
    family: &MetricFamily,
    node_id: usize,
    instance_info: &InstanceInfo,
    run_id: &str,
) -> Vec<WriteQuery> {
    let mut queries = vec![];

    for metric in family.metrics.iter() {
        let histogram = get_histogram_value(metric);
        for bucket in histogram.buckets.iter() {
            let mut query = WriteQuery::new((*datetime).into(), family.name.clone())
                .add_tag(TAG_INSTANCE_PEER_ID, instance_info.peer_id.to_string())
                .add_tag(TAG_INSTANCE_NAME, node_id.to_string())
                .add_tag(TAG_RUN_ID, run_id.to_owned())
                .add_field("count", bucket.count)
                .add_field("upper_bound", bucket.upper_bound);

            for l in &metric.labels {
                query = query.add_tag(l.name.clone(), l.value.clone());
            }
            queries.push(query);
        }
    }

    queries
}

fn get_gauge_value(metric: &Metric) -> (Option<i64>, Option<f64>) {
    assert_eq!(1, metric.metric_points.len());

    let metric_point = metric.metric_points.first().unwrap();
    let metric_point_value = metric_point.value.as_ref().unwrap().clone();
    match metric_point_value {
        metric_point::Value::GaugeValue(gauge_value) => match gauge_value.value {
            Some(gauge_value::Value::IntValue(i)) => (Some(i), None),
            Some(gauge_value::Value::DoubleValue(f)) => (None, Some(f)),
            _ => unreachable!(),
        },
        _ => unreachable!(),
    }
}

pub(crate) fn get_counter_value(metric: &Metric) -> (Option<u64>, Option<f64>) {
    assert_eq!(1, metric.metric_points.len());

    let metric_point = metric.metric_points.first().unwrap();
    let metric_point_value = metric_point.value.as_ref().unwrap().clone();
    match metric_point_value {
        metric_point::Value::CounterValue(counter_value) => match counter_value.total {
            Some(counter_value::Total::IntValue(i)) => (Some(i), None),
            Some(counter_value::Total::DoubleValue(f)) => (None, Some(f)),
            _ => unreachable!(),
        },
        _ => unreachable!(),
    }
}

fn get_histogram_value(metric: &Metric) -> HistogramValue {
    assert_eq!(1, metric.metric_points.len());

    let metric_point = metric.metric_points.first().unwrap();
    let metric_point_value = metric_point.value.as_ref().unwrap().clone();
    match metric_point_value {
        metric_point::Value::HistogramValue(histogram_value) => histogram_value,
        _ => unreachable!(),
    }
}

/// Record an InstanceInfo to InfluxDB. This is useful on the Grafana dashboard.
pub(crate) async fn record_instance_info(
    client: &Client,
    node_id: usize,
    peer_id: &PeerId,
    run_id: &str,
) -> Result<(), testground::errors::Error> {
    let query = WriteQuery::new(Local::now().into(), "participants")
        .add_tag(TAG_RUN_ID, run_id.to_owned())
        // Add below as "field" not tag, because in InfluxQL, the SELECT clause can't specify only tags.
        // https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#select-clause
        // > The SELECT clause must specify at least one field when it includes a tag.
        .add_field(TAG_INSTANCE_NAME, node_id.to_string())
        .add_field(TAG_INSTANCE_PEER_ID, peer_id.to_string());

    client.record_metric(query).await
}
eth_consensus/utils/gen_topology/Cargo.toml (new file, +10)
@@ -0,0 +1,10 @@
[package]
name = "gen_topology"
version = "0.1.0"
edition = "2021"

[dependencies]

rand = "0.8.5"
rand_chacha = "0.3.1"
serde = { workspace = true, features = [ "derive" ]}
280
eth_consensus/utils/gen_topology/src/lib.rs
Normal file
@@ -0,0 +1,280 @@
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::ops::RangeInclusive;

use rand::seq::SliceRandom;
use rand::Rng;
use rand::SeedableRng;
use rand_chacha::ChaCha8Rng;
use serde::{Deserialize, Serialize};

pub type NodeId = usize;
pub type ValId = usize;

#[derive(Serialize, Deserialize, Debug)]
pub struct Params {
    seed: u64,
    total_validators: usize,
    total_nodes_with_vals: usize,
    total_nodes: usize,
    connections_range: RangeInclusive<usize>,
}

impl Params {
    pub fn new(
        seed: u64,
        total_validators: usize,
        total_nodes_with_vals: usize,
        total_nodes_without_vals: usize,
        min_peers_per_node: usize,
        max_peers_per_node_inclusive: usize,
    ) -> Result<Self, &'static str> {
        let total_nodes = total_nodes_with_vals + total_nodes_without_vals;
        if total_nodes_with_vals > total_nodes {
            return Err("bad number of nodes with validators");
        }

        if total_nodes == 0 {
            return Err("empty network");
        }

        if total_validators == 0 {
            return Err("no validators in the network");
        }

        let connections_range = min_peers_per_node..=max_peers_per_node_inclusive;
        if connections_range.is_empty() {
            return Err("bad connection range");
        }

        Ok(Self {
            seed,
            total_validators,
            total_nodes_with_vals,
            total_nodes,
            connections_range,
        })
    }

    pub fn seed(&self) -> u64 {
        self.seed
    }

    pub fn total_validators(&self) -> usize {
        self.total_validators
    }

    pub fn total_nodes_with_vals(&self) -> usize {
        self.total_nodes_with_vals
    }

    pub fn total_nodes(&self) -> usize {
        self.total_nodes
    }

    pub fn connections_range(&self) -> RangeInclusive<usize> {
        self.connections_range.clone()
    }
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Network {
    /// Params used to generate this network configuration. Stored to allow reproduction.
    params: Params,
    /// Validators managed by each node.
    validator_assignments: BTreeMap<NodeId, BTreeSet<ValId>>,
    /// Peers to connect to for each node.
    outbound_peers: BTreeMap<NodeId, Vec<NodeId>>,
}

impl Network {
    fn new(
        params: Params,
        validator_assignments: BTreeMap<NodeId, BTreeSet<ValId>>,
        outbound_peers: BTreeMap<NodeId, Vec<NodeId>>,
    ) -> Result<Self, String> {
        let total_vals: usize = validator_assignments
            .values()
            .map(|validator_list| validator_list.len())
            .sum();

        //
        // validator assignment checks
        //

        // First check that, counting possible repetitions, the number of assigned validators
        // is right.
        if total_vals != params.total_validators {
            return Err(format!("the number of assigned validators does not match the expected number: found {}, expected {}", total_vals, params.total_validators));
        }

        let assigned_vals: HashSet<&ValId> = validator_assignments
            .values()
            .flat_map(|validator_list| validator_list.iter())
            .collect();
        if assigned_vals.len() != total_vals {
            return Err("a validator id was assigned more than once".to_string());
        }

        if assigned_vals.iter().max().expect("total_validators > 0")
            != &&(params.total_validators - 1)
        {
            return Err("validator ids do not cover the expected range (wrong max)".to_string());
        }

        if assigned_vals.iter().min().expect("total_validators > 0") != &&0 {
            return Err("validator ids do not cover the expected range (wrong min)".to_string());
        }

        //
        // topology checks
        //

        let connected_peers: HashSet<NodeId> = outbound_peers
            .iter()
            .flat_map(|(peer_a, dialed_peers)| dialed_peers.iter().chain(Some(peer_a)))
            .cloned()
            .collect();
        let expected_peers: HashSet<usize> = (0..params.total_nodes).collect();
        if connected_peers != expected_peers {
            return Err(format!(
                "set of dialed peers and expected peers differ: missing {:?}",
                expected_peers.difference(&connected_peers)
            ));
        }

        println!("Connectedness should be checked with an external tool!");
        Ok(Self {
            params,
            validator_assignments,
            outbound_peers,
        })
    }

    pub fn generate(params: Params) -> Result<Self, String> {
        // Use a deterministic pseudo-random number generator.
        let mut gen = ChaCha8Rng::seed_from_u64(params.seed);

        let validator_assignments = {
            // Assign validators to each node that has any validators at all.
            let mut all_validators = (0..params.total_validators).collect::<Vec<_>>();
            let mut cuts: Vec<_> = all_validators
                .choose_multiple(&mut gen, params.total_nodes_with_vals - 1)
                .cloned()
                .collect();
            cuts.push(params.total_validators);
            cuts.push(0);
            all_validators.shuffle(&mut gen);
            cuts.sort();

            let validator_assignments: BTreeMap<NodeId, BTreeSet<ValId>> = cuts
                .windows(2)
                .enumerate()
                .map(|(node_id, current_cut)| {
                    // NOTE: this means the nodes that have validators are the first
                    // `params.total_nodes_with_vals`. Since the node_id is an abstract construct
                    // not used anywhere, it doesn't seem worth randomizing this part.
                    let start = current_cut[0];
                    let end = current_cut[1];
                    let assigned_vals: BTreeSet<ValId> =
                        all_validators[start..end].iter().cloned().collect();
                    (node_id, assigned_vals)
                })
                .collect();

            validator_assignments
        };

        let outbound_peers = {
            let mut outbound_peers: BTreeMap<NodeId, Vec<NodeId>> = BTreeMap::default();

            // First build the set of all possible connections (a, b), ignoring connection
            // direction.
            let mut all_connections: Vec<(usize, usize)> = (0..params.total_nodes)
                .flat_map(|i| (i + 1..params.total_nodes).map(move |j| (i, j)))
                .collect();

            // Keep track of all connections (inbound and outbound) per peer.
            // BTreeSet is useful for debugging with consistent order.
            type IsOutbound = bool;
            let mut topology = BTreeMap::<NodeId, (usize, BTreeMap<NodeId, IsOutbound>)>::default();

            let connections_range = params.connections_range();

            // For each node_id, generate a random expected number of connections within the
            // given range.
            for p in 0..params.total_nodes {
                // Decide how many connections the node should have.
                let num_peers = gen.gen_range(connections_range.clone());
                // Store the expected number of connections and a default map to keep track of
                // the added connections.
                topology.insert(p, (num_peers, BTreeMap::default()));
            }

            // Pick a random connection and add it if it's useful.
            all_connections.shuffle(&mut gen);
            while let Some((a, b)) = all_connections.pop() {
                let (expected, current) = topology.get(&a).unwrap();
                if current.len() >= *expected {
                    continue;
                }

                let (expected, current) = topology.get(&b).unwrap();
                if current.len() >= *expected {
                    continue;
                }

                let from_a_to_b = gen.gen_ratio(3, 5);

                topology.get_mut(&a).unwrap().1.insert(b, from_a_to_b);
                topology.get_mut(&b).unwrap().1.insert(a, !from_a_to_b);

                if from_a_to_b {
                    outbound_peers.entry(a).or_default().push(b);
                } else {
                    outbound_peers.entry(b).or_default().push(a);
                }
            }

            if topology
                .values()
                .any(|(_expected_connections, connections)| {
                    !connections_range.contains(&connections.len())
                })
            {
                // The _expected_connections number might not be reached for a few nodes. What
                // really matters is the number of connections being within the set range. This
                // hasn't been observed so far.
                eprintln!("Some nodes didn't reach the expected number of connections");
            }
            outbound_peers
        };

        Network::new(params, validator_assignments, outbound_peers)
            .map_err(|e| format!("Network generation failed: {e}"))
    }

    pub fn outbound_peers(&self) -> &BTreeMap<NodeId, Vec<NodeId>> {
        &self.outbound_peers
    }

    pub fn validator_assignments(&self) -> &BTreeMap<NodeId, BTreeSet<ValId>> {
        &self.validator_assignments
    }

    pub fn params(&self) -> &Params {
        &self.params
    }

    #[allow(clippy::type_complexity)]
    pub fn destructure(
        self,
    ) -> (
        Params,
        BTreeMap<NodeId, Vec<NodeId>>,
        BTreeMap<NodeId, BTreeSet<ValId>>,
    ) {
        let Network {
            params,
            validator_assignments,
            outbound_peers,
        } = self;
        (params, outbound_peers, validator_assignments)
    }
}
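A minimal sketch (not part of this diff) of how the gen_topology API above fits together: build Params, generate a deterministic Network from a seed, then take ownership of its parts. All names come from this file; the concrete numbers are illustrative only.

// Hypothetical driver: the same seed always yields the same topology.
fn example() -> Result<(), String> {
    let params = gen_topology::Params::new(
        42,  // seed
        100, // total_validators
        10,  // total_nodes_with_vals
        5,   // total_nodes_without_vals
        3,   // min_peers_per_node
        6,   // max_peers_per_node_inclusive
    )?;
    let network = gen_topology::Network::generate(params)?;
    // `destructure` returns (params, outbound_peers, validator_assignments).
    let (_params, outbound_peers, validator_assignments) = network.destructure();
    assert_eq!(validator_assignments.len(), 10);
    println!("node 0 dials {:?}", outbound_peers.get(&0));
    Ok(())
}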
11
eth_consensus/utils/gen_topology_files/Cargo.toml
Normal file
@@ -0,0 +1,11 @@
[package]
name = "gen_topology_files"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
gen_topology = { path = "../gen_topology" }
clap = { version = "4.0.10", features = ["derive"] }
serde_json.workspace = true
400427
eth_consensus/utils/gen_topology_files/mainnet2.dot
Normal file
File diff suppressed because it is too large
1
eth_consensus/utils/gen_topology_files/mainnet2.json
Normal file
File diff suppressed because one or more lines are too long
65
eth_consensus/utils/gen_topology_files/src/main.rs
Normal file
@@ -0,0 +1,65 @@
use clap::Parser;
use std::io::Write;

/// Command-line parameters. With clap's derive API these are positional
/// arguments, taken in declaration order.
#[derive(Parser)]
pub struct RunParams {
    seed: u64,
    total_validators: usize,
    total_nodes_with_vals: usize,
    // NOTE: passed to `Params::new` as `total_nodes_without_vals`.
    total_nodes: usize,
    min_peers_per_node: usize,
    max_peers_per_node_inc: usize,
    dotfile_name: String,
    config_file_name: String,
}

fn main() {
    if let Err(e) = gen_and_save() {
        eprintln!("{}", e);
        std::process::exit(1)
    }
}

fn gen_and_save() -> Result<(), String> {
    let RunParams {
        seed,
        total_validators,
        total_nodes_with_vals,
        total_nodes,
        min_peers_per_node,
        max_peers_per_node_inc,
        dotfile_name,
        config_file_name,
    } = RunParams::parse();
    let params = gen_topology::Params::new(
        seed,
        total_validators,
        total_nodes_with_vals,
        total_nodes,
        min_peers_per_node,
        max_peers_per_node_inc,
    )?;
    let network = gen_topology::Network::generate(params)?;

    // Generate the dotfile.
    let mut file =
        std::fs::File::create(dotfile_name).map_err(|e| format!("Failed to create dotfile {e}"))?;
    writeln!(file, "digraph {{").map_err(|e| format!("Failed writing dotfile {e}"))?;
    for (peer_a, dialed_peers) in network.outbound_peers() {
        for peer_b in dialed_peers {
            writeln!(file, "\t{peer_a} -> {peer_b};")
                .map_err(|e| format!("Failed writing dotfile {e}"))?;
        }
    }
    writeln!(file, "}}").map_err(|e| format!("Failed writing dotfile {e}"))?;

    // Generate the config file.
    let mut file = std::fs::File::create(config_file_name)
        .map_err(|e| format!("Failed creating network file {e}"))?;
    let network_rep = serde_json::to_string(&network)
        .map_err(|e| format!("Failed to create network file {e}"))?;
    write!(file, "{}", network_rep).map_err(|e| format!("Failed to write network file {e}"))?;

    Ok(())
}
@@ -13,4 +13,9 @@ providers:
     type: file
     options:
       path: /var/lib/grafana/dashboards/censoring/PeerScores.json
+  - name: 'Basic Gossipsub'
+    orgId: 1
+    folder: 'Eth Consensus'
+    type: file
+    options:
+      path: /var/lib/grafana/dashboards/eth_consensus/BasicGossipsub.json
6
scripts/clear_db.sh
Executable file
@@ -0,0 +1,6 @@
#! /bin/sh
# This script is used to drop and re-create the InfluxDB docker container,
# removing all past testground runs.

docker rm -f testground-influxdb
testground healthcheck --runner local:docker --fix
@@ -1,6 +1,6 @@
 # This Dockerfile is for the `docker:generic` builder.
 # See https://docs.testground.ai/builder-library/docker-generic for details about the builder.
-FROM rust:1.62-bullseye as builder
+FROM rust:1.64-bullseye as builder
 WORKDIR /usr/src/test-plan

 # * `prost-build`, a dependency of `libp2p-gossipsub`, requires cmake.
@@ -16,6 +16,7 @@ RUN apt-get update && apt-get install -y cmake && apt-get install -y protobuf-co
 RUN mkdir -p ./plan/src/
 # This is a placeholder main function to build only the dependencies.
 RUN echo "fn main() { println!(\"If you see this message, you may want to clean up the target directory or the Docker build cache.\") }" > ./plan/src/main.rs

 COPY ./plan/Cargo.toml ./plan/
 RUN cd ./plan/ && cargo build --release

@@ -37,4 +38,4 @@ COPY --from=builder /usr/src/test-plan/plan/target/release/smoke /usr/local/bin/
 # Configure Logging
 # ENV RUST_LOG=libp2p_gossipsub=debug

 ENTRYPOINT ["smoke"]
@@ -14,5 +14,5 @@ enabled = true
 name = "smoke"
 instances = { min = 2, max = 100, default = 2 }

 [testcases.params]
 gossipsub_history_length = { type = "int", desc = "Number of heartbeats to keep in the `memcache`", default = 5 }