chore(harness): improve UX with progress bar, separate sweep benches (#1068)

2026-01-09 21:38:00 -05:00 · 2025-12-23 15:59:14 +02:00
parent b41d678829
commit 9dfac850d5
8 changed files with 416 additions and 43 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2051,7 +2051,7 @@ checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
 dependencies = [
 "serde",
 "termcolor",
- "unicode-width",
+ "unicode-width 0.1.14",
 ]

 [[package]]
@@ -2096,6 +2096,19 @@ dependencies = [
 "crossbeam-utils",
 ]

+[[package]]
+name = "console"
+version = "0.15.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
+dependencies = [
+ "encode_unicode",
+ "libc",
+ "once_cell",
+ "unicode-width 0.2.2",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "console_error_panic_hook"
 version = "0.1.7"
@@ -2900,6 +2913,12 @@ dependencies = [
 "zeroize",
 ]

+[[package]]
+name = "encode_unicode"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
+
 [[package]]
 name = "enum-ordinalize"
 version = "4.3.2"
@@ -3877,6 +3896,19 @@ dependencies = [
 "serde_core",
 ]

+[[package]]
+name = "indicatif"
+version = "0.17.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
+dependencies = [
+ "console",
+ "number_prefix",
+ "portable-atomic",
+ "unicode-width 0.2.2",
+ "web-time 1.1.0",
+]
+
 [[package]]
 name = "inout"
 version = "0.1.4"
@@ -5011,6 +5043,12 @@ dependencies = [
 "syn 2.0.111",
 ]

+[[package]]
+name = "number_prefix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
+
 [[package]]
 name = "nybbles"
 version = "0.4.6"
@@ -5371,6 +5409,12 @@ dependencies = [
 "universal-hash 0.5.1",
 ]

+[[package]]
+name = "portable-atomic"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f59e70c4aef1e55797c2e8fd94a4f2a973fc972cfde0e0b05f683667b0cd39dd"
+
 [[package]]
 name = "potential_utf"
 version = "0.1.4"
@@ -7459,6 +7503,7 @@ dependencies = [
 "csv",
 "duct",
 "futures",
+ "indicatif",
 "ipnet",
 "serde_json",
 "serio",
@@ -8160,6 +8205,12 @@ version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"

+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
 [[package]]
 name = "unicode-xid"
 version = "0.2.6"
--- a/crates/harness/bench.toml
+++ b/crates/harness/bench.toml
@@ -1,51 +1,59 @@
-#### Latency ####
+#### Default Representative Benchmarks ####
+#
+# This benchmark measures TLSNotary performance on three representative network scenarios.
+# Each scenario is run multiple times and summarized by its median runtime (not plotted);
+# every sample is also written to the CSV output. Use this for quick performance checks and CI regression testing.
+#
+# Payload sizes:
+#   - upload-size: 1KB (typical HTTP request)
+#   - download-size: 2KB (typical HTTP response/API data)
+#
+# Network scenarios are chosen to represent real-world user conditions where
+# TLSNotary is primarily bottlenecked by upload bandwidth.
+
+#### Cable/DSL Home Internet ####
+# Most common residential internet connection
+# - Asymmetric: high download, limited upload (typical bottleneck)
+# - Upload bandwidth: 20 Mbps (realistic cable/DSL upload speed)
+# - Latency: 20ms (typical ISP latency)

 [[group]]
-name = "latency"
-bandwidth = 1000
+name = "cable"
+bandwidth = 20
+protocol_latency = 20
+upload-size = 1024
+download-size = 2048

 [[bench]]
-group = "latency"
-protocol_latency = 10
+group = "cable"

-[[bench]]
-group = "latency"
-protocol_latency = 25
-
-[[bench]]
-group = "latency"
-protocol_latency = 50
-
-[[bench]]
-group = "latency"
-protocol_latency = 100
-
-[[bench]]
-group = "latency"
-protocol_latency = 200
-
-#### Bandwidth ####
+#### Mobile 5G ####
+# Modern mobile connection with good coverage
+# - Upload bandwidth: 30 Mbps (typical 5G upload in good conditions)
+# - Latency: 30ms (higher than wired due to mobile tower hops)

 [[group]]
-name = "bandwidth"
-protocol_latency = 25
+name = "mobile_5g"
+bandwidth = 30
+protocol_latency = 30
+upload-size = 1024
+download-size = 2048

 [[bench]]
-group = "bandwidth"
-bandwidth = 10
+group = "mobile_5g"

-[[bench]]
-group = "bandwidth"
-bandwidth = 50
+#### Fiber Home Internet ####
+# High-end residential connection (best case scenario)
+# - Symmetric: equal upload/download bandwidth
+# - Upload bandwidth: 100 Mbps (typical fiber upload)
+# - Latency: 15ms (lower latency than cable)

-[[bench]]
-group = "bandwidth"
+[[group]]
+name = "fiber"
 bandwidth = 100
+protocol_latency = 15
+upload-size = 1024
+download-size = 2048

 [[bench]]
-group = "bandwidth"
-bandwidth = 250
-
-[[bench]]
-group = "bandwidth"
-bandwidth = 1000
+group = "fiber"
--- a/crates/harness/bench_bandwidth_sweep.toml
+++ b/crates/harness/bench_bandwidth_sweep.toml
@@ -0,0 +1,52 @@
+#### Bandwidth Sweep Benchmark ####
+#
+# Measures how network bandwidth affects TLSNotary runtime.
+# Keeps latency and payload sizes fixed while varying upload bandwidth.
+#
+# Fixed parameters:
+#   - Latency: 25ms (typical internet latency)
+#   - Upload: 1KB (typical request)
+#   - Download: 2KB (typical response)
+#
+# Variable: Bandwidth from 5 Mbps to 1000 Mbps
+#
+# Use this to plot "Bandwidth vs Runtime" and understand bandwidth sensitivity.
+# Focus on upload bandwidth, as TLSNotary is primarily upload-bottlenecked.
+
+[[group]]
+name = "bandwidth_sweep"
+protocol_latency = 25
+upload-size = 1024
+download-size = 2048
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 5
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 10
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 20
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 50
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 100
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 250
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 500
+
+[[bench]]
+group = "bandwidth_sweep"
+bandwidth = 1000
--- a/crates/harness/bench_download_sweep.toml
+++ b/crates/harness/bench_download_sweep.toml
@@ -0,0 +1,61 @@
+#### Download Size Sweep Benchmark ####
+#
+# Measures how download payload size affects TLSNotary runtime.
+# Keeps network conditions fixed while varying the response size.
+#
+# Fixed parameters:
+#   - Bandwidth: 100 Mbps (typical good connection)
+#   - Latency: 25ms (typical internet latency)
+#   - Upload: 1KB (typical request size)
+#
+# Variable: Download size from 1KB to 100KB
+#
+# Use this to plot "Download Size vs Runtime" and understand how much data
+# TLSNotary can efficiently notarize. Useful for determining optimal
+# chunking strategies for large responses.
+
+[[group]]
+name = "download_sweep"
+bandwidth = 100
+protocol_latency = 25
+upload-size = 1024
+
+[[bench]]
+group = "download_sweep"
+download-size = 1024
+
+[[bench]]
+group = "download_sweep"
+download-size = 2048
+
+[[bench]]
+group = "download_sweep"
+download-size = 5120
+
+[[bench]]
+group = "download_sweep"
+download-size = 10240
+
+[[bench]]
+group = "download_sweep"
+download-size = 20480
+
+[[bench]]
+group = "download_sweep"
+download-size = 30720
+
+[[bench]]
+group = "download_sweep"
+download-size = 40960
+
+[[bench]]
+group = "download_sweep"
+download-size = 51200
+
+[[bench]]
+group = "download_sweep"
+download-size = 76800
+
+[[bench]]
+group = "download_sweep"
+download-size = 102400
--- a/crates/harness/bench_latency_sweep.toml
+++ b/crates/harness/bench_latency_sweep.toml
@@ -0,0 +1,47 @@
+#### Latency Sweep Benchmark ####
+#
+# Measures how network latency affects TLSNotary runtime.
+# Keeps bandwidth and payload sizes fixed while varying protocol latency.
+#
+# Fixed parameters:
+#   - Bandwidth: 100 Mbps (typical good connection)
+#   - Upload: 1KB (typical request)
+#   - Download: 2KB (typical response)
+#
+# Variable: Protocol latency from 10ms to 200ms
+#
+# Use this to plot "Latency vs Runtime" and understand latency sensitivity.
+
+[[group]]
+name = "latency_sweep"
+bandwidth = 100
+upload-size = 1024
+download-size = 2048
+
+[[bench]]
+group = "latency_sweep"
+protocol_latency = 10
+
+[[bench]]
+group = "latency_sweep"
+protocol_latency = 25
+
+[[bench]]
+group = "latency_sweep"
+protocol_latency = 50
+
+[[bench]]
+group = "latency_sweep"
+protocol_latency = 75
+
+[[bench]]
+group = "latency_sweep"
+protocol_latency = 100
+
+[[bench]]
+group = "latency_sweep"
+protocol_latency = 150
+
+[[bench]]
+group = "latency_sweep"
+protocol_latency = 200
--- a/crates/harness/runner/Cargo.toml
+++ b/crates/harness/runner/Cargo.toml
@@ -22,6 +22,7 @@ clap = { workspace = true, features = ["derive", "env"] }
 csv = { version = "1.3" }
 duct = { version = "1" }
 futures = { workspace = true }
+indicatif = { version = "0.17" }
 ipnet = { workspace = true }
 serio = { workspace = true }
 serde_json = { workspace = true }
--- a/crates/harness/runner/src/cli.rs
+++ b/crates/harness/runner/src/cli.rs
@@ -31,10 +31,13 @@ pub enum Command {
    },
    /// runs benchmarks.
    Bench {
-        /// Configuration path.
+        /// Configuration path. Defaults to bench.toml which contains
+        /// representative scenarios (cable, 5G, fiber) for quick performance
+        /// checks. Use bench_*_sweep.toml files for parametric
+        /// analysis.
        #[arg(short, long, default_value = "bench.toml")]
        config: PathBuf,
-        /// Output file path.
+        /// Output CSV file path for detailed metrics and post-processing.
        #[arg(short, long, default_value = "metrics.csv")]
        output: PathBuf,
        /// Number of samples to measure per benchmark. This is overridden by
--- a/crates/harness/runner/src/lib.rs
+++ b/crates/harness/runner/src/lib.rs
@@ -9,7 +9,7 @@ mod ws_proxy;
 #[cfg(feature = "debug")]
 mod debug_prelude;

-use std::time::Duration;
+use std::{collections::HashMap, time::Duration};

 use anyhow::Result;
 use clap::Parser;
@@ -22,6 +22,7 @@ use harness_core::{
    rpc::{BenchCmd, TestCmd},
    test::TestStatus,
 };
+use indicatif::{ProgressBar, ProgressStyle};

 use cli::{Cli, Command};
 use executor::Executor;
@@ -32,6 +33,60 @@ use crate::debug_prelude::*;

 use crate::{cli::Route, network::Network, wasm_server::WasmServer, ws_proxy::WsProxy};

+/// Timing samples collected for a single benchmark configuration.
+#[derive(Debug, Clone)]
+struct BenchStats {
+    group: Option<String>, // `None` is shown as "unnamed" in the summary
+    bandwidth: usize,      // printed with an "Mbps" suffix in the summary
+    latency: usize,        // printed with an "ms" suffix in the summary
+    upload_size: usize,    // divided by 1024 and printed as KB, so presumably bytes
+    download_size: usize,  // divided by 1024 and printed as KB, so presumably bytes
+    times: Vec<u64>,       // one total time per sample; presumably ms (printed / 1000 as s)
+}
+
+impl BenchStats {
+    fn median(&self) -> f64 { // median of `times`; 0.0 when no samples were recorded
+        let mut sorted = self.times.clone(); // sort a copy; `self` is only borrowed immutably
+        sorted.sort();
+        let len = sorted.len();
+        if len == 0 {
+            return 0.0; // avoids indexing into an empty vec below
+        }
+        if len.is_multiple_of(2) {
+            (sorted[len / 2 - 1] + sorted[len / 2]) as f64 / 2.0 // even: mean of middle pair
+        } else {
+            sorted[len / 2] as f64 // odd: the single middle value
+        }
+    }
+}
+
+/// Prints a per-configuration summary (parameters and median runtime) to stdout.
+fn print_bench_summary(stats: &[BenchStats]) {
+    if stats.is_empty() {
+        println!("\nNo benchmark results to display (only warmup was run).");
+        return;
+    }
+
+    println!("\n{}", "=".repeat(80)); // 80-column banner rule
+    println!("TLSNotary Benchmark Results");
+    println!("{}", "=".repeat(80));
+    println!();
+
+    for stat in stats {
+        let group_name = stat.group.as_deref().unwrap_or("unnamed"); // placeholder for `None`
+        println!(
+            "{} ({} Mbps, {}ms latency, {}KB↑ {}KB↓):",
+            group_name,
+            stat.bandwidth,
+            stat.latency,
+            stat.upload_size / 1024, // integer division: sizes under 1024 print as 0KB
+            stat.download_size / 1024
+        );
+        println!("  Median:  {:.2}s", stat.median() / 1000.0); // presumably ms -> s (confirm)
+        println!();
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum, Default)]
 pub enum Target {
    #[default]
@@ -206,6 +261,12 @@ pub async fn main() -> Result<()> {
            samples_override,
            skip_warmup,
        } => {
+            // Print configuration info
+            println!("TLSNotary Benchmark Harness");
+            println!("Running benchmarks from: {}", config.display());
+            println!("Output will be written to: {}", output.display());
+            println!();
+
            let items: BenchItems = toml::from_str(&std::fs::read_to_string(config)?)?;
            let output_file = std::fs::File::create(output)?;
            let mut writer = WriterBuilder::new().from_writer(output_file);
@@ -220,7 +281,34 @@ pub async fn main() -> Result<()> {
            runner.exec_p.start().await?;
            runner.exec_v.start().await?;

-            for config in benches {
+            // Create progress bar
+            let pb = ProgressBar::new(benches.len() as u64);
+            pb.set_style(
+                ProgressStyle::default_bar()
+                    .template("[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} {msg}")
+                    .expect("valid template")
+                    .progress_chars("█▓▒░ "),
+            );
+
+            // Collect measurements for stats
+            let mut measurements_by_config: HashMap<String, Vec<u64>> = HashMap::new();
+
+            let warmup_count = if skip_warmup { 0 } else { 3 };
+
+            for (idx, config) in benches.iter().enumerate() {
+                let is_warmup = idx < warmup_count;
+
+                let group_name = if is_warmup {
+                    format!("Warmup {}/{}", idx + 1, warmup_count)
+                } else {
+                    config.group.as_deref().unwrap_or("unnamed").to_string()
+                };
+
+                pb.set_message(format!(
+                    "{} ({} Mbps, {}ms)",
+                    group_name, config.bandwidth, config.protocol_latency
+                ));
+
                runner
                    .network
                    .set_proto_config(config.bandwidth, config.protocol_latency.div_ceil(2))?;
@@ -249,11 +337,73 @@ pub async fn main() -> Result<()> {
                    panic!("expected prover output");
                };

-                let measurement = Measurement::new(config, metrics);
+                // Collect metrics for stats (skip warmup benches)
+                if !is_warmup {
+                    let config_key = format!(
+                        "{:?}|{}|{}|{}|{}",
+                        config.group,
+                        config.bandwidth,
+                        config.protocol_latency,
+                        config.upload_size,
+                        config.download_size
+                    );
+                    measurements_by_config
+                        .entry(config_key)
+                        .or_default()
+                        .push(metrics.time_total);
+                }
+
+                let measurement = Measurement::new(config.clone(), metrics);

                writer.serialize(measurement)?;
                writer.flush()?;
+
+                pb.inc(1);
            }
+
+            pb.finish_with_message("Benchmarks complete");
+
+            // Compute and print statistics
+            let mut all_stats: Vec<BenchStats> = Vec::new();
+            for (key, times) in measurements_by_config {
+                // Parse back the config from the key
+                let parts: Vec<&str> = key.split('|').collect();
+                if parts.len() >= 5 {
+                    let group = if parts[0] == "None" {
+                        None
+                    } else {
+                        Some(
+                            parts[0]
+                                .trim_start_matches("Some(\"")
+                                .trim_end_matches("\")")
+                                .to_string(),
+                        )
+                    };
+                    let bandwidth: usize = parts[1].parse().unwrap_or(0);
+                    let latency: usize = parts[2].parse().unwrap_or(0);
+                    let upload_size: usize = parts[3].parse().unwrap_or(0);
+                    let download_size: usize = parts[4].parse().unwrap_or(0);
+
+                    all_stats.push(BenchStats {
+                        group,
+                        bandwidth,
+                        latency,
+                        upload_size,
+                        download_size,
+                        times,
+                    });
+                }
+            }
+
+            // Sort by group, latency, bandwidth; NOTE(review): size-only ties keep HashMap order
+            all_stats.sort_by(|a, b| {
+                a.group
+                    .cmp(&b.group)
+                    .then(a.latency.cmp(&b.latency))
+                    .then(a.bandwidth.cmp(&b.bandwidth))
+            });
+
+            print_bench_summary(&all_stats);
        }
        Command::Serve {} => {
            runner.start_services().await?;