Allow plotting multiple data runs

This commit is contained in:
Hendrik Eeckhaut
2025-12-23 14:31:54 +01:00
parent 3cb7c5c0b4
commit ac1df8fc75
2 changed files with 244 additions and 50 deletions

View File

@@ -0,0 +1,111 @@
# TLSNotary Benchmark Plot Tool
Generates interactive HTML and SVG plots from TLSNotary benchmark results. Supports comparing multiple benchmark runs (e.g., before/after optimization, native vs browser).
## Usage
```bash
tlsn-harness-plot <TOML> <CSV>... [OPTIONS]
```
### Arguments
- `<TOML>` - Path to Bench.toml file defining benchmark structure
- `<CSV>...` - One or more CSV files with benchmark results
### Options
- `-l, --labels <LABEL>...` - Labels for each dataset (optional)
- If omitted, datasets are labeled "Dataset 1", "Dataset 2", etc.
- Number of labels must match number of CSV files
- `--min-max-band` - Add min/max bands to plots showing variance
- `-h, --help` - Print help information
## Examples
### Single Dataset
```bash
tlsn-harness-plot bench.toml results.csv
```
Generates plots from a single benchmark run.
### Compare Two Runs
```bash
tlsn-harness-plot bench.toml before.csv after.csv \
--labels "Before Optimization" "After Optimization"
```
Overlays two datasets to compare performance improvements.
### Multiple Datasets
```bash
tlsn-harness-plot bench.toml native.csv browser.csv wasm.csv \
--labels "Native" "Browser" "WASM"
```
Compare three different runtime environments.
### With Min/Max Bands
```bash
tlsn-harness-plot bench.toml run1.csv run2.csv \
--labels "Config A" "Config B" \
--min-max-band
```
Shows variance ranges for each dataset.
## Output Files
The tool generates two files per benchmark group:
- `<output>.html` - Interactive HTML chart (zoomable, hoverable)
- `<output>.svg` - Static SVG image for documentation
Default output filenames:
- `runtime_vs_bandwidth.{html,svg}` - When `protocol_latency` is defined in group
- `runtime_vs_latency.{html,svg}` - When `bandwidth` is defined in group
## Plot Format
Each dataset displays:
- **Solid line** - Total runtime (preprocessing + online phase)
- **Dashed line** - Online phase only
- **Shaded area** (optional) - Min/max variance bands
Different datasets automatically use distinct colors for easy comparison.
## CSV Format
Expected columns in each CSV file:
- `group` - Benchmark group name (must match TOML)
- `bandwidth` - Network bandwidth in Kbps (for bandwidth plots)
- `latency` - Network latency in ms (for latency plots)
- `time_preprocess` - Preprocessing time in ms
- `time_online` - Online phase time in ms
- `time_total` - Total runtime in ms
## TOML Format
The benchmark TOML file defines groups with either:
```toml
[[group]]
name = "my_benchmark"
protocol_latency = 50 # Fixed latency for bandwidth plots
# OR
bandwidth = 10000 # Fixed bandwidth for latency plots
```
All datasets must use the same TOML file to ensure consistent benchmark structure.
## Tips
- Use descriptive labels to make plots self-documenting
- Keep CSV files from the same benchmark configuration for valid comparisons
- Min/max bands are useful for showing stability but can clutter plots with many datasets
- Interactive HTML plots support zooming and hovering for detailed values

View File

@@ -3,7 +3,10 @@ use std::f32;
use charming::{
Chart, HtmlRenderer, ImageRenderer,
component::{Axis, Legend, Title},
element::{AreaStyle, LineStyle, NameLocation, Orient, TextStyle, Tooltip, Trigger},
element::{
AreaStyle, ItemStyle, LineStyle, LineStyleType, NameLocation, Orient, TextStyle, Tooltip,
Trigger,
},
series::Line,
theme::Theme,
};
@@ -19,39 +22,60 @@ struct Cli {
/// Path to the Bench.toml file with benchmark spec
toml: String,
/// Path to the CSV file with benchmark results
csv: String,
/// Paths to CSV files with benchmark results (one or more)
csv: Vec<String>,
/// Prover kind: native or browser
#[arg(short, long, value_enum, default_value = "native")]
prover_kind: ProverKind,
/// Labels for each dataset (optional, defaults to "Dataset 1", "Dataset 2", etc.)
#[arg(short, long, num_args = 0..)]
labels: Vec<String>,
/// Add min/max bands to plots
#[arg(long, default_value_t = false)]
min_max_band: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
enum ProverKind {
Native,
Browser,
}
impl std::fmt::Display for ProverKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ProverKind::Native => write!(f, "Native"),
ProverKind::Browser => write!(f, "Browser"),
}
}
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let cli = Cli::parse();
let df = CsvReadOptions::default()
.try_into_reader_with_file_path(Some(cli.csv.clone().into()))?
.finish()?;
if cli.csv.is_empty() {
return Err("At least one CSV file must be provided".into());
}
// Generate labels if not provided
let labels: Vec<String> = if cli.labels.is_empty() {
cli.csv
.iter()
.enumerate()
.map(|(i, _)| format!("Dataset {}", i + 1))
.collect()
} else if cli.labels.len() != cli.csv.len() {
return Err(format!(
"Number of labels ({}) must match number of CSV files ({})",
cli.labels.len(),
cli.csv.len()
)
.into());
} else {
cli.labels.clone()
};
// Load all CSVs and add dataset label
let mut dfs = Vec::new();
for (csv_path, label) in cli.csv.iter().zip(labels.iter()) {
let mut df = CsvReadOptions::default()
.try_into_reader_with_file_path(Some(csv_path.clone().into()))?
.finish()?;
let label_series = Series::new("dataset_label".into(), vec![label.as_str(); df.height()]);
df.with_column(label_series)?;
dfs.push(df);
}
// Combine all dataframes
let df = dfs
.into_iter()
.reduce(|acc, df| acc.vstack(&df).unwrap())
.unwrap();
let items: BenchItems = toml::from_str(&std::fs::read_to_string(&cli.toml)?)?;
let groups = items.group;
@@ -61,12 +85,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let latency = group.protocol_latency.unwrap();
plot_runtime_vs(
&df,
&labels,
cli.min_max_band,
&group.name,
"bandwidth",
1.0 / 1000.0, // Kbps to Mbps
"Runtime vs Bandwidth",
format!("{} ms Latency, {} mode", latency, cli.prover_kind),
format!("{} ms Latency", latency),
"runtime_vs_bandwidth",
"Bandwidth (Mbps)",
)?;
@@ -76,12 +101,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let bandwidth = group.bandwidth.unwrap();
plot_runtime_vs(
&df,
&labels,
cli.min_max_band,
&group.name,
"latency",
1.0,
"Runtime vs Latency",
format!("{} bps bandwidth, {} mode", bandwidth, cli.prover_kind),
format!("{} bps bandwidth", bandwidth),
"runtime_vs_latency",
"Latency (ms)",
)?;
@@ -94,6 +120,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
#[allow(clippy::too_many_arguments)]
fn plot_runtime_vs(
df: &DataFrame,
labels: &[String],
show_min_max: bool,
group: &str,
x_col: &str,
@@ -113,7 +140,7 @@ fn plot_runtime_vs(
(col("time_online").cast(DataType::Float32) / lit(1000.0)).alias("online"),
(col("time_total").cast(DataType::Float32) / lit(1000.0)).alias("total"),
])
.group_by([col("x")])
.group_by([col("x"), col("dataset_label")])
.agg([
col("preprocess").min().alias("preprocess_min"),
col("preprocess").mean().alias("preprocess_mean"),
@@ -125,9 +152,16 @@ fn plot_runtime_vs(
col("total").mean().alias("total_mean"),
col("total").max().alias("total_max"),
])
.sort(["x"], Default::default())
.sort(["dataset_label", "x"], Default::default())
.collect()?;
// Build legend entries
let mut legend_data = Vec::new();
for label in labels {
legend_data.push(format!("Total Mean ({})", label));
legend_data.push(format!("Online Mean ({})", label));
}
let mut chart = Chart::new()
.title(
Title::new()
@@ -139,7 +173,7 @@ fn plot_runtime_vs(
.tooltip(Tooltip::new().trigger(Trigger::Axis))
.legend(
Legend::new()
.data(vec!["Preprocess Mean", "Online Mean", "Total Mean"])
.data(legend_data)
.top("80")
.right("110")
.orient(Orient::Vertical)
@@ -163,20 +197,47 @@ fn plot_runtime_vs(
.name_text_style(TextStyle::new().font_size(21)),
);
chart = add_mean_series(&chart, &stats_df, "Preprocess Mean", "preprocess_mean")?;
chart = add_mean_series(&chart, &stats_df, "Online Mean", "online_mean")?;
chart = add_mean_series(&chart, &stats_df, "Total Mean", "total_mean")?;
// Add series for each dataset
// Define colors for each dataset
let colors = vec![
"#5470c6", "#91cc75", "#fac858", "#ee6666", "#73c0de", "#3ba272", "#fc8452", "#9a60b4",
];
if show_min_max {
chart = add_min_max_band(
for (idx, label) in labels.iter().enumerate() {
let color = colors.get(idx % colors.len()).unwrap();
// Total time - solid line
chart = add_dataset_series(
&chart,
&stats_df,
"Preprocess Min/Max",
"preprocess",
"#ccc",
label,
&format!("Total Mean ({})", label),
"total_mean",
false,
color,
)?;
chart = add_min_max_band(&chart, &stats_df, "Online Min/Max", "online", "#ccc")?;
chart = add_min_max_band(&chart, &stats_df, "Total Min/Max", "total", "#ccc")?;
// Online time - dashed line (same color as total)
chart = add_dataset_series(
&chart,
&stats_df,
label,
&format!("Online Mean ({})", label),
"online_mean",
true,
color,
)?;
if show_min_max {
chart = add_dataset_min_max_band(
&chart,
&stats_df,
label,
&format!("Total Min/Max ({})", label),
"total",
color,
)?;
}
}
// Save the chart as HTML file.
HtmlRenderer::new(title, 1000, 800)
@@ -192,14 +253,21 @@ fn plot_runtime_vs(
Ok(chart)
}
fn add_mean_series(
fn add_dataset_series(
chart: &Chart,
df: &DataFrame,
name: &str,
dataset_label: &str,
series_name: &str,
col_name: &str,
dashed: bool,
color: &str,
) -> Result<Chart, Box<dyn std::error::Error>> {
let x = df.column("x")?.f32()?;
let y = df.column(col_name)?.f32()?;
// Filter for specific dataset
let mask = df.column("dataset_label")?.str()?.equal(dataset_label);
let filtered = df.filter(&mask)?;
let x = filtered.column("x")?.f32()?;
let y = filtered.column(col_name)?.f32()?;
let data: Vec<Vec<f32>> = x
.into_iter()
@@ -207,21 +275,36 @@ fn add_mean_series(
.filter_map(|(x, y)| Some(vec![x?, y?]))
.collect();
Ok(chart
.clone()
.series(Line::new().name(name).data(data).symbol_size(6)))
let mut line = Line::new()
.name(series_name)
.data(data)
.symbol_size(6)
.item_style(ItemStyle::new().color(color));
let mut line_style = LineStyle::new();
if dashed {
line_style = line_style.type_(LineStyleType::Dashed);
}
line = line.line_style(line_style.color(color));
Ok(chart.clone().series(line))
}
fn add_min_max_band(
fn add_dataset_min_max_band(
chart: &Chart,
df: &DataFrame,
dataset_label: &str,
name: &str,
col_prefix: &str,
color: &str,
) -> Result<Chart, Box<dyn std::error::Error>> {
let x = df.column("x")?.f32()?;
let min_col = df.column(&format!("{}_min", col_prefix))?.f32()?;
let max_col = df.column(&format!("{}_max", col_prefix))?.f32()?;
// Filter for specific dataset
let mask = df.column("dataset_label")?.str()?.equal(dataset_label);
let filtered = df.filter(&mask)?;
let x = filtered.column("x")?.f32()?;
let min_col = filtered.column(&format!("{}_min", col_prefix))?.f32()?;
let max_col = filtered.column(&format!("{}_max", col_prefix))?.f32()?;
let max_data: Vec<Vec<f32>> = x
.into_iter()