Compare commits

...

4 Commits
dev ... plot_py

Author SHA1 Message Date
Hendrik Eeckhaut
b76775fc7c correction + legend placement 2025-12-23 15:11:35 +01:00
Hendrik Eeckhaut
72041d1f07 export dark svg 2025-12-23 14:47:19 +01:00
Hendrik Eeckhaut
ac1df8fc75 Allow plotting multiple data runs 2025-12-23 14:31:54 +01:00
Hendrik Eeckhaut
3cb7c5c0b4 Working on benchmark plots 2025-12-23 14:07:39 +01:00
10 changed files with 2375 additions and 209 deletions

1560
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -7,10 +7,9 @@ publish = false
[dependencies]
tlsn-harness-core = { workspace = true }
# tlsn-server-fixture = { workspace = true }
charming = { version = "0.5.1", features = ["ssr"] }
csv = "1.3.0"
charming = { version = "0.6.0", features = ["ssr"] }
clap = { workspace = true, features = ["derive", "env"] }
itertools = "0.14.0"
polars = { version = "0.44", features = ["csv", "lazy"] }
toml = { workspace = true }

View File

@@ -0,0 +1,111 @@
# TLSNotary Benchmark Plot Tool
Generates interactive HTML and SVG plots from TLSNotary benchmark results. Supports comparing multiple benchmark runs (e.g., before/after optimization, native vs browser).
## Usage
```bash
tlsn-harness-plot <TOML> <CSV>... [OPTIONS]
```
### Arguments
- `<TOML>` - Path to Bench.toml file defining benchmark structure
- `<CSV>...` - One or more CSV files with benchmark results
### Options
- `-l, --labels <LABEL>...` - Labels for each dataset (optional)
- If omitted, datasets are labeled "Dataset 1", "Dataset 2", etc.
- Number of labels must match number of CSV files
- `--min-max-band` - Add min/max bands to plots showing variance
- `-h, --help` - Print help information
## Examples
### Single Dataset
```bash
tlsn-harness-plot bench.toml results.csv
```
Generates plots from a single benchmark run.
### Compare Two Runs
```bash
tlsn-harness-plot bench.toml before.csv after.csv \
--labels "Before Optimization" "After Optimization"
```
Overlays two datasets to compare performance improvements.
### Multiple Datasets
```bash
tlsn-harness-plot bench.toml native.csv browser.csv wasm.csv \
--labels "Native" "Browser" "WASM"
```
Compare three different runtime environments.
### With Min/Max Bands
```bash
tlsn-harness-plot bench.toml run1.csv run2.csv \
--labels "Config A" "Config B" \
--min-max-band
```
Shows variance ranges for each dataset.
## Output Files
The tool generates two files per benchmark group:
- `<output>.html` - Interactive HTML chart (zoomable, hoverable)
- `<output>.svg` - Static SVG image for documentation
Default output filenames:
- `runtime_vs_bandwidth.{html,svg}` - When `protocol_latency` is defined in group
- `runtime_vs_latency.{html,svg}` - When `bandwidth` is defined in group
## Plot Format
Each dataset displays:
- **Solid line** - Total runtime (preprocessing + online phase)
- **Dashed line** - Online phase only
- **Shaded area** (optional) - Min/max variance bands
Different datasets automatically use distinct colors for easy comparison.
## CSV Format
Expected columns in each CSV file:
- `group` - Benchmark group name (must match TOML)
- `bandwidth` - Network bandwidth in Kbps (for bandwidth plots)
- `latency` - Network latency in ms (for latency plots)
- `time_preprocess` - Preprocessing time in ms
- `time_online` - Online phase time in ms
- `time_total` - Total runtime in ms
## TOML Format
The benchmark TOML file defines groups with either:
```toml
[[group]]
name = "my_benchmark"
protocol_latency = 50 # Fixed latency for bandwidth plots
# OR
bandwidth = 10000 # Fixed bandwidth for latency plots
```
All datasets must use the same TOML file to ensure consistent benchmark structure.
## Tips
- Use descriptive labels to make plots self-documenting
- Keep CSV files from the same benchmark configuration for valid comparisons
- Min/max bands are useful for showing stability but can clutter plots with many datasets
- Interactive HTML plots support zooming and hovering for detailed values

View File

@@ -1,17 +1,18 @@
use std::f32;
use charming::{
Chart, HtmlRenderer,
Chart, HtmlRenderer, ImageRenderer,
component::{Axis, Legend, Title},
element::{AreaStyle, LineStyle, NameLocation, Orient, TextStyle, Tooltip, Trigger},
element::{
AreaStyle, ItemStyle, LineStyle, LineStyleType, NameLocation, Orient, TextStyle, Tooltip,
Trigger,
},
series::Line,
theme::Theme,
};
use clap::Parser;
use harness_core::bench::{BenchItems, Measurement};
use itertools::Itertools;
const THEME: Theme = Theme::Default;
use harness_core::bench::BenchItems;
use polars::prelude::*;
#[derive(Parser, Debug)]
#[command(author, version, about)]
@@ -19,72 +20,131 @@ struct Cli {
/// Path to the Bench.toml file with benchmark spec
toml: String,
/// Path to the CSV file with benchmark results
csv: String,
/// Paths to CSV files with benchmark results (one or more)
csv: Vec<String>,
/// Prover kind: native or browser
#[arg(short, long, value_enum, default_value = "native")]
prover_kind: ProverKind,
/// Labels for each dataset (optional, defaults to "Dataset 1", "Dataset 2", etc.)
#[arg(short, long, num_args = 0..)]
labels: Vec<String>,
/// Add min/max bands to plots
#[arg(long, default_value_t = false)]
min_max_band: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
enum ProverKind {
Native,
Browser,
}
impl std::fmt::Display for ProverKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ProverKind::Native => write!(f, "Native"),
ProverKind::Browser => write!(f, "Browser"),
}
}
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let cli = Cli::parse();
let mut rdr = csv::Reader::from_path(&cli.csv)?;
if cli.csv.is_empty() {
return Err("At least one CSV file must be provided".into());
}
// Generate labels if not provided
let labels: Vec<String> = if cli.labels.is_empty() {
cli.csv
.iter()
.enumerate()
.map(|(i, _)| format!("Dataset {}", i + 1))
.collect()
} else if cli.labels.len() != cli.csv.len() {
return Err(format!(
"Number of labels ({}) must match number of CSV files ({})",
cli.labels.len(),
cli.csv.len()
)
.into());
} else {
cli.labels.clone()
};
// Load all CSVs and add dataset label
let mut dfs = Vec::new();
for (csv_path, label) in cli.csv.iter().zip(labels.iter()) {
let mut df = CsvReadOptions::default()
.try_into_reader_with_file_path(Some(csv_path.clone().into()))?
.finish()?;
let label_series = Series::new("dataset_label".into(), vec![label.as_str(); df.height()]);
df.with_column(label_series)?;
dfs.push(df);
}
// Combine all dataframes
let df = dfs
.into_iter()
.reduce(|acc, df| acc.vstack(&df).unwrap())
.unwrap();
let items: BenchItems = toml::from_str(&std::fs::read_to_string(&cli.toml)?)?;
let groups = items.group;
// Prepare data for plotting.
let all_data: Vec<Measurement> = rdr
.deserialize::<Measurement>()
.collect::<Result<Vec<_>, _>>()?;
for group in groups {
if group.protocol_latency.is_some() {
let latency = group.protocol_latency.unwrap();
plot_runtime_vs(
&all_data,
cli.min_max_band,
&group.name,
|r| r.bandwidth as f32 / 1000.0, // Kbps to Mbps
"Runtime vs Bandwidth",
format!("{} ms Latency, {} mode", latency, cli.prover_kind),
"runtime_vs_bandwidth.html",
"Bandwidth (Mbps)",
)?;
// Determine which field varies in benches for this group
let benches_in_group: Vec<_> = items
.bench
.iter()
.filter(|b| b.group.as_deref() == Some(&group.name))
.collect();
if benches_in_group.is_empty() {
continue;
}
if group.bandwidth.is_some() {
let bandwidth = group.bandwidth.unwrap();
// Check which field has varying values
let bandwidth_varies = benches_in_group
.windows(2)
.any(|w| w[0].bandwidth != w[1].bandwidth);
let latency_varies = benches_in_group
.windows(2)
.any(|w| w[0].protocol_latency != w[1].protocol_latency);
let download_size_varies = benches_in_group
.windows(2)
.any(|w| w[0].download_size != w[1].download_size);
if download_size_varies {
let upload_size = group.upload_size.unwrap_or(1024);
plot_runtime_vs(
&all_data,
&df,
&labels,
cli.min_max_band,
&group.name,
|r| r.latency as f32,
"download_size",
1.0 / 1024.0, // bytes to KB
"Runtime vs Response Size",
format!("{} bytes upload size", upload_size),
"runtime_vs_download_size",
"Response Size (KB)",
true, // legend on left
)?;
} else if bandwidth_varies {
let latency = group.protocol_latency.unwrap_or(50);
plot_runtime_vs(
&df,
&labels,
cli.min_max_band,
&group.name,
"bandwidth",
1.0 / 1000.0, // Kbps to Mbps
"Runtime vs Bandwidth",
format!("{} ms Latency", latency),
"runtime_vs_bandwidth",
"Bandwidth (Mbps)",
false, // legend on right
)?;
} else if latency_varies {
let bandwidth = group.bandwidth.unwrap_or(1000);
plot_runtime_vs(
&df,
&labels,
cli.min_max_band,
&group.name,
"latency",
1.0,
"Runtime vs Latency",
format!("{} bps bandwidth, {} mode", bandwidth, cli.prover_kind),
"runtime_vs_latency.html",
format!("{} bps bandwidth", bandwidth),
"runtime_vs_latency",
"Latency (ms)",
true, // legend on left
)?;
}
}
@@ -92,83 +152,51 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
struct DataPoint {
min: f32,
mean: f32,
max: f32,
}
struct Points {
preprocess: DataPoint,
online: DataPoint,
total: DataPoint,
}
#[allow(clippy::too_many_arguments)]
fn plot_runtime_vs<Fx>(
all_data: &[Measurement],
fn plot_runtime_vs(
df: &DataFrame,
labels: &[String],
show_min_max: bool,
group: &str,
x_value: Fx,
x_col: &str,
x_scale: f32,
title: &str,
subtitle: String,
output_file: &str,
x_axis_label: &str,
) -> Result<Chart, Box<dyn std::error::Error>>
where
Fx: Fn(&Measurement) -> f32,
{
fn data_point(values: &[f32]) -> DataPoint {
let mean = values.iter().copied().sum::<f32>() / values.len() as f32;
let max = values.iter().copied().reduce(f32::max).unwrap_or_default();
let min = values.iter().copied().reduce(f32::min).unwrap_or_default();
DataPoint { min, mean, max }
}
legend_left: bool,
) -> Result<Chart, Box<dyn std::error::Error>> {
let stats_df = df
.clone()
.lazy()
.filter(col("group").eq(lit(group)))
.with_column((col(x_col).cast(DataType::Float32) * lit(x_scale)).alias("x"))
.with_columns([
(col("time_preprocess").cast(DataType::Float32) / lit(1000.0)).alias("preprocess"),
(col("time_online").cast(DataType::Float32) / lit(1000.0)).alias("online"),
(col("time_total").cast(DataType::Float32) / lit(1000.0)).alias("total"),
])
.group_by([col("x"), col("dataset_label")])
.agg([
col("preprocess").min().alias("preprocess_min"),
col("preprocess").mean().alias("preprocess_mean"),
col("preprocess").max().alias("preprocess_max"),
col("online").min().alias("online_min"),
col("online").mean().alias("online_mean"),
col("online").max().alias("online_max"),
col("total").min().alias("total_min"),
col("total").mean().alias("total_mean"),
col("total").max().alias("total_max"),
])
.sort(["dataset_label", "x"], Default::default())
.collect()?;
let stats: Vec<(f32, Points)> = all_data
.iter()
.filter(|r| r.group.as_deref() == Some(group))
.map(|r| {
(
x_value(r),
r.time_preprocess as f32 / 1000.0, // ms to s
r.time_online as f32 / 1000.0,
r.time_total as f32 / 1000.0,
)
})
.sorted_by(|a, b| a.0.partial_cmp(&b.0).unwrap())
.chunk_by(|entry| entry.0)
.into_iter()
.map(|(x, group)| {
let group_vec: Vec<_> = group.collect();
let preprocess = data_point(
&group_vec
.iter()
.map(|(_, t, _, _)| *t)
.collect::<Vec<f32>>(),
);
let online = data_point(
&group_vec
.iter()
.map(|(_, _, t, _)| *t)
.collect::<Vec<f32>>(),
);
let total = data_point(
&group_vec
.iter()
.map(|(_, _, _, t)| *t)
.collect::<Vec<f32>>(),
);
(
x,
Points {
preprocess,
online,
total,
},
)
})
.collect();
// Build legend entries
let mut legend_data = Vec::new();
for label in labels {
legend_data.push(format!("Total Mean ({})", label));
legend_data.push(format!("Online Mean ({})", label));
}
let mut chart = Chart::new()
.title(
@@ -179,14 +207,6 @@ where
.subtext_style(TextStyle::new().font_size(16)),
)
.tooltip(Tooltip::new().trigger(Trigger::Axis))
.legend(
Legend::new()
.data(vec!["Preprocess Mean", "Online Mean", "Total Mean"])
.top("80")
.right("110")
.orient(Orient::Vertical)
.item_gap(10),
)
.x_axis(
Axis::new()
.name(x_axis_label)
@@ -205,73 +225,156 @@ where
.name_text_style(TextStyle::new().font_size(21)),
);
chart = add_mean_series(chart, &stats, "Preprocess Mean", |p| p.preprocess.mean);
chart = add_mean_series(chart, &stats, "Online Mean", |p| p.online.mean);
chart = add_mean_series(chart, &stats, "Total Mean", |p| p.total.mean);
// Add legend with conditional positioning
let legend = Legend::new()
.data(legend_data)
.top("80")
.orient(Orient::Vertical)
.item_gap(10);
if show_min_max {
chart = add_min_max_band(
chart,
&stats,
"Preprocess Min/Max",
|p| &p.preprocess,
"#ccc",
);
chart = add_min_max_band(chart, &stats, "Online Min/Max", |p| &p.online, "#ccc");
chart = add_min_max_band(chart, &stats, "Total Min/Max", |p| &p.total, "#ccc");
let legend = if legend_left {
legend.left("110")
} else {
legend.right("110")
};
chart = chart.legend(legend);
// Define colors for each dataset
let colors = vec![
"#5470c6", "#91cc75", "#fac858", "#ee6666", "#73c0de", "#3ba272", "#fc8452", "#9a60b4",
];
for (idx, label) in labels.iter().enumerate() {
let color = colors.get(idx % colors.len()).unwrap();
// Total time - solid line
chart = add_dataset_series(
&chart,
&stats_df,
label,
&format!("Total Mean ({})", label),
"total_mean",
false,
color,
)?;
// Online time - dashed line (same color as total)
chart = add_dataset_series(
&chart,
&stats_df,
label,
&format!("Online Mean ({})", label),
"online_mean",
true,
color,
)?;
if show_min_max {
chart = add_dataset_min_max_band(
&chart,
&stats_df,
label,
&format!("Total Min/Max ({})", label),
"total",
color,
)?;
}
}
// Save the chart as HTML file.
// Save the chart as HTML file (no theme)
HtmlRenderer::new(title, 1000, 800)
.theme(THEME)
.save(&chart, output_file)
.save(&chart, &format!("{}.html", output_file))
.unwrap();
// Save SVG with default theme
ImageRenderer::new(1000, 800)
.theme(Theme::Default)
.save(&chart, &format!("{}.svg", output_file))
.unwrap();
// Save SVG with dark theme
ImageRenderer::new(1000, 800)
.theme(Theme::Dark)
.save(&chart, &format!("{}_dark.svg", output_file))
.unwrap();
Ok(chart)
}
fn add_mean_series(
chart: Chart,
stats: &[(f32, Points)],
name: &str,
extract: impl Fn(&Points) -> f32,
) -> Chart {
chart.series(
Line::new()
.name(name)
.data(
stats
.iter()
.map(|(x, points)| vec![*x, extract(points)])
.collect(),
)
.symbol_size(6),
)
fn add_dataset_series(
chart: &Chart,
df: &DataFrame,
dataset_label: &str,
series_name: &str,
col_name: &str,
dashed: bool,
color: &str,
) -> Result<Chart, Box<dyn std::error::Error>> {
// Filter for specific dataset
let mask = df.column("dataset_label")?.str()?.equal(dataset_label);
let filtered = df.filter(&mask)?;
let x = filtered.column("x")?.f32()?;
let y = filtered.column(col_name)?.f32()?;
let data: Vec<Vec<f32>> = x
.into_iter()
.zip(y.into_iter())
.filter_map(|(x, y)| Some(vec![x?, y?]))
.collect();
let mut line = Line::new()
.name(series_name)
.data(data)
.symbol_size(6)
.item_style(ItemStyle::new().color(color));
let mut line_style = LineStyle::new();
if dashed {
line_style = line_style.type_(LineStyleType::Dashed);
}
line = line.line_style(line_style.color(color));
Ok(chart.clone().series(line))
}
fn add_min_max_band(
chart: Chart,
stats: &[(f32, Points)],
fn add_dataset_min_max_band(
chart: &Chart,
df: &DataFrame,
dataset_label: &str,
name: &str,
extract: impl Fn(&Points) -> &DataPoint,
col_prefix: &str,
color: &str,
) -> Chart {
chart.series(
) -> Result<Chart, Box<dyn std::error::Error>> {
// Filter for specific dataset
let mask = df.column("dataset_label")?.str()?.equal(dataset_label);
let filtered = df.filter(&mask)?;
let x = filtered.column("x")?.f32()?;
let min_col = filtered.column(&format!("{}_min", col_prefix))?.f32()?;
let max_col = filtered.column(&format!("{}_max", col_prefix))?.f32()?;
let max_data: Vec<Vec<f32>> = x
.into_iter()
.zip(max_col.into_iter())
.filter_map(|(x, y)| Some(vec![x?, y?]))
.collect();
let min_data: Vec<Vec<f32>> = x
.into_iter()
.zip(min_col.into_iter())
.filter_map(|(x, y)| Some(vec![x?, y?]))
.rev()
.collect();
let data: Vec<Vec<f32>> = max_data.into_iter().chain(min_data).collect();
Ok(chart.clone().series(
Line::new()
.name(name)
.data(
stats
.iter()
.map(|(x, points)| vec![*x, extract(points).max])
.chain(
stats
.iter()
.rev()
.map(|(x, points)| vec![*x, extract(points).min]),
)
.collect(),
)
.data(data)
.show_symbol(false)
.line_style(LineStyle::new().opacity(0.0))
.area_style(AreaStyle::new().opacity(0.3).color(color)),
)
))
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,25 @@
#### Bandwidth ####
[[group]]
name = "bandwidth"
protocol_latency = 25
[[bench]]
group = "bandwidth"
bandwidth = 10
[[bench]]
group = "bandwidth"
bandwidth = 50
[[bench]]
group = "bandwidth"
bandwidth = 100
[[bench]]
group = "bandwidth"
bandwidth = 250
[[bench]]
group = "bandwidth"
bandwidth = 1000

View File

@@ -0,0 +1,37 @@
[[group]]
name = "download_size"
protocol_latency = 10
bandwidth = 200
upload-size = 2048
[[bench]]
group = "download_size"
download-size = 1024
[[bench]]
group = "download_size"
download-size = 2048
[[bench]]
group = "download_size"
download-size = 4096
[[bench]]
group = "download_size"
download-size = 8192
[[bench]]
group = "download_size"
download-size = 16384
[[bench]]
group = "download_size"
download-size = 32768
[[bench]]
group = "download_size"
download-size = 65536
[[bench]]
group = "download_size"
download-size = 131072

View File

@@ -0,0 +1,25 @@
#### Latency ####
[[group]]
name = "latency"
bandwidth = 1000
[[bench]]
group = "latency"
protocol_latency = 10
[[bench]]
group = "latency"
protocol_latency = 25
[[bench]]
group = "latency"
protocol_latency = 50
[[bench]]
group = "latency"
protocol_latency = 100
[[bench]]
group = "latency"
protocol_latency = 200