mirror of
https://github.com/paradigmxyz/reth.git
synced 2026-01-10 07:48:19 -05:00
feat(reth-bench): add gas throughput chart to python script (#17572)
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,8 @@
|
||||
#
|
||||
# - A simple line graph plotting the latencies of the two files against each
|
||||
# other.
|
||||
#
|
||||
# - A gas per second (gas/s) chart showing throughput over time.
|
||||
|
||||
|
||||
import argparse
|
||||
@@ -23,25 +25,80 @@ import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
|
||||
def get_output_filename(base_path, suffix=None):
|
||||
"""Generate output filename with optional suffix."""
|
||||
if suffix is None:
|
||||
return base_path
|
||||
|
||||
# Split the base path into directory, name, and extension
|
||||
dir_name = os.path.dirname(base_path)
|
||||
base_name = os.path.basename(base_path)
|
||||
name, ext = os.path.splitext(base_name)
|
||||
|
||||
# Create new filename with suffix
|
||||
new_name = f"{name}_{suffix}{ext}"
|
||||
return os.path.join(dir_name, new_name) if dir_name else new_name
|
||||
|
||||
def format_gas_units(value, pos):
|
||||
"""Format gas values with appropriate units (gas, Kgas, Mgas, Ggas, Tgas)."""
|
||||
if value == 0:
|
||||
return '0'
|
||||
|
||||
# Define unit thresholds and labels
|
||||
units = [
|
||||
(1e12, 'Tgas'), # Teragas
|
||||
(1e9, 'Ggas'), # Gigagas
|
||||
(1e6, 'Mgas'), # Megagas
|
||||
(1e3, 'Kgas'), # Kilogas
|
||||
(1, 'gas') # gas
|
||||
]
|
||||
|
||||
abs_value = abs(value)
|
||||
for threshold, unit in units:
|
||||
if abs_value >= threshold:
|
||||
scaled_value = value / threshold
|
||||
# Format with appropriate precision
|
||||
if scaled_value >= 100:
|
||||
return f'{scaled_value:.0f}{unit}/s'
|
||||
elif scaled_value >= 10:
|
||||
return f'{scaled_value:.1f}{unit}/s'
|
||||
else:
|
||||
return f'{scaled_value:.2f}{unit}/s'
|
||||
|
||||
return f'{value:.0f}gas/s'
|
||||
|
||||
def moving_average(data, window_size):
|
||||
"""Calculate moving average with given window size."""
|
||||
if window_size <= 1:
|
||||
return data
|
||||
|
||||
# Use pandas for efficient rolling mean calculation
|
||||
series = pd.Series(data)
|
||||
return series.rolling(window=window_size, center=True, min_periods=1).mean().values
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Generate histogram of total_latency percent differences between two CSV files')
|
||||
parser.add_argument('baseline_csv', help='First CSV file, used as the baseline/control')
|
||||
parser.add_argument('comparison_csv', help='Second CSV file, which is being compared to the baseline')
|
||||
parser.add_argument('-o', '--output', default='latency.png', help='Output image file (default: latency.png)')
|
||||
parser.add_argument('--graphs', default='all', help='Comma-separated list of graphs to plot: histogram, line, all (default: all)')
|
||||
parser.add_argument('--graphs', default='all', help='Comma-separated list of graphs to plot: histogram, line, gas, all (default: all)')
|
||||
parser.add_argument('--average', type=int, metavar='N', help='Apply moving average over N blocks to smooth line and gas charts')
|
||||
parser.add_argument('--separate', action='store_true', help='Output each chart as a separate file')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse graph selection
|
||||
if args.graphs.lower() == 'all':
|
||||
selected_graphs = {'histogram', 'line'}
|
||||
selected_graphs = {'histogram', 'line', 'gas'}
|
||||
else:
|
||||
selected_graphs = set(graph.strip().lower() for graph in args.graphs.split(','))
|
||||
valid_graphs = {'histogram', 'line'}
|
||||
valid_graphs = {'histogram', 'line', 'gas'}
|
||||
invalid_graphs = selected_graphs - valid_graphs
|
||||
if invalid_graphs:
|
||||
print(f"Error: Invalid graph types: {', '.join(invalid_graphs)}. Valid options are: histogram, line, all", file=sys.stderr)
|
||||
print(f"Error: Invalid graph types: {', '.join(invalid_graphs)}. Valid options are: histogram, line, gas, all", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
@@ -62,6 +119,15 @@ def main():
|
||||
print(f"Error: 'total_latency' column not found in {args.comparison_csv}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Check for gas_used column if gas graph is selected
|
||||
if 'gas' in selected_graphs:
|
||||
if 'gas_used' not in df1.columns:
|
||||
print(f"Error: 'gas_used' column not found in {args.baseline_csv} (required for gas graph)", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
if 'gas_used' not in df2.columns:
|
||||
print(f"Error: 'gas_used' column not found in {args.comparison_csv} (required for gas graph)", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if len(df1) != len(df2):
|
||||
print("Warning: CSV files have different number of rows. Using minimum length.", file=sys.stderr)
|
||||
min_len = min(len(df1), len(df2))
|
||||
@@ -93,23 +159,35 @@ def main():
|
||||
print("Error: No valid graphs selected", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if num_plots == 1:
|
||||
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
|
||||
axes = [ax]
|
||||
# Store output filenames
|
||||
output_files = []
|
||||
|
||||
if args.separate:
|
||||
# We'll create individual figures for each graph
|
||||
pass
|
||||
else:
|
||||
fig, axes = plt.subplots(num_plots, 1, figsize=(12, 6 * num_plots))
|
||||
# Create combined figure
|
||||
if num_plots == 1:
|
||||
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
|
||||
axes = [ax]
|
||||
else:
|
||||
fig, axes = plt.subplots(num_plots, 1, figsize=(12, 6 * num_plots))
|
||||
|
||||
plot_idx = 0
|
||||
|
||||
# Plot histogram if selected
|
||||
if 'histogram' in selected_graphs:
|
||||
if args.separate:
|
||||
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
|
||||
else:
|
||||
ax = axes[plot_idx]
|
||||
|
||||
min_diff = np.floor(percent_diff.min())
|
||||
max_diff = np.ceil(percent_diff.max())
|
||||
|
||||
# Create histogram with 1% buckets
|
||||
bins = np.arange(min_diff, max_diff + 1, 1)
|
||||
|
||||
ax = axes[plot_idx]
|
||||
ax.hist(percent_diff, bins=bins, edgecolor='black', alpha=0.7)
|
||||
ax.set_xlabel('Percent Difference (%)')
|
||||
ax.set_ylabel('Number of Blocks')
|
||||
@@ -120,38 +198,151 @@ def main():
|
||||
ax.axvline(mean_diff, color='red', linestyle='--', label=f'Mean: {mean_diff:.2f}%')
|
||||
ax.axvline(median_diff, color='orange', linestyle='--', label=f'Median: {median_diff:.2f}%')
|
||||
ax.legend()
|
||||
plot_idx += 1
|
||||
|
||||
if args.separate:
|
||||
plt.tight_layout()
|
||||
output_file = get_output_filename(args.output, 'histogram')
|
||||
plt.savefig(output_file, dpi=300, bbox_inches='tight')
|
||||
output_files.append(output_file)
|
||||
plt.close(fig)
|
||||
else:
|
||||
plot_idx += 1
|
||||
|
||||
# Plot line graph if selected
|
||||
if 'line' in selected_graphs:
|
||||
if args.separate:
|
||||
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
|
||||
else:
|
||||
ax = axes[plot_idx]
|
||||
|
||||
# Determine comparison color based on median change. The median being
|
||||
# negative means processing time got faster, so that becomes green.
|
||||
comparison_color = 'green' if median_diff < 0 else 'red'
|
||||
|
||||
ax = axes[plot_idx]
|
||||
# Apply moving average if requested
|
||||
plot_latency1 = latency1[:len(percent_diff)]
|
||||
plot_latency2 = latency2[:len(percent_diff)]
|
||||
|
||||
if args.average:
|
||||
plot_latency1 = moving_average(plot_latency1, args.average)
|
||||
plot_latency2 = moving_average(plot_latency2, args.average)
|
||||
if 'block_number' in df1.columns and 'block_number' in df2.columns:
|
||||
block_numbers = df1['block_number'].values[:len(percent_diff)]
|
||||
ax.plot(block_numbers, latency1[:len(percent_diff)], 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
|
||||
ax.plot(block_numbers, latency2[:len(percent_diff)], comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
|
||||
ax.plot(block_numbers, plot_latency1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
|
||||
ax.plot(block_numbers, plot_latency2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
|
||||
ax.set_xlabel('Block Number')
|
||||
ax.set_ylabel('Total Latency (ms)')
|
||||
ax.set_title('Total Latency vs Block Number')
|
||||
title = 'Total Latency vs Block Number'
|
||||
if args.average:
|
||||
title += f' ({args.average}-block moving average)'
|
||||
ax.set_title(title)
|
||||
ax.grid(True, alpha=0.3)
|
||||
ax.legend()
|
||||
else:
|
||||
# If no block_number column, use index
|
||||
indices = np.arange(len(percent_diff))
|
||||
ax.plot(indices, latency1[:len(percent_diff)], 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
|
||||
ax.plot(indices, latency2[:len(percent_diff)], comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
|
||||
ax.plot(indices, plot_latency1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
|
||||
ax.plot(indices, plot_latency2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
|
||||
ax.set_xlabel('Block Index')
|
||||
ax.set_ylabel('Total Latency (ms)')
|
||||
ax.set_title('Total Latency vs Block Index')
|
||||
title = 'Total Latency vs Block Index'
|
||||
if args.average:
|
||||
title += f' ({args.average}-block moving average)'
|
||||
ax.set_title(title)
|
||||
ax.grid(True, alpha=0.3)
|
||||
ax.legend()
|
||||
plot_idx += 1
|
||||
|
||||
if args.separate:
|
||||
plt.tight_layout()
|
||||
output_file = get_output_filename(args.output, 'line')
|
||||
plt.savefig(output_file, dpi=300, bbox_inches='tight')
|
||||
output_files.append(output_file)
|
||||
plt.close(fig)
|
||||
else:
|
||||
plot_idx += 1
|
||||
|
||||
plt.tight_layout()
|
||||
plt.savefig(args.output, dpi=300, bbox_inches='tight')
|
||||
# Plot gas/s graph if selected
|
||||
if 'gas' in selected_graphs:
|
||||
if args.separate:
|
||||
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
|
||||
else:
|
||||
ax = axes[plot_idx]
|
||||
|
||||
# Calculate gas per second (gas/s)
|
||||
# latency is in microseconds, so convert to seconds for gas/s calculation
|
||||
gas1 = df1['gas_used'].values[:len(percent_diff)]
|
||||
gas2 = df2['gas_used'].values[:len(percent_diff)]
|
||||
|
||||
# Convert latency from microseconds to seconds
|
||||
latency1_sec = df1['total_latency'].values[:len(percent_diff)] / 1_000_000.0
|
||||
latency2_sec = df2['total_latency'].values[:len(percent_diff)] / 1_000_000.0
|
||||
|
||||
# Calculate gas per second
|
||||
gas_per_sec1 = gas1 / latency1_sec
|
||||
gas_per_sec2 = gas2 / latency2_sec
|
||||
|
||||
# Store original values for statistics before averaging
|
||||
original_gas_per_sec1 = gas_per_sec1.copy()
|
||||
original_gas_per_sec2 = gas_per_sec2.copy()
|
||||
|
||||
# Apply moving average if requested
|
||||
if args.average:
|
||||
gas_per_sec1 = moving_average(gas_per_sec1, args.average)
|
||||
gas_per_sec2 = moving_average(gas_per_sec2, args.average)
|
||||
|
||||
# Calculate median gas/s for color determination (use original values)
|
||||
median_gas_per_sec1 = np.median(original_gas_per_sec1)
|
||||
median_gas_per_sec2 = np.median(original_gas_per_sec2)
|
||||
comparison_color = 'green' if median_gas_per_sec2 > median_gas_per_sec1 else 'red'
|
||||
|
||||
if 'block_number' in df1.columns and 'block_number' in df2.columns:
|
||||
block_numbers = df1['block_number'].values[:len(percent_diff)]
|
||||
ax.plot(block_numbers, gas_per_sec1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
|
||||
ax.plot(block_numbers, gas_per_sec2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
|
||||
ax.set_xlabel('Block Number')
|
||||
ax.set_ylabel('Gas Throughput')
|
||||
title = 'Gas Throughput vs Block Number'
|
||||
if args.average:
|
||||
title += f' ({args.average}-block moving average)'
|
||||
ax.set_title(title)
|
||||
ax.grid(True, alpha=0.3)
|
||||
ax.legend()
|
||||
|
||||
# Format Y-axis with gas units
|
||||
formatter = FuncFormatter(format_gas_units)
|
||||
ax.yaxis.set_major_formatter(formatter)
|
||||
else:
|
||||
# If no block_number column, use index
|
||||
indices = np.arange(len(percent_diff))
|
||||
ax.plot(indices, gas_per_sec1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
|
||||
ax.plot(indices, gas_per_sec2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
|
||||
ax.set_xlabel('Block Index')
|
||||
ax.set_ylabel('Gas Throughput')
|
||||
title = 'Gas Throughput vs Block Index'
|
||||
if args.average:
|
||||
title += f' ({args.average}-block moving average)'
|
||||
ax.set_title(title)
|
||||
ax.grid(True, alpha=0.3)
|
||||
ax.legend()
|
||||
|
||||
# Format Y-axis with gas units
|
||||
formatter = FuncFormatter(format_gas_units)
|
||||
ax.yaxis.set_major_formatter(formatter)
|
||||
|
||||
if args.separate:
|
||||
plt.tight_layout()
|
||||
output_file = get_output_filename(args.output, 'gas')
|
||||
plt.savefig(output_file, dpi=300, bbox_inches='tight')
|
||||
output_files.append(output_file)
|
||||
plt.close(fig)
|
||||
else:
|
||||
plot_idx += 1
|
||||
|
||||
# Save combined figure if not using separate files
|
||||
if not args.separate:
|
||||
plt.tight_layout()
|
||||
plt.savefig(args.output, dpi=300, bbox_inches='tight')
|
||||
output_files.append(args.output)
|
||||
|
||||
# Create graph type description for output message
|
||||
graph_types = []
|
||||
@@ -159,8 +350,17 @@ def main():
|
||||
graph_types.append('histogram')
|
||||
if 'line' in selected_graphs:
|
||||
graph_types.append('latency graph')
|
||||
if 'gas' in selected_graphs:
|
||||
graph_types.append('gas/s graph')
|
||||
graph_desc = ' and '.join(graph_types)
|
||||
print(f"{graph_desc.capitalize()} saved to {args.output}")
|
||||
|
||||
# Print output file(s) information
|
||||
if args.separate:
|
||||
print(f"Saved {len(output_files)} separate files:")
|
||||
for output_file in output_files:
|
||||
print(f" - {output_file}")
|
||||
else:
|
||||
print(f"{graph_desc.capitalize()} saved to {args.output}")
|
||||
|
||||
# Always print statistics
|
||||
print(f"\nStatistics:")
|
||||
@@ -170,6 +370,15 @@ def main():
|
||||
print(f"Min: {percent_diff.min():.2f}%")
|
||||
print(f"Max: {percent_diff.max():.2f}%")
|
||||
print(f"Total blocks analyzed: {len(percent_diff)}")
|
||||
|
||||
# Print gas/s statistics if gas data is available
|
||||
if 'gas' in selected_graphs:
|
||||
# Use original values for statistics (not averaged)
|
||||
print(f"\nGas/s Statistics:")
|
||||
print(f"Baseline median gas/s: {median_gas_per_sec1:,.0f}")
|
||||
print(f"Comparison median gas/s: {median_gas_per_sec2:,.0f}")
|
||||
gas_diff_percent = ((median_gas_per_sec2 - median_gas_per_sec1) / median_gas_per_sec1) * 100
|
||||
print(f"Gas/s percent change: {gas_diff_percent:+.2f}%")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user