Refactor JsonExport and effectiveness plot (#3116)

Now `JsonExport` and `plot_effectiveness.py` both live in the APC (autoprecompiles) crate.

The other scripts were not moved: they rely on `metrics.json`, which I think
is specific to OVM?

---------

Co-authored-by: Georg Wiese <georgwiese@gmail.com>
This commit is contained in:
Steve Wang
2025-07-31 22:38:41 +08:00
committed by GitHub
parent 4be51aa95a
commit 1ded39b9f6
8 changed files with 88 additions and 44 deletions

View File

@@ -150,6 +150,7 @@ jobs:
python3 -m venv .venv
source .venv/bin/activate
pip install -r openvm/scripts/requirements.txt
pip install -r autoprecompiles/scripts/requirements.txt
- name: Remove old results if present
run: |
@@ -160,6 +161,7 @@ jobs:
run: |
source .venv/bin/activate
bash ./openvm/scripts/run_guest_benches.sh
bash ./autoprecompiles/scripts/run_guest_benches.sh
- name: Checkout openvm-reth-benchmark
uses: actions/checkout@v4
@@ -196,7 +198,7 @@ jobs:
python ../openvm/scripts/basic_metrics.py --csv $RES_DIR/noapc.json $RES_DIR/100apc.json > $RES_DIR/basic_metrics.csv
python ../openvm/scripts/plot_trace_cells.py -o $RES_DIR/trace_cells.png $RES_DIR/100apc.json > $RES_DIR/trace_cells.txt
python ../openvm/scripts/plot_effectiveness.py $RES_DIR/apc_candidates.json --output $RES_DIR/effectiveness.png
python ../autoprecompiles/scripts/plot_effectiveness.py $RES_DIR/apc_candidates.json --output $RES_DIR/effectiveness.png
mv $RES_DIR ../results/

View File

@@ -5,20 +5,30 @@ import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import argparse
import numpy as np
def load_apc_data(json_path):
def load_apc_data(json_path, effectiveness_type='cost'):
"""Load APC candidates and compute effectiveness."""
with open(json_path, 'r') as f:
data = json.load(f)
def calculate_effectiveness(item, eff_type):
if eff_type == 'cost':
return item['cost_before'] / item['cost_after']
elif eff_type == 'main_columns':
return item['stats']['before']['main_columns'] / item['stats']['after']['main_columns']
elif eff_type == 'constraints':
return item['stats']['before']['constraints'] / item['stats']['after']['constraints']
elif eff_type == 'bus_interactions':
return item['stats']['before']['bus_interactions'] / item['stats']['after']['bus_interactions']
else:
raise ValueError(f"Unknown effectiveness type: {eff_type}")
return pd.DataFrame([{
'start_pc': item['start_pc'],
'effectiveness': item['total_width_before'] / item['total_width_after'],
'start_pc': item['original_block']['start_pc'],
'effectiveness': calculate_effectiveness(item, effectiveness_type),
'instructions': len(item['original_block']['statements']),
'software_version_cells': item['total_width_before'] * item['execution_frequency'],
'total_width_before': item['total_width_before'],
'total_width_after': item['total_width_after']
'software_version_cells': item['width_before'] * item['execution_frequency'],
'width_before': item['width_before']
} for item in data])
def format_cell_count(count):
@@ -32,16 +42,16 @@ def format_cell_count(count):
else:
return f"{count:.0f}"
def plot_effectiveness(json_path, filename=None):
def plot_effectiveness(json_path, filename=None, effectiveness_type='cost'):
"""Generate bar plot of effectiveness data."""
df = load_apc_data(json_path)
df = load_apc_data(json_path, effectiveness_type)
total_cells = df['software_version_cells'].sum()
# Print top 10 basic blocks
top10 = df.nlargest(10, 'software_version_cells')[['start_pc', 'software_version_cells', 'effectiveness', 'instructions', 'total_width_before', 'total_width_after']]
top10 = df.nlargest(10, 'software_version_cells')[['start_pc', 'software_version_cells', 'effectiveness', 'instructions', 'width_before']]
top10['software_version_cells'] = top10['software_version_cells'].apply(format_cell_count)
top10.columns = ['Start PC', 'Trace Cells', 'Effectiveness', 'Instructions', 'Width Before', 'Width After']
print("\nTop 10 Basic Blocks by Trace Cells:")
top10.columns = ['Start PC', 'Trace Cells', 'Effectiveness', 'Instructions', 'Width Before']
print(f"\nTop 10 Basic Blocks by Trace Cells (Effectiveness: {effectiveness_type}):")
print(top10.to_string(index=False))
print()
@@ -104,7 +114,7 @@ def plot_effectiveness(json_path, filename=None):
# Formatting
ax.set_xlabel('Cumulative instruction trace cells (software version)', fontsize=12)
ax.set_ylabel('Effectiveness', fontsize=12)
ax.set_title("Effectiveness by Basic Block", fontsize=14)
ax.set_title(f"Effectiveness by Basic Block (reduction in {effectiveness_type})", fontsize=14)
ax.grid(True, alpha=0.3, axis='y')
ax.axhline(mean_effectiveness, color='red', linestyle='--', linewidth=2, alpha=0.7)
@@ -137,6 +147,10 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Plot effectiveness analysis from APC candidates JSON file.")
parser.add_argument("json_path", help="Path to the APC candidates JSON file")
parser.add_argument("-o", "--output", help="Optional file name to save the plot", default=None)
parser.add_argument("-e", "--effectiveness",
choices=['cost', 'main_columns', 'constraints', 'bus_interactions'],
default='cost',
help="Type of effectiveness calculation (default: cost_before/cost_after)")
args = parser.parse_args()
plot_effectiveness(args.json_path, args.output)
plot_effectiveness(args.json_path, args.output, args.effectiveness)

View File

@@ -0,0 +1,9 @@
### Scripts
Set up (from the project root):
```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -r autoprecompiles/scripts/requirements.txt
```

View File

@@ -0,0 +1,2 @@
pandas
matplotlib

View File

@@ -10,7 +10,7 @@ mod selection;
pub use detection::collect_basic_blocks;
pub use pgo::PgoConfig;
pub use pgo::{generate_apcs_with_pgo, Candidate};
pub use pgo::{generate_apcs_with_pgo, ApcCandidateJsonExport, Candidate};
pub use selection::KnapsackItem;
#[derive(Debug, Serialize, Deserialize, Clone)]

View File

@@ -11,6 +11,7 @@ use serde::{Deserialize, Serialize};
use crate::{
adapter::{Adapter, AdapterApc, AdapterVmConfig, ApcStats},
blocks::selection::{parallel_fractional_knapsack, KnapsackItem},
evaluation::EvaluationResult,
BasicBlock, PowdrConfig,
};
@@ -45,7 +46,6 @@ impl PgoConfig {
/// Trait for autoprecompile candidates.
/// Implementors of this trait wrap an APC with additional data used by the `KnapsackItem` trait to select the most cost-effective APCs.
pub trait Candidate<A: Adapter>: Sized + KnapsackItem {
type JsonExport: Serialize + for<'de> Deserialize<'de> + Send;
type ApcStats;
/// Try to create an autoprecompile candidate from a block.
@@ -56,13 +56,34 @@ pub trait Candidate<A: Adapter>: Sized + KnapsackItem {
) -> Self;
/// Return a JSON export of the APC candidate.
fn to_json_export(&self, apc_candidates_dir_path: &Path) -> Self::JsonExport;
fn to_json_export(
&self,
apc_candidates_dir_path: &Path,
) -> ApcCandidateJsonExport<A::Instruction>;
/// Convert the candidate into an autoprecompile and its statistics.
fn into_apc_and_stats(self) -> (AdapterApc<A>, Self::ApcStats);
}
// pub trait ApcStats {}
/// JSON-serializable summary of a single autoprecompile (APC) candidate.
///
/// This is the return type of `Candidate::to_json_export`. The type parameter
/// `I` is the instruction type stored in the original basic block.
///
/// No serde rename attributes are applied, so the field names below are the
/// keys of the exported JSON. `plot_effectiveness.py` reads several of these
/// keys by name; keep the script in sync when renaming a field.
#[derive(Serialize, Deserialize)]
pub struct ApcCandidateJsonExport<I> {
/// Execution count of the block. The effectiveness plot multiplies it by
/// `width_before` to estimate the trace cells of the software version.
pub execution_frequency: usize,
/// The original basic block (start PC and instructions) this candidate
/// was built from.
pub original_block: BasicBlock<I>,
/// Before and after optimization stats.
/// NOTE(review): the plot script reads `before`/`after` entries named
/// `main_columns`, `constraints` and `bus_interactions` — presumably the
/// fields of `EvaluationResult`; confirm against its definition.
pub stats: EvaluationResult,
/// Width before optimization, used for software version cells in the
/// effectiveness plot.
pub width_before: usize,
/// Value used in ranking of candidates.
pub value: usize,
/// Cost before optimization, used for the effectiveness calculation
/// (`cost_before / cost_after`).
pub cost_before: f64,
/// Cost after optimization, used for the effectiveness calculation and
/// ranking of candidates.
pub cost_after: f64,
/// Path to the APC candidate file.
pub apc_candidate_file: String,
}
// Note: This function can lead to OOM since it generates the apc for many blocks.
fn create_apcs_with_cell_pgo<A: Adapter>(

View File

@@ -26,7 +26,7 @@ plot_cells() {
}
plot_effectiveness() {
python3 $SCRIPTS_DIR/plot_effectiveness.py $1 --output effectiveness.png
python3 $SCRIPTS_DIR/../../autoprecompiles/scripts/plot_effectiveness.py $1 --output effectiveness.png
}
# usage: run_bench guest guest_manual_pcp apc_num input

View File

@@ -26,12 +26,15 @@ use openvm_stark_backend::{
};
use openvm_stark_sdk::p3_baby_bear::BabyBear;
use powdr_autoprecompiles::adapter::{Adapter, AdapterApc, AdapterVmConfig};
use powdr_autoprecompiles::blocks::{collect_basic_blocks, Instruction, Program};
use powdr_autoprecompiles::blocks::{
collect_basic_blocks, ApcCandidateJsonExport, Instruction, Program,
};
use powdr_autoprecompiles::blocks::{generate_apcs_with_pgo, Candidate, KnapsackItem, PgoConfig};
use powdr_autoprecompiles::evaluation::{evaluate_apc, EvaluationResult};
use powdr_autoprecompiles::expression::try_convert;
use powdr_autoprecompiles::SymbolicBusInteraction;
use powdr_autoprecompiles::VmConfig;
use powdr_autoprecompiles::{Apc, PowdrConfig};
use powdr_autoprecompiles::{BasicBlock, VmConfig};
use powdr_number::{BabyBearField, FieldElement, LargeInt};
use powdr_riscv_elf::debug_info::DebugInfo;
use serde::{Deserialize, Serialize};
@@ -325,6 +328,7 @@ pub struct OpenVmApcCandidate<F, I> {
apc: Apc<F, I>,
execution_frequency: usize,
widths: AirWidthsDiff,
stats: EvaluationResult,
}
#[derive(Clone, Serialize, Deserialize)]
@@ -339,7 +343,6 @@ impl OvmApcStats {
}
impl<'a> Candidate<BabyBearOpenVmApcAdapter<'a>> for OpenVmApcCandidate<BabyBear, Instr<BabyBear>> {
type JsonExport = OpenVmApcCandidateJsonExport<Instr<BabyBear>>;
type ApcStats = OvmApcStats;
fn create(
@@ -363,6 +366,12 @@ impl<'a> Candidate<BabyBearOpenVmApcAdapter<'a>> for OpenVmApcCandidate<BabyBear
})
.sum();
let stats = evaluate_apc(
&apc.block.statements,
vm_config.instruction_handler,
apc.machine(),
);
let execution_frequency =
*pgo_program_pc_count.get(&apc.block.start_pc).unwrap_or(&0) as usize;
@@ -370,6 +379,7 @@ impl<'a> Candidate<BabyBearOpenVmApcAdapter<'a>> for OpenVmApcCandidate<BabyBear
apc,
execution_frequency,
widths: AirWidthsDiff::new(width_before, width_after),
stats,
}
}
@@ -377,13 +387,15 @@ impl<'a> Candidate<BabyBearOpenVmApcAdapter<'a>> for OpenVmApcCandidate<BabyBear
fn to_json_export(
&self,
apc_candidates_dir_path: &Path,
) -> OpenVmApcCandidateJsonExport<Instr<BabyBear>> {
OpenVmApcCandidateJsonExport {
start_pc: self.apc.start_pc(),
) -> ApcCandidateJsonExport<Instr<BabyBear>> {
ApcCandidateJsonExport {
execution_frequency: self.execution_frequency,
original_block: self.apc.block.clone(),
total_width_before: self.widths.before.total(),
total_width_after: self.widths.after.total(),
stats: self.stats,
width_before: self.widths.before.total(),
value: self.value(),
cost_before: self.widths.before.total() as f64,
cost_after: self.widths.after.total() as f64,
apc_candidate_file: apc_candidates_dir_path
.join(format!("apc_{}.cbor", self.apc.start_pc()))
.display()
@@ -396,22 +408,6 @@ impl<'a> Candidate<BabyBearOpenVmApcAdapter<'a>> for OpenVmApcCandidate<BabyBear
}
}
#[derive(Serialize, Deserialize)]
pub struct OpenVmApcCandidateJsonExport<I> {
// start_pc
start_pc: u64,
// execution_frequency
execution_frequency: usize,
// original instructions
original_block: BasicBlock<I>,
// total width before optimisation
total_width_before: usize,
// total width after optimisation
total_width_after: usize,
// path to the apc candidate file
apc_candidate_file: String,
}
impl<P, I> OpenVmApcCandidate<P, I> {
fn cells_saved_per_row(&self) -> usize {
// The number of cells saved per row is the difference between the width before and after the APC.