mirror of
https://github.com/powdr-labs/powdr.git
synced 2026-01-10 11:38:11 -05:00
initial support for label reading for rv64 (#3060)
This PR adds a module to the `riscv-elf` crate that collects labels, their addresses, and jump destinations from an RV64 ELF binary. It does not compute debug info or the other data that the current 32-bit module produces. --------- Co-authored-by: Steve Wang <qian.wang.wg24@wharton.upenn.edu>
This commit is contained in:
@@ -24,3 +24,7 @@ workspace = true
|
||||
|
||||
[lib]
|
||||
bench = false # See https://github.com/bheisler/criterion.rs/issues/458
|
||||
|
||||
[[bin]]
|
||||
name = "elf-labels"
|
||||
path = "src/bin/elf-labels.rs"
|
||||
|
||||
245
riscv-elf/src/bin/elf-labels.rs
Normal file
245
riscv-elf/src/bin/elf-labels.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
#![allow(clippy::print_stdout)]
|
||||
|
||||
use goblin::elf::{
|
||||
header::{EI_CLASS, ELFCLASS32, ELFCLASS64},
|
||||
Elf,
|
||||
};
|
||||
use powdr_riscv_elf::{load_elf, rv64};
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::panic;
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
|
||||
fn main() {
|
||||
let args: Vec<String> = env::args().collect();
|
||||
|
||||
if args.len() != 2 {
|
||||
eprintln!("Usage: {} <elf-file>", args[0]);
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
let elf_path = Path::new(&args[1]);
|
||||
|
||||
if !elf_path.exists() {
|
||||
eprintln!("Error: File '{}' does not exist", elf_path.display());
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// Read the file to check if it's 32-bit or 64-bit
|
||||
let file_buffer = match fs::read(elf_path) {
|
||||
Ok(buffer) => buffer,
|
||||
Err(e) => {
|
||||
eprintln!("Error reading file: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let elf = match Elf::parse(&file_buffer) {
|
||||
Ok(elf) => elf,
|
||||
Err(e) => {
|
||||
eprintln!("Error parsing ELF header: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
match elf.header.e_ident[EI_CLASS] {
|
||||
ELFCLASS32 => {
|
||||
// The load_elf function panics on errors, so we catch it
|
||||
let result = panic::catch_unwind(|| load_elf(elf_path));
|
||||
|
||||
match result {
|
||||
Ok(program) => {
|
||||
println!(
|
||||
"RV32 ELF file analyzed successfully: {}",
|
||||
elf_path.display()
|
||||
);
|
||||
println!();
|
||||
print_elf_info_32(&program);
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Error loading RV32 ELF file: The file may be corrupted or not a valid RISC-V ELF");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
ELFCLASS64 => {
|
||||
// The load_elf_rv64 function panics on errors, so we catch it
|
||||
let result = panic::catch_unwind(|| rv64::compute_jumpdests(elf_path));
|
||||
|
||||
match result {
|
||||
Ok(labels) => {
|
||||
println!(
|
||||
"RV64 ELF file analyzed successfully: {}",
|
||||
elf_path.display()
|
||||
);
|
||||
println!();
|
||||
print_elf_info_64(&labels);
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Error loading RV64 ELF file: The file may be corrupted or not a valid RISC-V ELF");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
eprintln!("Unsupported ELF class");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_elf_info_32(program: &powdr_riscv_elf::ElfProgram) {
|
||||
// Get text labels from the program
|
||||
let text_labels = program.text_labels();
|
||||
|
||||
if text_labels.is_empty() {
|
||||
println!("No text labels found in the ELF file.");
|
||||
} else {
|
||||
println!("Text labels found: {}", text_labels.len());
|
||||
println!();
|
||||
println!("{:<16}", "Address");
|
||||
println!("{}", "-".repeat(16));
|
||||
|
||||
// Text labels are already sorted in BTreeSet
|
||||
for address in text_labels {
|
||||
println!("0x{address:08x}");
|
||||
}
|
||||
}
|
||||
|
||||
// Report on debug symbols
|
||||
let debug_info = program.debug_info();
|
||||
println!();
|
||||
println!("Debug information:");
|
||||
|
||||
// Since we can't iterate over SymbolTable directly, we'll use text_labels
|
||||
// and look up each address
|
||||
let mut symbol_count = 0;
|
||||
let mut function_symbols = Vec::new();
|
||||
|
||||
for &addr in text_labels {
|
||||
if let Some(name) = debug_info.symbols.try_get_one(addr) {
|
||||
symbol_count += 1;
|
||||
// Simple heuristic for functions: doesn't start with $ or contain .
|
||||
if !name.starts_with("$") && !name.contains(".") {
|
||||
function_symbols.push((addr, name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!(" Symbols at text label addresses: {symbol_count}");
|
||||
println!(" Function symbols: {}", function_symbols.len());
|
||||
|
||||
if !function_symbols.is_empty() {
|
||||
println!();
|
||||
println!("Function symbols:");
|
||||
println!("{:<16} {:<40}", "Address", "Symbol");
|
||||
println!("{}", "-".repeat(60));
|
||||
|
||||
for (address, name) in function_symbols {
|
||||
println!("0x{address:08x} {name}");
|
||||
}
|
||||
}
|
||||
|
||||
// Also show notes if available
|
||||
if !debug_info.notes.is_empty() {
|
||||
println!();
|
||||
println!("Debug notes:");
|
||||
let mut notes: Vec<_> = debug_info.notes.iter().collect();
|
||||
notes.sort_by_key(|(addr, _)| *addr);
|
||||
|
||||
for (addr, note) in notes {
|
||||
println!("0x{addr:08x}: {note}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_elf_info_64(labels: &rv64::Rv64Labels) {
|
||||
println!("Entry point: 0x{:016x}", labels.entry_point);
|
||||
println!("PC base: 0x{:016x}", labels.pc_base);
|
||||
println!();
|
||||
|
||||
if labels.jumpdests.is_empty() {
|
||||
println!("No text labels or jump destinations found.");
|
||||
} else {
|
||||
println!(
|
||||
"Text labels and jump destinations found: {}",
|
||||
labels.jumpdests.len()
|
||||
);
|
||||
println!();
|
||||
|
||||
// Show all labels with symbols if available
|
||||
println!("{:<20} {:<40}", "Address", "Symbol (if available)");
|
||||
println!("{}", "-".repeat(60));
|
||||
|
||||
for &addr in &labels.jumpdests {
|
||||
// Find symbol name if available
|
||||
let symbol = labels
|
||||
.symbols
|
||||
.iter()
|
||||
.find(|(sym_addr, _)| *sym_addr == addr)
|
||||
.map(|(_, name)| name.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
println!("0x{addr:016x} {symbol}");
|
||||
}
|
||||
|
||||
// Summary of symbols
|
||||
println!();
|
||||
println!("Summary:");
|
||||
println!(" Total labels/jumpdests: {}", labels.jumpdests.len());
|
||||
println!(" Named symbols: {}", labels.symbols.len());
|
||||
println!(
|
||||
" Jumpdests without symbols: {}",
|
||||
labels.jumpdests_with_debug_info.len()
|
||||
);
|
||||
|
||||
// Show function-like symbols separately
|
||||
let function_symbols: Vec<_> = labels
|
||||
.symbols
|
||||
.iter()
|
||||
.filter(|(_, name)| !name.starts_with("$") && !name.contains("."))
|
||||
.collect();
|
||||
|
||||
if !function_symbols.is_empty() {
|
||||
println!(" Function symbols: {}", function_symbols.len());
|
||||
}
|
||||
|
||||
// Show label to address map
|
||||
println!();
|
||||
println!("=== Label to Address Map ===");
|
||||
println!("{:<40} {:<20}", "Label", "Address");
|
||||
println!("{}", "-".repeat(60));
|
||||
|
||||
let mut sorted_symbols = labels.symbols.clone();
|
||||
sorted_symbols.sort_by(|a, b| a.1.cmp(&b.1));
|
||||
|
||||
for (addr, name) in sorted_symbols {
|
||||
println!("{name:<40} 0x{addr:016x}");
|
||||
}
|
||||
|
||||
// Show jumpdests that are not labels
|
||||
println!();
|
||||
println!("=== Jump Destinations Without Symbols ===");
|
||||
println!(
|
||||
"{:<20} {:<20} {:<40}",
|
||||
"Target Address", "From Address", "Instruction"
|
||||
);
|
||||
println!("{}", "-".repeat(80));
|
||||
|
||||
let mut sorted_jumpdests: Vec<_> = labels.jumpdests_with_debug_info.iter().collect();
|
||||
sorted_jumpdests.sort_by_key(|(addr, _)| *addr);
|
||||
|
||||
for (target_addr, sources) in sorted_jumpdests {
|
||||
for source in sources {
|
||||
println!(
|
||||
"0x{:016x} 0x{:016x} {}",
|
||||
target_addr, source.from_addr, source.instruction
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
println!();
|
||||
println!("PC Base: 0x{:016x}", labels.pc_base);
|
||||
}
|
||||
}
|
||||
@@ -26,6 +26,7 @@ use powdr_riscv_types::{
|
||||
};
|
||||
|
||||
pub mod debug_info;
|
||||
pub mod rv64;
|
||||
|
||||
use self::debug_info::{DebugInfo, SymbolTable};
|
||||
|
||||
|
||||
262
riscv-elf/src/rv64.rs
Normal file
262
riscv-elf/src/rv64.rs
Normal file
@@ -0,0 +1,262 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use goblin::elf::{
|
||||
header::{EI_CLASS, EI_DATA, ELFCLASS64, ELFDATA2LSB, EM_RISCV},
|
||||
Elf,
|
||||
};
|
||||
use raki::{decode::Decode, instruction::OpcodeKind as Op, Isa};
|
||||
|
||||
/// Describes one instruction that jumps or branches to a given destination.
///
/// Values of this type are stored per target address, so the target itself is
/// not part of the struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JumpDest {
    /// Address of the instruction that performs the jump.
    pub from_addr: u64,
    /// Human-readable rendering of the jumping instruction.
    pub instruction: String,
}
|
||||
|
||||
/// Minimal RV64 ELF program representation for label/jumpdest collection
|
||||
pub struct Rv64Labels {
|
||||
/// All text labels and jump destinations
|
||||
pub jumpdests: BTreeSet<u64>,
|
||||
/// Entry point address
|
||||
pub entry_point: u64,
|
||||
/// Symbol table for debugging
|
||||
pub symbols: Vec<(u64, String)>,
|
||||
/// Jump destinations that are not symbols (address -> source instructions)
|
||||
pub jumpdests_with_debug_info: BTreeMap<u64, Vec<JumpDest>>,
|
||||
/// PC base (lowest executable address)
|
||||
pub pc_base: u64,
|
||||
}
|
||||
|
||||
pub fn compute_jumpdests(file_name: &Path) -> Rv64Labels {
|
||||
log::info!("Loading RV64 ELF file: {}", file_name.display());
|
||||
let file_buffer = fs::read(file_name).unwrap();
|
||||
compute_jumpdests_from_buffer(&file_buffer)
|
||||
}
|
||||
|
||||
pub fn compute_jumpdests_from_buffer(file_buffer: &[u8]) -> Rv64Labels {
|
||||
let elf = Elf::parse(file_buffer).unwrap();
|
||||
|
||||
// Verify it's a 64-bit RISC-V ELF
|
||||
assert_eq!(
|
||||
elf.header.e_ident[EI_CLASS], ELFCLASS64,
|
||||
"Only 64-bit ELF files are supported by rv64 module!"
|
||||
);
|
||||
assert_eq!(
|
||||
elf.header.e_ident[EI_DATA], ELFDATA2LSB,
|
||||
"Only little-endian ELF files are supported!"
|
||||
);
|
||||
assert_eq!(
|
||||
elf.header.e_machine, EM_RISCV,
|
||||
"Only RISC-V ELF files are supported!"
|
||||
);
|
||||
|
||||
let mut jumpdests = BTreeSet::new();
|
||||
let mut jumpdests_with_debug_info = BTreeMap::new();
|
||||
|
||||
// Add entry point
|
||||
jumpdests.insert(elf.entry);
|
||||
|
||||
// Find PC base (lowest executable address)
|
||||
let pc_base = elf
|
||||
.program_headers
|
||||
.iter()
|
||||
.filter(|ph| ph.is_executable())
|
||||
.map(|ph| ph.p_vaddr)
|
||||
.min()
|
||||
.unwrap_or(0);
|
||||
|
||||
// Collect symbols that are in text sections
|
||||
let mut symbols = Vec::new();
|
||||
let mut symbol_addrs = BTreeSet::new();
|
||||
for sym in elf.syms.iter() {
|
||||
if sym.st_value != 0 {
|
||||
// Check if this symbol is in an executable section
|
||||
let in_text = elf.program_headers.iter().any(|ph| {
|
||||
ph.is_executable()
|
||||
&& sym.st_value >= ph.p_vaddr
|
||||
&& sym.st_value < ph.p_vaddr + ph.p_memsz
|
||||
});
|
||||
|
||||
if in_text {
|
||||
jumpdests.insert(sym.st_value);
|
||||
symbol_addrs.insert(sym.st_value);
|
||||
if let Some(name) = elf.strtab.get_at(sym.st_name) {
|
||||
symbols.push((sym.st_value, name.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan text sections for jump destinations
|
||||
for ph in elf.program_headers.iter() {
|
||||
if ph.is_executable() {
|
||||
let seg = &file_buffer[ph.p_offset as usize..(ph.p_offset + ph.p_filesz) as usize];
|
||||
scan_for_jump_targets(
|
||||
ph.p_vaddr,
|
||||
seg,
|
||||
&mut jumpdests,
|
||||
&mut jumpdests_with_debug_info,
|
||||
&symbol_addrs,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Rv64Labels {
|
||||
jumpdests,
|
||||
entry_point: elf.entry,
|
||||
symbols,
|
||||
jumpdests_with_debug_info,
|
||||
pc_base,
|
||||
}
|
||||
}
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
fn scan_for_jump_targets(
|
||||
base_addr: u64,
|
||||
data: &[u8],
|
||||
jumpdests: &mut BTreeSet<u64>,
|
||||
jumpdests_with_debug_info: &mut BTreeMap<u64, Vec<JumpDest>>,
|
||||
label_addrs: &BTreeSet<u64>,
|
||||
) {
|
||||
let mut addr = base_addr;
|
||||
let mut remaining = data;
|
||||
let mut last_was_auipc = false;
|
||||
|
||||
while remaining.len() >= 4 {
|
||||
// Assert that we have a 32-bit instruction.
|
||||
assert!(remaining[0] & 0b11 == 0b11);
|
||||
let insn_bytes = u32::from_le_bytes(remaining[0..4].try_into().unwrap());
|
||||
|
||||
if let Ok(insn) = insn_bytes.decode(Isa::Rv64) {
|
||||
// Check for jump/branch instructions
|
||||
match insn.opc {
|
||||
Op::JAL => {
|
||||
// JAL has a PC-relative immediate
|
||||
if let Some(imm) = insn.imm {
|
||||
let target = (addr as i64 + imm as i64) as u64;
|
||||
jumpdests.insert(target);
|
||||
|
||||
// Track non-symbol jumpdests
|
||||
if !label_addrs.contains(&target) {
|
||||
let jump_info = JumpDest {
|
||||
from_addr: addr,
|
||||
instruction: format!(
|
||||
"jal {}, 0x{:x}",
|
||||
insn.rd
|
||||
.map(|r| format!("x{r}"))
|
||||
.unwrap_or_else(|| "?".to_string()),
|
||||
target
|
||||
),
|
||||
};
|
||||
jumpdests_with_debug_info
|
||||
.entry(target)
|
||||
.or_default()
|
||||
.push(jump_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
Op::BEQ | Op::BNE | Op::BLT | Op::BGE | Op::BLTU | Op::BGEU => {
|
||||
// Conditional branches have PC-relative immediates
|
||||
if let Some(imm) = insn.imm {
|
||||
let target = (addr as i64 + imm as i64) as u64;
|
||||
jumpdests.insert(target);
|
||||
|
||||
// Track non-symbol jumpdests
|
||||
if !label_addrs.contains(&target) {
|
||||
let jump_info = JumpDest {
|
||||
from_addr: addr,
|
||||
instruction: format!(
|
||||
"{} {}, {}, 0x{:x}",
|
||||
format!("{:?}", insn.opc).to_lowercase(),
|
||||
insn.rs1
|
||||
.map(|r| format!("x{r}"))
|
||||
.unwrap_or_else(|| "?".to_string()),
|
||||
insn.rs2
|
||||
.map(|r| format!("x{r}"))
|
||||
.unwrap_or_else(|| "?".to_string()),
|
||||
target
|
||||
),
|
||||
};
|
||||
jumpdests_with_debug_info
|
||||
.entry(target)
|
||||
.or_default()
|
||||
.push(jump_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
Op::AUIPC => {
|
||||
// AUIPC is often followed by JALR for function calls and long jumps
|
||||
// In statically linked binaries, these usually target known symbols
|
||||
if remaining.len() >= 8 {
|
||||
let next_insn_bytes =
|
||||
u32::from_le_bytes(remaining[4..8].try_into().unwrap());
|
||||
if let Ok(next_insn) = next_insn_bytes.decode(Isa::Rv64) {
|
||||
if matches!(next_insn.opc, Op::JALR) && insn.rd == next_insn.rs1 {
|
||||
// This is an AUIPC+JALR pair
|
||||
if let (Some(auipc_imm), Some(jalr_imm)) = (insn.imm, next_insn.imm)
|
||||
{
|
||||
let target =
|
||||
(addr as i64 + auipc_imm as i64 + jalr_imm as i64) as u64;
|
||||
jumpdests.insert(target);
|
||||
|
||||
// Track non-symbol jumpdests
|
||||
if !label_addrs.contains(&target) {
|
||||
let jump_info = JumpDest {
|
||||
from_addr: addr,
|
||||
instruction: format!("auipc+jalr -> 0x{target:x}"),
|
||||
};
|
||||
jumpdests_with_debug_info
|
||||
.entry(target)
|
||||
.or_default()
|
||||
.push(jump_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Op::JALR => {
|
||||
// Only process if this JALR is not part of an AUIPC+JALR pair
|
||||
if !last_was_auipc {
|
||||
// Standalone JALR without preceding AUIPC
|
||||
// These are dynamic jumps we can't resolve statically:
|
||||
// - Return instructions (jalr x0, x1, 0)
|
||||
// - Indirect calls through function pointers
|
||||
// - Computed jumps (switch statements, vtables)
|
||||
// We just note their existence for completeness
|
||||
|
||||
let rs1_str = insn
|
||||
.rs1
|
||||
.map(|r| format!("x{r}"))
|
||||
.unwrap_or_else(|| "?".to_string());
|
||||
let rd_str = insn
|
||||
.rd
|
||||
.map(|r| format!("x{r}"))
|
||||
.unwrap_or_else(|| "?".to_string());
|
||||
let imm = insn.imm.unwrap_or(0);
|
||||
|
||||
// Only log if it's not a standard return (jalr x0, x1, 0)
|
||||
if !(insn.rd == Some(0) && insn.rs1 == Some(1) && imm == 0) {
|
||||
eprintln!(
|
||||
"Note: Dynamic jump at 0x{addr:x}: jalr {rd_str}, {rs1_str}, {imm}",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
// Update for next iteration
|
||||
last_was_auipc = matches!(insn.opc, Op::AUIPC);
|
||||
} else {
|
||||
panic!("Could not decode instruction")
|
||||
}
|
||||
|
||||
addr += 4;
|
||||
remaining = &remaining[4..];
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user