initial support for label reading for rv64 (#3060)

This PR adds a module to the `riscv-elf` crate that collects labels,
their addresses, and jump destinations from an RV64 ELF binary. It does
not yet compute debug information or the other data that the current
32-bit module provides.

---------

Co-authored-by: Steve Wang <qian.wang.wg24@wharton.upenn.edu>
This commit is contained in:
Leo
2025-07-22 12:34:13 +02:00
committed by GitHub
parent 9a97fca7c6
commit e6ff8810b8
4 changed files with 512 additions and 0 deletions

View File

@@ -24,3 +24,7 @@ workspace = true
[lib]
bench = false # See https://github.com/bheisler/criterion.rs/issues/458
[[bin]]
name = "elf-labels"
path = "src/bin/elf-labels.rs"

View File

@@ -0,0 +1,245 @@
#![allow(clippy::print_stdout)]
use goblin::elf::{
header::{EI_CLASS, ELFCLASS32, ELFCLASS64},
Elf,
};
use powdr_riscv_elf::{load_elf, rv64};
use std::env;
use std::fs;
use std::panic;
use std::path::Path;
use std::process;
/// Command-line entry point: `elf-labels <elf-file>`.
///
/// Reads the given file, inspects the ELF class byte of its header and then
/// dispatches to the 32-bit loader (`load_elf`) or the 64-bit label collector
/// (`rv64::compute_jumpdests`), printing a report for whichever applies.
/// Every failure is reported on stderr and terminates the process with exit
/// code 1.
fn main() {
    let args = env::args().collect::<Vec<String>>();
    if args.len() != 2 {
        eprintln!("Usage: {} <elf-file>", args[0]);
        process::exit(1);
    }

    let elf_path = Path::new(&args[1]);
    if !elf_path.exists() {
        eprintln!("Error: File '{}' does not exist", elf_path.display());
        process::exit(1);
    }

    // The raw bytes are only needed here to look at the ELF header; the
    // loaders below read the file again from `elf_path`.
    let file_buffer = fs::read(elf_path).unwrap_or_else(|e| {
        eprintln!("Error reading file: {e}");
        process::exit(1)
    });
    let elf = Elf::parse(&file_buffer).unwrap_or_else(|e| {
        eprintln!("Error parsing ELF header: {e}");
        process::exit(1)
    });

    match elf.header.e_ident[EI_CLASS] {
        ELFCLASS32 => {
            // `load_elf` reports errors by panicking, so the panic is caught
            // and turned into a regular error exit.
            let program = panic::catch_unwind(|| load_elf(elf_path)).unwrap_or_else(|_| {
                eprintln!("Error loading RV32 ELF file: The file may be corrupted or not a valid RISC-V ELF");
                process::exit(1)
            });
            println!(
                "RV32 ELF file analyzed successfully: {}",
                elf_path.display()
            );
            println!();
            print_elf_info_32(&program);
        }
        ELFCLASS64 => {
            // `rv64::compute_jumpdests` reports errors by panicking as well.
            let labels =
                panic::catch_unwind(|| rv64::compute_jumpdests(elf_path)).unwrap_or_else(|_| {
                    eprintln!("Error loading RV64 ELF file: The file may be corrupted or not a valid RISC-V ELF");
                    process::exit(1)
                });
            println!(
                "RV64 ELF file analyzed successfully: {}",
                elf_path.display()
            );
            println!();
            print_elf_info_64(&labels);
        }
        _ => {
            eprintln!("Unsupported ELF class");
            process::exit(1);
        }
    }
}
/// Prints a report for a loaded 32-bit program to stdout.
///
/// The report lists every text label address, then the symbols that the
/// debug info knows at those addresses (with a name-based guess at which of
/// them are functions), and finally any debug notes sorted by address.
fn print_elf_info_32(program: &powdr_riscv_elf::ElfProgram) {
    let text_labels = program.text_labels();
    if !text_labels.is_empty() {
        println!("Text labels found: {}", text_labels.len());
        println!();
        println!("{:<16}", "Address");
        println!("{}", "-".repeat(16));
        // The label collection is iterated as-is; it is expected to be ordered.
        for address in text_labels {
            println!("0x{address:08x}");
        }
    } else {
        println!("No text labels found in the ELF file.");
    }

    let debug_info = program.debug_info();
    println!();
    println!("Debug information:");

    // The symbol table is only queried by address here, so every text label
    // address is probed for a symbol.
    let named_labels: Vec<_> = text_labels
        .iter()
        .filter_map(|&addr| {
            debug_info
                .symbols
                .try_get_one(addr)
                .map(|name| (addr, name))
        })
        .collect();
    let symbol_count = named_labels.len();

    // Heuristic: names starting with `$` or containing `.` are not functions.
    let function_symbols: Vec<_> = named_labels
        .into_iter()
        .filter(|(_, name)| !name.starts_with("$") && !name.contains("."))
        .collect();

    println!(" Symbols at text label addresses: {symbol_count}");
    println!(" Function symbols: {}", function_symbols.len());

    if !function_symbols.is_empty() {
        println!();
        println!("Function symbols:");
        println!("{:<16} {:<40}", "Address", "Symbol");
        println!("{}", "-".repeat(60));
        for (address, name) in function_symbols {
            println!("0x{address:08x} {name}");
        }
    }

    if !debug_info.notes.is_empty() {
        println!();
        println!("Debug notes:");
        let mut ordered_notes: Vec<_> = debug_info.notes.iter().collect();
        ordered_notes.sort_by_key(|(addr, _)| *addr);
        for (addr, note) in ordered_notes {
            println!("0x{addr:08x}: {note}");
        }
    }
}
/// Prints a report for the labels collected from a 64-bit ELF to stdout.
///
/// The report contains the entry point and PC base, every jump destination
/// together with the first symbol recorded at that address (if any), a
/// summary, the symbols sorted by name, and every jump destination that has
/// no symbol together with the instructions that jump there.
fn print_elf_info_64(labels: &rv64::Rv64Labels) {
    println!("Entry point: 0x{:016x}", labels.entry_point);
    println!("PC base: 0x{:016x}", labels.pc_base);
    println!();

    if labels.jumpdests.is_empty() {
        println!("No text labels or jump destinations found.");
        return;
    }

    println!(
        "Text labels and jump destinations found: {}",
        labels.jumpdests.len()
    );
    println!();

    // Index the symbols by address once instead of scanning the whole symbol
    // list for every jump destination. `or_insert` keeps the first symbol seen
    // for an address, matching a front-to-back linear search.
    let mut first_symbol_at: std::collections::HashMap<u64, &str> =
        std::collections::HashMap::with_capacity(labels.symbols.len());
    for (sym_addr, name) in &labels.symbols {
        first_symbol_at.entry(*sym_addr).or_insert(name.as_str());
    }

    println!("{:<20} {:<40}", "Address", "Symbol (if available)");
    println!("{}", "-".repeat(60));
    for &addr in &labels.jumpdests {
        let symbol = first_symbol_at.get(&addr).copied().unwrap_or("");
        println!("0x{addr:016x} {symbol}");
    }

    println!();
    println!("Summary:");
    println!(" Total labels/jumpdests: {}", labels.jumpdests.len());
    println!(" Named symbols: {}", labels.symbols.len());
    println!(
        " Jumpdests without symbols: {}",
        labels.jumpdests_with_debug_info.len()
    );

    // Heuristic: names starting with `$` or containing `.` are not functions.
    let function_symbol_count = labels
        .symbols
        .iter()
        .filter(|(_, name)| !name.starts_with("$") && !name.contains("."))
        .count();
    if function_symbol_count != 0 {
        println!(" Function symbols: {function_symbol_count}");
    }

    println!();
    println!("=== Label to Address Map ===");
    println!("{:<40} {:<20}", "Label", "Address");
    println!("{}", "-".repeat(60));
    // Sort references rather than a clone of the whole symbol list. The sort
    // is stable, so symbols sharing a name keep their original order.
    let mut sorted_symbols: Vec<&(u64, String)> = labels.symbols.iter().collect();
    sorted_symbols.sort_by(|a, b| a.1.cmp(&b.1));
    for (addr, name) in sorted_symbols {
        println!("{name:<40} 0x{addr:016x}");
    }

    println!();
    println!("=== Jump Destinations Without Symbols ===");
    println!(
        "{:<20} {:<20} {:<40}",
        "Target Address", "From Address", "Instruction"
    );
    println!("{}", "-".repeat(80));
    // A `BTreeMap` already iterates in ascending key order, so no extra sort
    // is needed.
    for (target_addr, sources) in &labels.jumpdests_with_debug_info {
        for source in sources {
            println!(
                "0x{:016x} 0x{:016x} {}",
                target_addr, source.from_addr, source.instruction
            );
        }
    }

    println!();
    println!("PC Base: 0x{:016x}", labels.pc_base);
}

View File

@@ -26,6 +26,7 @@ use powdr_riscv_types::{
};
pub mod debug_info;
pub mod rv64;
use self::debug_info::{DebugInfo, SymbolTable};

262
riscv-elf/src/rv64.rs Normal file
View File

@@ -0,0 +1,262 @@
use std::collections::BTreeSet;
use std::fs;
use std::path::Path;
use goblin::elf::{
header::{EI_CLASS, EI_DATA, ELFCLASS64, ELFDATA2LSB, EM_RISCV},
Elf,
};
use raki::{decode::Decode, instruction::OpcodeKind as Op, Isa};
/// Describes one instruction that jumps to a given destination.
///
/// Values of this type are stored per target address, so the target itself is
/// not part of the struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JumpDest {
    /// Address of the instruction that performs the jump.
    pub from_addr: u64,
    /// Human-readable rendering of the jumping instruction.
    pub instruction: String,
}
/// Minimal RV64 ELF program representation for label/jumpdest collection.
///
/// Only the data needed to enumerate labels and jump destinations is kept;
/// no code or data contents of the binary are stored.
#[derive(Debug, Clone)]
pub struct Rv64Labels {
    /// All text labels and jump destinations, in ascending address order.
    pub jumpdests: BTreeSet<u64>,
    /// Entry point address taken from the ELF header.
    pub entry_point: u64,
    /// `(address, name)` pairs of the named symbols that lie in executable
    /// segments. The same address may appear more than once.
    pub symbols: Vec<(u64, String)>,
    /// Jump destinations that are not symbols (address -> source instructions).
    pub jumpdests_with_debug_info: BTreeMap<u64, Vec<JumpDest>>,
    /// PC base: the lowest virtual address of any executable segment, or 0 if
    /// there is none.
    pub pc_base: u64,
}
/// Reads the RV64 ELF file at `file_name` and collects its labels and jump
/// destinations.
///
/// This is a thin wrapper that loads the file into memory and forwards to
/// [`compute_jumpdests_from_buffer`].
///
/// # Panics
///
/// Panics if the file cannot be read (the message names the file and the
/// underlying I/O error), or if [`compute_jumpdests_from_buffer`] panics.
pub fn compute_jumpdests(file_name: &Path) -> Rv64Labels {
    log::info!("Loading RV64 ELF file: {}", file_name.display());
    let file_buffer = fs::read(file_name).unwrap_or_else(|err| {
        panic!("Failed to read ELF file '{}': {err}", file_name.display())
    });
    compute_jumpdests_from_buffer(&file_buffer)
}
/// Collects labels and jump destinations from the bytes of an RV64 ELF file.
///
/// The result contains:
/// - the entry point,
/// - the PC base (lowest virtual address of an executable segment, 0 if none),
/// - every symbol with a non-zero address inside an executable segment,
/// - every jump target found by scanning the file contents of the executable
///   segments.
///
/// # Panics
///
/// Panics if the buffer is not a valid ELF file, if it is not a 64-bit
/// little-endian RISC-V ELF, if an executable segment does not lie within
/// `file_buffer`, or if scanning an executable segment panics.
pub fn compute_jumpdests_from_buffer(file_buffer: &[u8]) -> Rv64Labels {
    let elf = Elf::parse(file_buffer)
        .unwrap_or_else(|err| panic!("Failed to parse ELF file: {err}"));

    // Verify it's a 64-bit little-endian RISC-V ELF.
    assert_eq!(
        elf.header.e_ident[EI_CLASS], ELFCLASS64,
        "Only 64-bit ELF files are supported by rv64 module!"
    );
    assert_eq!(
        elf.header.e_ident[EI_DATA], ELFDATA2LSB,
        "Only little-endian ELF files are supported!"
    );
    assert_eq!(
        elf.header.e_machine, EM_RISCV,
        "Only RISC-V ELF files are supported!"
    );

    let mut jumpdests = BTreeSet::new();
    let mut jumpdests_with_debug_info = BTreeMap::new();

    // The entry point is always a jump destination.
    jumpdests.insert(elf.entry);

    // PC base: lowest virtual address of any executable segment.
    let pc_base = elf
        .program_headers
        .iter()
        .filter(|ph| ph.is_executable())
        .map(|ph| ph.p_vaddr)
        .min()
        .unwrap_or(0);

    // Collect the symbols that point into executable segments.
    let mut symbols = Vec::new();
    let mut symbol_addrs = BTreeSet::new();
    for sym in elf.syms.iter() {
        if sym.st_value == 0 {
            continue;
        }
        let in_text = elf.program_headers.iter().any(|ph| {
            ph.is_executable()
                && sym.st_value >= ph.p_vaddr
                // Compare the offset into the segment against its size rather
                // than computing `p_vaddr + p_memsz`, which can overflow for
                // malformed headers.
                && sym.st_value - ph.p_vaddr < ph.p_memsz
        });
        if !in_text {
            continue;
        }
        jumpdests.insert(sym.st_value);
        symbol_addrs.insert(sym.st_value);
        // Symbols whose name cannot be resolved still count as labels but are
        // not listed by name.
        if let Some(name) = elf.strtab.get_at(sym.st_name) {
            symbols.push((sym.st_value, name.to_string()));
        }
    }

    // Scan the file contents of every executable segment for jump targets.
    for ph in elf.program_headers.iter().filter(|ph| ph.is_executable()) {
        // Build the byte range with checked conversions and a checked slice so
        // that a header pointing outside the file yields a clear panic instead
        // of an arithmetic overflow or an opaque index error.
        let seg = usize::try_from(ph.p_offset)
            .ok()
            .zip(usize::try_from(ph.p_filesz).ok())
            .and_then(|(start, len)| start.checked_add(len).map(|end| start..end))
            .and_then(|range| file_buffer.get(range))
            .expect("Executable segment extends beyond the end of the ELF file!");
        scan_for_jump_targets(
            ph.p_vaddr,
            seg,
            &mut jumpdests,
            &mut jumpdests_with_debug_info,
            &symbol_addrs,
        );
    }

    Rv64Labels {
        jumpdests,
        entry_point: elf.entry,
        symbols,
        jumpdests_with_debug_info,
        pc_base,
    }
}
use std::collections::BTreeMap;
/// Formats an optional register index as `x<N>`, or `?` if it is absent.
fn format_reg<R: std::fmt::Display>(reg: Option<R>) -> String {
    reg.map_or_else(|| "?".to_string(), |r| format!("x{r}"))
}

/// Records `target` as a jump destination and, if it is not a known label,
/// remembers the instruction at `from_addr` (rendered lazily by `describe`)
/// as one of its sources.
fn record_jump_target(
    target: u64,
    from_addr: u64,
    describe: impl FnOnce() -> String,
    jumpdests: &mut BTreeSet<u64>,
    jumpdests_with_debug_info: &mut BTreeMap<u64, Vec<JumpDest>>,
    label_addrs: &BTreeSet<u64>,
) {
    jumpdests.insert(target);
    if !label_addrs.contains(&target) {
        jumpdests_with_debug_info
            .entry(target)
            .or_default()
            .push(JumpDest {
                from_addr,
                instruction: describe(),
            });
    }
}

/// Scans `data`, interpreted as a sequence of 32-bit instructions starting at
/// virtual address `base_addr`, for statically resolvable jump targets.
///
/// Targets are collected from:
/// - `JAL` and the conditional branches (PC-relative immediate),
/// - an `AUIPC` directly followed by a `JALR` whose `rs1` is the `AUIPC`'s
///   `rd` (PC + both immediates).
///
/// Every target is inserted into `jumpdests`. Targets that are not contained
/// in `label_addrs` are additionally recorded in `jumpdests_with_debug_info`
/// together with the jumping instruction. A `JALR` that is not the second
/// half of such a pair cannot be resolved statically; it is reported on stderr
/// unless it is the standard return `jalr x0, x1, 0`.
///
/// Trailing bytes that do not form a full 4-byte word are ignored.
///
/// # Panics
///
/// Panics if a word does not have the two low bits set (i.e. is not encoded as
/// a 32-bit instruction) or cannot be decoded.
fn scan_for_jump_targets(
    base_addr: u64,
    data: &[u8],
    jumpdests: &mut BTreeSet<u64>,
    jumpdests_with_debug_info: &mut BTreeMap<u64, Vec<JumpDest>>,
    label_addrs: &BTreeSet<u64>,
) {
    let mut addr = base_addr;
    let mut remaining = data;
    // True if the previous instruction was an AUIPC that forms a pair with the
    // current JALR (the JALR reads the register the AUIPC wrote).
    let mut paired_with_prev_auipc = false;

    while remaining.len() >= 4 {
        // Assert that we have a 32-bit instruction.
        assert!(remaining[0] & 0b11 == 0b11);
        let insn_bytes = u32::from_le_bytes(remaining[0..4].try_into().unwrap());
        let insn = insn_bytes
            .decode(Isa::Rv64)
            .unwrap_or_else(|_| panic!("Could not decode instruction"));

        // Set by the AUIPC arm if the next instruction is its matching JALR.
        let mut pairs_with_next = false;

        match insn.opc {
            Op::JAL => {
                // JAL has a PC-relative immediate.
                if let Some(imm) = insn.imm {
                    // Two's-complement wrapping add: same result as signed
                    // addition, without the overflow panic in debug builds.
                    let target = addr.wrapping_add(imm as i64 as u64);
                    record_jump_target(
                        target,
                        addr,
                        || format!("jal {}, 0x{:x}", format_reg(insn.rd), target),
                        jumpdests,
                        jumpdests_with_debug_info,
                        label_addrs,
                    );
                }
            }
            Op::BEQ | Op::BNE | Op::BLT | Op::BGE | Op::BLTU | Op::BGEU => {
                // Conditional branches have PC-relative immediates.
                if let Some(imm) = insn.imm {
                    let target = addr.wrapping_add(imm as i64 as u64);
                    record_jump_target(
                        target,
                        addr,
                        || {
                            format!(
                                "{} {}, {}, 0x{:x}",
                                format!("{:?}", insn.opc).to_lowercase(),
                                format_reg(insn.rs1),
                                format_reg(insn.rs2),
                                target
                            )
                        },
                        jumpdests,
                        jumpdests_with_debug_info,
                        label_addrs,
                    );
                }
            }
            Op::AUIPC => {
                // AUIPC is often followed by JALR for function calls and long
                // jumps; peek at the next instruction to resolve such pairs.
                if remaining.len() >= 8 {
                    let next_insn_bytes = u32::from_le_bytes(remaining[4..8].try_into().unwrap());
                    if let Ok(next_insn) = next_insn_bytes.decode(Isa::Rv64) {
                        if matches!(next_insn.opc, Op::JALR) && insn.rd == next_insn.rs1 {
                            // This is an AUIPC+JALR pair; the JALR must not be
                            // reported as a dynamic jump on the next iteration.
                            pairs_with_next = true;
                            if let (Some(auipc_imm), Some(jalr_imm)) = (insn.imm, next_insn.imm) {
                                let target = addr
                                    .wrapping_add(auipc_imm as i64 as u64)
                                    .wrapping_add(jalr_imm as i64 as u64);
                                record_jump_target(
                                    target,
                                    addr,
                                    || format!("auipc+jalr -> 0x{target:x}"),
                                    jumpdests,
                                    jumpdests_with_debug_info,
                                    label_addrs,
                                );
                            }
                        }
                    }
                }
            }
            Op::JALR => {
                // Only process if this JALR is not part of an AUIPC+JALR pair.
                // A JALR that merely follows an unrelated AUIPC is still a
                // dynamic jump and is reported.
                if !paired_with_prev_auipc {
                    // These are dynamic jumps we can't resolve statically:
                    // - Return instructions (jalr x0, x1, 0)
                    // - Indirect calls through function pointers
                    // - Computed jumps (switch statements, vtables)
                    // We just note their existence for completeness.
                    let imm = insn.imm.unwrap_or(0);
                    // Only log if it's not a standard return (jalr x0, x1, 0).
                    if !(insn.rd == Some(0) && insn.rs1 == Some(1) && imm == 0) {
                        let rs1_str = format_reg(insn.rs1);
                        let rd_str = format_reg(insn.rd);
                        eprintln!(
                            "Note: Dynamic jump at 0x{addr:x}: jalr {rd_str}, {rs1_str}, {imm}",
                        );
                    }
                }
            }
            _ => {}
        }

        // Update for next iteration.
        paired_with_prev_auipc = pairs_with_next;
        addr += 4;
        remaining = &remaining[4..];
    }
}