feat(hpu): update backend to support multiple V80 devices; each V80 is identified by its serial number

- update psi64 to replace fw with stable version (3.1.0), remove psi16.hpu
This commit is contained in:
pgardratzama
2025-07-07 18:27:00 +02:00
committed by Pierre Gardrat
parent 1b92bcf476
commit afd8f58a8d
9 changed files with 70 additions and 35 deletions

View File

@@ -6,7 +6,8 @@
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_3in3.toml"]
polling_us=10
[fpga.ffi.V80]
id= 0
id="${V80_PCIE_DEV}"
board_sn="${V80_SERIAL_NUMBER}"
hpu_path="${HPU_BACKEND_DIR}/config_store/v80_archives/psi64.hpu"
ami_path="${AMI_PATH}/ami.ko"
qdma_h2c="/dev/qdma${V80_PCIE_DEV}001-MM-1"

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d5f578ec0cbcd1525fc88dc57fff1a2384fa742a147f69b6a9c77deafc0601fe
size 33348376

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0a0798a1170982be0ce714bbf0d4cdfbe3c069e328e8847053c20d7b9b347ef6
size 83225193
oid sha256:1d1afb554756df4d8b39bee33ded2dda19c23a6f9d8e2b242092efd35cf1cc19
size 83281321

View File

@@ -1,16 +1,37 @@
set TOP_NAME [lindex $::argv 0]
set SERIAL_NUMBER [lindex $::argv 1]
puts "TOP NAME: $TOP_NAME"
puts "SERIAL NUMBER: $SERIAL_NUMBER"
open_hw_manager
connect_hw_server -allow_non_jtag
open_hw_target
current_hw_device [get_hw_devices xcv80_1]
refresh_hw_device -update_hw_probes false [lindex [get_hw_devices xcv80_1] 0]
set_property PROBES.FILE {} [get_hw_devices xcv80_1]
set_property FULL_PROBES.FILE {} [get_hw_devices xcv80_1]
set found_index -1
# stage 1 programming
set_property PROGRAM.FILE $TOP_NAME [get_hw_devices xcv80_1]
program_hw_devices [get_hw_devices xcv80_1]
refresh_hw_device [lindex [get_hw_devices xcv80_1] 0]
set targets [get_hw_targets]
for {set i 0} {$i < [llength $targets]} {incr i} {
set tg [lindex $targets $i]
if {[string first $SERIAL_NUMBER $tg] != -1} {
set found_index $i
break
}
}
if {$found_index != -1} {
open_hw_target [lindex $targets $found_index]
set hw_device [get_hw_devices]
current_hw_device [get_hw_devices [lindex $hw_device 1]]
refresh_hw_device -update_hw_probes false [lindex [get_hw_devices [lindex $hw_device 1]] 0]
set_property PROBES.FILE {} [get_hw_devices [lindex $hw_device 1]]
set_property FULL_PROBES.FILE {} [get_hw_devices [lindex $hw_device 1]]
# stage 1 programming
set_property PROGRAM.FILE $TOP_NAME [get_hw_devices [lindex $hw_device 1]]
program_hw_devices [get_hw_devices [lindex $hw_device 1]]
refresh_hw_device [lindex [get_hw_devices [lindex $hw_device 1]] 0]
} else {
puts "Could not find $SERIAL_NUMBER in list of hw targets $targets"
}

View File

@@ -169,12 +169,14 @@ impl HpuHw {
match mode {
FFIMode::V80 {
id,
board_sn,
hpu_path,
ami_path,
qdma_h2c,
qdma_c2h,
} => Self(v80::HpuHw::new_hpu_hw(
*id,
&id.expand(),
&board_sn.expand(),
&hpu_path.expand(),
&ami_path.expand(),
retry_rate,

View File

@@ -10,7 +10,7 @@ use std::os::fd::AsRawFd;
use std::time::Duration;
const AMI_VERSION_FILE: &str = "/sys/module/ami/version";
const AMI_VERSION_PATTERN: &str = r"3\.0\.\d+-zama";
const AMI_VERSION_PATTERN: &str = r"3\.1\.\d+-zama";
const AMI_ID_FILE: &str = "/sys/bus/pci/drivers/ami/devices";
const AMI_ID_PATTERN: &str = r"(?<bus>[[:xdigit:]]{2}):(?<dev>[[:xdigit:]]{2})\.(?<func>[[:xdigit:]])\s(?<devn>\d+)\s(?<hwmon>\d+)";
@@ -36,7 +36,7 @@ pub struct AmiInfo {
/// Set of discovery functions
/// Used to probe the device IDs and status
impl AmiInfo {
pub fn new(ami_id: u32) -> Result<Self, Box<dyn Error>> {
pub fn new(ami_id: &str) -> Result<Self, Box<dyn Error>> {
// First read content of AMI_DEVICES_MAP
let devices_file = OpenOptions::new()
.read(true)
@@ -46,8 +46,11 @@ impl AmiInfo {
let devices_rd = BufReader::new(devices_file);
let line = devices_rd
.lines()
.nth(1 + ami_id as usize)
.ok_or("No device found")??;
.find(|line_result| match line_result {
Ok(l) => l.starts_with(ami_id),
Err(_) => false,
})
.ok_or("Could not find line starting with {ami_id:?}.")??;
// Extract AMI device path
lazy_static! {
@@ -75,12 +78,13 @@ impl AmiInfo {
pub struct AmiDriver {
ami_dev: File,
ami_info: AmiInfo,
retry_rate: Duration,
}
impl AmiDriver {
pub fn new(
ami_id: u32,
ami_id: &str,
amc_ver: &Version,
retry_rate: Duration,
) -> Result<Self, Box<dyn Error>> {
@@ -97,6 +101,7 @@ impl AmiDriver {
Ok(Self {
ami_dev,
ami_info,
retry_rate,
})
}
@@ -357,11 +362,13 @@ impl AmiDriver {
// TODO ugly quick patch
// Clean this when driver interface is specified
pub fn iop_ackq_rd(&self) -> u32 {
let ami_devn = self.ami_info.devn;
let ami_proc_path = format!("/proc/ami_iop_ack_{}", ami_devn);
let mut iop_ack_f = OpenOptions::new()
.read(true)
.write(true)
.create(false)
.open("/proc/ami_iop_ack")
.open(&ami_proc_path)
.unwrap();
// Read a line and extract a 32b integer
@@ -371,7 +378,7 @@ impl AmiDriver {
0
} else {
let ack_nb = ack_str.as_str().trim_ascii().parse::<u32>().unwrap();
tracing::trace!("Get value {ack_str} from proc/ami_iop_ack => {ack_nb}",);
tracing::trace!("Get value {ack_str} from {ami_proc_path} => {ack_nb}",);
ack_nb
}
}

View File

@@ -40,7 +40,8 @@ impl HpuHw {
/// otherwise reload Pdi
#[inline(always)]
pub fn new_hpu_hw(
id: u32,
id: &str,
board_sn: &str,
hpu_path: &str,
ami_path: &str,
ami_retry: std::time::Duration,
@@ -54,7 +55,9 @@ impl HpuHw {
// Try current hw and fallback to a fresh reload
Self::try_current_hw(id, &hpu_pdi, ami_retry, h2c_path, c2h_path).unwrap_or_else(|err| {
tracing::warn!("Loading current HW failed with {err:?}. Will do a fresh reload");
Self::reload_hw(id, &hpu_pdi, ami_path, ami_retry, h2c_path, c2h_path)
Self::reload_hw(
&id, &board_sn, &hpu_pdi, ami_path, ami_retry, h2c_path, c2h_path,
)
})
}
@@ -62,7 +65,7 @@ impl HpuHw {
/// NB: This procedure requires unloading the Qdma/Ami drivers and thus cannot be directly
/// implemented in the AMI
fn try_current_hw(
id: u32,
id: &str,
pdi: &HpuV80Pdi,
ami_retry: std::time::Duration,
h2c_path: &str,
@@ -96,7 +99,8 @@ impl HpuHw {
/// NB: This procedure requires unloading the Qdma/Ami drivers and thus cannot be directly
/// implemented in the AMI
fn reload_hw(
id: u32,
id: &str,
board_sn: &str,
pdi: &HpuV80Pdi,
ami_path: &str,
ami_retry: std::time::Duration,
@@ -151,6 +155,7 @@ impl HpuHw {
)
.arg("-tclargs")
.arg(format!("{}/{}", tmp_dir_str, &pdi_stg1_tmp))
.arg(format!("{}", &board_sn))
.output()
.expect("Stage1 loading encounters error");
tracing::debug!("Stage1 loaded: {hw_monitor:?}");
@@ -199,6 +204,9 @@ impl HpuHw {
.write_all(b"1\n")
.expect("Unable to triggered a pci rescan");
// wait for QDMA to create its fs
std::thread::sleep(std::time::Duration::from_secs(2));
// Update access rights on the V80 PCIe subsystem
// NB: sysfs is recreated upon rescan
Command::new("sudo")

View File

@@ -42,7 +42,8 @@ impl std::str::FromStr for ShellString {
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub enum FFIMode {
V80 {
id: u32,
id: ShellString,
board_sn: ShellString,
hpu_path: ShellString,
ami_path: ShellString,
qdma_h2c: ShellString,

View File

@@ -17,12 +17,7 @@ RUST_LOG="info"
# Setting PCI device variable: depends on the machine
mapfile -t DEVICE< <(lspci -d 10ee:50b5)
if [ ${#DEVICE[@]} -gt 1 ]; then
echo "[ERROR]: There is more than one device pcie, we only support one hpu for now"
return 1
else
V80_PCIE_DEV="${DEVICE[0]%%:*}"
fi
V80_PCIE_DEV="unselected"
# V80 bitstream refresh relies on Xilinx Vivado tools
XILINX_VIVADO=${XILINX_VIVADO:-"/opt/amd/Vivado/2024.2"}
@@ -69,7 +64,10 @@ do
V80_PCIE_DEV="${2}"
((i++))
else
echo "Error: --pcie-dev requires a value"
echo "Please select a device in following list (1st two digits):"
for item in "${DEVICE[@]}"; do
echo "$item"
done
return 1
fi
shift 2