mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-08 22:28:01 -05:00
feat(hpu): update backend to support multiple V80 devices; each V80 is identified by its serial number
- update psi64 to replace fw with stable version (3.1.0), remove psi16.hpu
This commit is contained in:
committed by
Pierre Gardrat
parent
1b92bcf476
commit
afd8f58a8d
@@ -6,7 +6,8 @@
|
||||
"${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_regif_core_prc_3in3.toml"]
|
||||
polling_us=10
|
||||
[fpga.ffi.V80]
|
||||
id= 0
|
||||
id="${V80_PCIE_DEV}"
|
||||
board_sn="${V80_SERIAL_NUMBER}"
|
||||
hpu_path="${HPU_BACKEND_DIR}/config_store/v80_archives/psi64.hpu"
|
||||
ami_path="${AMI_PATH}/ami.ko"
|
||||
qdma_h2c="/dev/qdma${V80_PCIE_DEV}001-MM-1"
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d5f578ec0cbcd1525fc88dc57fff1a2384fa742a147f69b6a9c77deafc0601fe
|
||||
size 33348376
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0a0798a1170982be0ce714bbf0d4cdfbe3c069e328e8847053c20d7b9b347ef6
|
||||
size 83225193
|
||||
oid sha256:1d1afb554756df4d8b39bee33ded2dda19c23a6f9d8e2b242092efd35cf1cc19
|
||||
size 83281321
|
||||
|
||||
@@ -1,16 +1,37 @@
|
||||
set TOP_NAME [lindex $::argv 0]
|
||||
set SERIAL_NUMBER [lindex $::argv 1]
|
||||
|
||||
puts "TOP NAME: $TOP_NAME"
|
||||
puts "SERIAL NUMBER: $SERIAL_NUMBER"
|
||||
|
||||
|
||||
open_hw_manager
|
||||
|
||||
connect_hw_server -allow_non_jtag
|
||||
open_hw_target
|
||||
current_hw_device [get_hw_devices xcv80_1]
|
||||
refresh_hw_device -update_hw_probes false [lindex [get_hw_devices xcv80_1] 0]
|
||||
|
||||
set_property PROBES.FILE {} [get_hw_devices xcv80_1]
|
||||
set_property FULL_PROBES.FILE {} [get_hw_devices xcv80_1]
|
||||
set found_index -1
|
||||
|
||||
# stage 1 programming
|
||||
set_property PROGRAM.FILE $TOP_NAME [get_hw_devices xcv80_1]
|
||||
program_hw_devices [get_hw_devices xcv80_1]
|
||||
refresh_hw_device [lindex [get_hw_devices xcv80_1] 0]
|
||||
set targets [get_hw_targets]
|
||||
for {set i 0} {$i < [llength $targets]} {incr i} {
|
||||
set tg [lindex $targets $i]
|
||||
if {[string first $SERIAL_NUMBER $tg] != -1} {
|
||||
set found_index $i
|
||||
break
|
||||
}
|
||||
}
|
||||
if {$found_index != -1} {
|
||||
open_hw_target [lindex $targets $found_index]
|
||||
set hw_device [get_hw_devices]
|
||||
current_hw_device [get_hw_devices [lindex $hw_device 1]]
|
||||
refresh_hw_device -update_hw_probes false [lindex [get_hw_devices [lindex $hw_device 1]] 0]
|
||||
|
||||
set_property PROBES.FILE {} [get_hw_devices [lindex $hw_device 1]]
|
||||
set_property FULL_PROBES.FILE {} [get_hw_devices [lindex $hw_device 1]]
|
||||
|
||||
# stage 1 programming
|
||||
set_property PROGRAM.FILE $TOP_NAME [get_hw_devices [lindex $hw_device 1]]
|
||||
program_hw_devices [get_hw_devices [lindex $hw_device 1]]
|
||||
refresh_hw_device [lindex [get_hw_devices [lindex $hw_device 1]] 0]
|
||||
} else {
|
||||
puts "Could not find $SERIAL_NUMBER in list of hw targets $targets"
|
||||
}
|
||||
|
||||
@@ -169,12 +169,14 @@ impl HpuHw {
|
||||
match mode {
|
||||
FFIMode::V80 {
|
||||
id,
|
||||
board_sn,
|
||||
hpu_path,
|
||||
ami_path,
|
||||
qdma_h2c,
|
||||
qdma_c2h,
|
||||
} => Self(v80::HpuHw::new_hpu_hw(
|
||||
*id,
|
||||
&id.expand(),
|
||||
&board_sn.expand(),
|
||||
&hpu_path.expand(),
|
||||
&ami_path.expand(),
|
||||
retry_rate,
|
||||
|
||||
@@ -10,7 +10,7 @@ use std::os::fd::AsRawFd;
|
||||
use std::time::Duration;
|
||||
|
||||
const AMI_VERSION_FILE: &str = "/sys/module/ami/version";
|
||||
const AMI_VERSION_PATTERN: &str = r"3\.0\.\d+-zama";
|
||||
const AMI_VERSION_PATTERN: &str = r"3\.1\.\d+-zama";
|
||||
|
||||
const AMI_ID_FILE: &str = "/sys/bus/pci/drivers/ami/devices";
|
||||
const AMI_ID_PATTERN: &str = r"(?<bus>[[:xdigit:]]{2}):(?<dev>[[:xdigit:]]{2})\.(?<func>[[:xdigit:]])\s(?<devn>\d+)\s(?<hwmon>\d+)";
|
||||
@@ -36,7 +36,7 @@ pub struct AmiInfo {
|
||||
/// Set of discovery function
|
||||
/// Enable to probe the device IDs and status
|
||||
impl AmiInfo {
|
||||
pub fn new(ami_id: u32) -> Result<Self, Box<dyn Error>> {
|
||||
pub fn new(ami_id: &str) -> Result<Self, Box<dyn Error>> {
|
||||
// First read content of AMI_DEVICES_MAP
|
||||
let devices_file = OpenOptions::new()
|
||||
.read(true)
|
||||
@@ -46,8 +46,11 @@ impl AmiInfo {
|
||||
let devices_rd = BufReader::new(devices_file);
|
||||
let line = devices_rd
|
||||
.lines()
|
||||
.nth(1 + ami_id as usize)
|
||||
.ok_or("No device found")??;
|
||||
.find(|line_result| match line_result {
|
||||
Ok(l) => l.starts_with(ami_id),
|
||||
Err(_) => false,
|
||||
})
|
||||
.ok_or("Could not find line starting with {ami_id:?}.")??;
|
||||
|
||||
// Extract AMI device path
|
||||
lazy_static! {
|
||||
@@ -75,12 +78,13 @@ impl AmiInfo {
|
||||
|
||||
pub struct AmiDriver {
|
||||
ami_dev: File,
|
||||
ami_info: AmiInfo,
|
||||
retry_rate: Duration,
|
||||
}
|
||||
|
||||
impl AmiDriver {
|
||||
pub fn new(
|
||||
ami_id: u32,
|
||||
ami_id: &str,
|
||||
amc_ver: &Version,
|
||||
retry_rate: Duration,
|
||||
) -> Result<Self, Box<dyn Error>> {
|
||||
@@ -97,6 +101,7 @@ impl AmiDriver {
|
||||
|
||||
Ok(Self {
|
||||
ami_dev,
|
||||
ami_info,
|
||||
retry_rate,
|
||||
})
|
||||
}
|
||||
@@ -357,11 +362,13 @@ impl AmiDriver {
|
||||
// TODO ugly quick patch
|
||||
// Clean this when driver interface is specified
|
||||
pub fn iop_ackq_rd(&self) -> u32 {
|
||||
let ami_devn = self.ami_info.devn;
|
||||
let ami_proc_path = format!("/proc/ami_iop_ack_{}", ami_devn);
|
||||
let mut iop_ack_f = OpenOptions::new()
|
||||
.read(true)
|
||||
.write(true)
|
||||
.create(false)
|
||||
.open("/proc/ami_iop_ack")
|
||||
.open(&ami_proc_path)
|
||||
.unwrap();
|
||||
|
||||
// Read a line and extract a 32b integer
|
||||
@@ -371,7 +378,7 @@ impl AmiDriver {
|
||||
0
|
||||
} else {
|
||||
let ack_nb = ack_str.as_str().trim_ascii().parse::<u32>().unwrap();
|
||||
tracing::trace!("Get value {ack_str} from proc/ami_iop_ack => {ack_nb}",);
|
||||
tracing::trace!("Get value {ack_str} from {ami_proc_path} => {ack_nb}",);
|
||||
ack_nb
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,8 @@ impl HpuHw {
|
||||
/// otherwise reload Pdi
|
||||
#[inline(always)]
|
||||
pub fn new_hpu_hw(
|
||||
id: u32,
|
||||
id: &str,
|
||||
board_sn: &str,
|
||||
hpu_path: &str,
|
||||
ami_path: &str,
|
||||
ami_retry: std::time::Duration,
|
||||
@@ -54,7 +55,9 @@ impl HpuHw {
|
||||
// Try current hw and fallback to a fresh reload
|
||||
Self::try_current_hw(id, &hpu_pdi, ami_retry, h2c_path, c2h_path).unwrap_or_else(|err| {
|
||||
tracing::warn!("Loading current HW failed with {err:?}. Will do a fresh reload");
|
||||
Self::reload_hw(id, &hpu_pdi, ami_path, ami_retry, h2c_path, c2h_path)
|
||||
Self::reload_hw(
|
||||
&id, &board_sn, &hpu_pdi, ami_path, ami_retry, h2c_path, c2h_path,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -62,7 +65,7 @@ impl HpuHw {
|
||||
/// NB: This procedure required unload of Qdma/Ami driver and thus couldn't be directly
|
||||
/// implemented in the AMI
|
||||
fn try_current_hw(
|
||||
id: u32,
|
||||
id: &str,
|
||||
pdi: &HpuV80Pdi,
|
||||
ami_retry: std::time::Duration,
|
||||
h2c_path: &str,
|
||||
@@ -96,7 +99,8 @@ impl HpuHw {
|
||||
/// NB: This procedure required unload of Qdma/Ami driver and thus couldn't be directly
|
||||
/// implemented in the AMI
|
||||
fn reload_hw(
|
||||
id: u32,
|
||||
id: &str,
|
||||
board_sn: &str,
|
||||
pdi: &HpuV80Pdi,
|
||||
ami_path: &str,
|
||||
ami_retry: std::time::Duration,
|
||||
@@ -151,6 +155,7 @@ impl HpuHw {
|
||||
)
|
||||
.arg("-tclargs")
|
||||
.arg(format!("{}/{}", tmp_dir_str, &pdi_stg1_tmp))
|
||||
.arg(format!("{}", &board_sn))
|
||||
.output()
|
||||
.expect("Stage1 loading encounters error");
|
||||
tracing::debug!("Stage1 loaded: {hw_monitor:?}");
|
||||
@@ -199,6 +204,9 @@ impl HpuHw {
|
||||
.write_all(b"1\n")
|
||||
.expect("Unable to triggered a pci rescan");
|
||||
|
||||
// wait for QDMA to create its fs
|
||||
std::thread::sleep(std::time::Duration::from_secs(2));
|
||||
|
||||
// Update right on V80 pcie subsystem
|
||||
// NB: sysfs is recreated upon rescan
|
||||
Command::new("sudo")
|
||||
|
||||
@@ -42,7 +42,8 @@ impl std::str::FromStr for ShellString {
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub enum FFIMode {
|
||||
V80 {
|
||||
id: u32,
|
||||
id: ShellString,
|
||||
board_sn: ShellString,
|
||||
hpu_path: ShellString,
|
||||
ami_path: ShellString,
|
||||
qdma_h2c: ShellString,
|
||||
|
||||
12
setup_hpu.sh
12
setup_hpu.sh
@@ -17,12 +17,7 @@ RUST_LOG="info"
|
||||
|
||||
# Setting PCI device variable: depends on the machine
|
||||
mapfile -t DEVICE< <(lspci -d 10ee:50b5)
|
||||
if [ ${#DEVICE[@]} -gt 1 ]; then
|
||||
echo "[ERROR]: There is more than one device pcie, we only support one hpu for now"
|
||||
return 1
|
||||
else
|
||||
V80_PCIE_DEV="${DEVICE[0]%%:*}"
|
||||
fi
|
||||
V80_PCIE_DEV="unselected"
|
||||
|
||||
# V80 bitstream refresh relies on Xilinx Vivado tools
|
||||
XILINX_VIVADO=${XILINX_VIVADO:-"/opt/amd/Vivado/2024.2"}
|
||||
@@ -69,7 +64,10 @@ do
|
||||
V80_PCIE_DEV="${2}"
|
||||
((i++))
|
||||
else
|
||||
echo "Error: --pcie-dev requires a value"
|
||||
echo "Please select a device in following list (1st two digits):"
|
||||
for item in "${DEVICE[@]}"; do
|
||||
echo "$item"
|
||||
done
|
||||
return 1
|
||||
fi
|
||||
shift 2
|
||||
|
||||
Reference in New Issue
Block a user