diff --git a/.github/workflows/benchmark_hpu_hlapi.yml b/.github/workflows/benchmark_hpu_hlapi.yml index 9f8a5584d..7ba471d63 100644 --- a/.github/workflows/benchmark_hpu_hlapi.yml +++ b/.github/workflows/benchmark_hpu_hlapi.yml @@ -16,7 +16,7 @@ permissions: {} jobs: hlapi-benchmarks-hpu: name: Execute HLAPI benchmarks for HPU backend - runs-on: v80-desktop + runs-on: v80-marais concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} @@ -63,8 +63,8 @@ jobs: - name: Run benchmarks run: | make pull_hpu_files - export V80_SERIAL_NUMBER=XFL12E4XJXWK - source /opt/xilinx/Vivado/2024.2/settings64.sh + export V80_SERIAL_NUMBER=XFL12NWY3ZKG + source /opt/amd/Vivado/2024.2/settings64.sh make bench_hlapi_erc20_hpu make bench_hlapi_hpu diff --git a/.github/workflows/benchmark_hpu_integer.yml b/.github/workflows/benchmark_hpu_integer.yml index 4de872be7..a5ba7cdcb 100644 --- a/.github/workflows/benchmark_hpu_integer.yml +++ b/.github/workflows/benchmark_hpu_integer.yml @@ -29,7 +29,7 @@ permissions: {} jobs: prepare-matrix: name: Prepare operations matrix - runs-on: v80-desktop + runs-on: v80-marais outputs: bench_type: ${{ steps.set_bench_type.outputs.bench_type }} steps: @@ -58,7 +58,7 @@ jobs: integer-benchmarks-hpu: name: benchmark_hpu_integer/integer-benchmarks-hpu needs: prepare-matrix - runs-on: v80-desktop + runs-on: v80-marais concurrency: group: ${{ github.workflow }}_${{ github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} @@ -114,8 +114,8 @@ jobs: - name: Run benchmarks run: | make pull_hpu_files - export V80_SERIAL_NUMBER=XFL12E4XJXWK - source /opt/xilinx/Vivado/2024.2/settings64.sh + export V80_SERIAL_NUMBER=XFL12NWY3ZKG + source /opt/amd/Vivado/2024.2/settings64.sh make BENCH_TYPE="${BENCH_TYPE}" bench_integer_hpu env: BENCH_TYPE: ${{ matrix.bench_type }} diff --git a/Makefile b/Makefile index 908ca58de..646d5c10a 100644 --- a/Makefile +++ b/Makefile @@ -1332,7 +1332,7 @@ bench_integer_hpu: install_rs_check_toolchain RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \ cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ --bench integer-bench \ - --features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- + --features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression bench_integer_compression: install_rs_check_toolchain diff --git a/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml b/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml index 8e86e4e56..b8ea2db44 100644 --- a/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml +++ b/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml @@ -78,7 +78,7 @@ #implementation = "Ilp" implementation = "Llt" integer_w=[2,4,6,8,10,12,14,16,32,64,128] - min_batch_size = 9 + min_batch_size = 12 kogge_cfg = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/kogge_cfg.toml" custom_iop.'IOP[0]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_0.asm" custom_iop.'IOP[1]' = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_1.asm" diff --git a/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu b/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu index 18114dc0a..c8569b5e0 100644 --- a/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu +++ b/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f077c9cebbd56ba83c93ed0fdb4dea4f431dd6ee59be436ffbd8225e3ce82f49 -size 84230351 +oid sha256:3eb7619c9fb31dde691f08f963968ed60ec2892f3fab16b9c6c2353f9605efe8 +size 82307506 diff --git a/backends/tfhe-hpu-backend/src/asm/iop/mod.rs b/backends/tfhe-hpu-backend/src/asm/iop/mod.rs index 6f764a394..6259695c4 100644 --- a/backends/tfhe-hpu-backend/src/asm/iop/mod.rs +++ b/backends/tfhe-hpu-backend/src/asm/iop/mod.rs @@ -176,7 +176,7 @@ pub const IOP_2CT_F_CT_SCALAR: ConstIOpProto<2, 1> = ConstIOpProto { imm: 1, }; -pub const SIMD_N: usize = 9; //TODO: We need to come up with a way to have this dynamic +pub const SIMD_N: usize = 12; //TODO: We need to come up with a way to have this dynamic pub const IOP_NCT_F_2NCT: ConstIOpProto<{ SIMD_N }, { 2 * SIMD_N }> = ConstIOpProto { dst: [VarMode::Native; SIMD_N], src: [VarMode::Native; 2 * SIMD_N], diff --git a/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs b/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs index a5d34de0b..d8e99483a 100644 --- a/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs +++ b/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs @@ -377,7 +377,7 @@ impl AmiDriver { if ack_str.is_empty() { 0 } else { - let ack_nb = ack_str.as_str().trim_ascii().parse::().unwrap(); + let ack_nb = ack_str.as_str().lines().map(|line| line.trim_ascii().parse::().unwrap()).sum(); tracing::trace!("Get value {ack_str} from {ami_proc_path} => {ack_nb}",); ack_nb } diff --git a/setup_hpu.sh b/setup_hpu.sh index d90a9cbe4..aa0649825 100644 --- a/setup_hpu.sh +++ b/setup_hpu.sh @@ -22,7 +22,7 @@ V80_PCIE_DEV="unselected" XILINX_VIVADO=${XILINX_VIVADO:-"/opt/amd/Vivado/2024.2"} # V80 bitstream refresh require insmod of ami.ko module -AMI_PATH=${AMI_PATH:-"/opt/v80/ami/ef9249f"} +AMI_PATH=${AMI_PATH:-"/opt/v80/ami/e55d02d"} # Parse user CLI ############################################################## opt_short="hc:l:p:" diff --git a/tfhe-benchmark/benches/high_level_api/erc20.rs b/tfhe-benchmark/benches/high_level_api/erc20.rs index 6ef836b17..166420025 100644 --- a/tfhe-benchmark/benches/high_level_api/erc20.rs +++ b/tfhe-benchmark/benches/high_level_api/erc20.rs @@ -663,7 +663,7 @@ fn hpu_bench_transfer_throughput_simd( .len() / 3; let mut rng = thread_rng(); - for num_elems in [2, 10] { + for num_elems in [2, 8] { let real_num_elems = num_elems * (hpu_simd_n as u64); group.throughput(Throughput::Elements(real_num_elems)); let bench_id =