diff --git a/.github/workflows/benchmark_hpu_hlapi.yml b/.github/workflows/benchmark_hpu_hlapi.yml
index 9f8a5584d..7ba471d63 100644
--- a/.github/workflows/benchmark_hpu_hlapi.yml
+++ b/.github/workflows/benchmark_hpu_hlapi.yml
@@ -16,7 +16,7 @@ permissions: {}
 jobs:
   hlapi-benchmarks-hpu:
     name: Execute HLAPI benchmarks for HPU backend
-    runs-on: v80-desktop
+    runs-on: v80-marais
     concurrency:
       group: ${{ github.workflow }}_${{ github.ref }}
       cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
@@ -63,8 +63,8 @@ jobs:
       - name: Run benchmarks
         run: |
           make pull_hpu_files
-          export V80_SERIAL_NUMBER=XFL12E4XJXWK
-          source /opt/xilinx/Vivado/2024.2/settings64.sh
+          export V80_SERIAL_NUMBER=XFL12NWY3ZKG
+          source /opt/amd/Vivado/2024.2/settings64.sh
           make bench_hlapi_erc20_hpu
           make bench_hlapi_hpu
 
diff --git a/.github/workflows/benchmark_hpu_integer.yml b/.github/workflows/benchmark_hpu_integer.yml
index 4de872be7..a5ba7cdcb 100644
--- a/.github/workflows/benchmark_hpu_integer.yml
+++ b/.github/workflows/benchmark_hpu_integer.yml
@@ -29,7 +29,7 @@ permissions: {}
 jobs:
   prepare-matrix:
     name: Prepare operations matrix
-    runs-on: v80-desktop
+    runs-on: v80-marais
     outputs:
       bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
     steps:
@@ -58,7 +58,7 @@ jobs:
   integer-benchmarks-hpu:
     name: benchmark_hpu_integer/integer-benchmarks-hpu
     needs: prepare-matrix
-    runs-on: v80-desktop
+    runs-on: v80-marais
     concurrency:
       group: ${{ github.workflow }}_${{ github.ref }}
       cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
@@ -114,8 +114,8 @@ jobs:
       - name: Run benchmarks
         run: |
           make pull_hpu_files
-          export V80_SERIAL_NUMBER=XFL12E4XJXWK
-          source /opt/xilinx/Vivado/2024.2/settings64.sh
+          export V80_SERIAL_NUMBER=XFL12NWY3ZKG
+          source /opt/amd/Vivado/2024.2/settings64.sh
           make BENCH_TYPE="${BENCH_TYPE}" bench_integer_hpu
         env:
           BENCH_TYPE: ${{ matrix.bench_type }}
diff --git a/Makefile b/Makefile
index 908ca58de..646d5c10a 100644
--- a/Makefile
+++ b/Makefile
@@ -1332,7 +1332,7 @@ bench_integer_hpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark --
+	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick
 
 .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
 bench_integer_compression: install_rs_check_toolchain
diff --git a/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml b/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml
index 8e86e4e56..b8ea2db44 100644
--- a/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml
+++ b/backends/tfhe-hpu-backend/config_store/v80/hpu_config.toml
@@ -78,7 +78,7 @@
   #implementation = "Ilp"
   implementation = "Llt"
   integer_w=[2,4,6,8,10,12,14,16,32,64,128]
-  min_batch_size = 9
+  min_batch_size = 12
   kogge_cfg            = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/kogge_cfg.toml"
   custom_iop.'IOP[0]'  = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_0.asm"
   custom_iop.'IOP[1]'  = "${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/custom_iop/cust_1.asm"
diff --git a/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu b/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu
index 18114dc0a..c8569b5e0 100644
--- a/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu
+++ b/backends/tfhe-hpu-backend/config_store/v80_archives/psi64.hpu
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f077c9cebbd56ba83c93ed0fdb4dea4f431dd6ee59be436ffbd8225e3ce82f49
-size 84230351
+oid sha256:3eb7619c9fb31dde691f08f963968ed60ec2892f3fab16b9c6c2353f9605efe8
+size 82307506
diff --git a/backends/tfhe-hpu-backend/src/asm/iop/mod.rs b/backends/tfhe-hpu-backend/src/asm/iop/mod.rs
index 6f764a394..6259695c4 100644
--- a/backends/tfhe-hpu-backend/src/asm/iop/mod.rs
+++ b/backends/tfhe-hpu-backend/src/asm/iop/mod.rs
@@ -176,7 +176,7 @@ pub const IOP_2CT_F_CT_SCALAR: ConstIOpProto<2, 1> = ConstIOpProto {
     imm: 1,
 };
 
-pub const SIMD_N: usize = 9; //TODO: We need to come up with a way to have this dynamic
+pub const SIMD_N: usize = 12; // TODO: We need to come up with a way to make this dynamic
 pub const IOP_NCT_F_2NCT: ConstIOpProto<{ SIMD_N }, { 2 * SIMD_N }> = ConstIOpProto {
     dst: [VarMode::Native; SIMD_N],
     src: [VarMode::Native; 2 * SIMD_N],
diff --git a/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs b/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs
index a5d34de0b..d8e99483a 100644
--- a/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs
+++ b/backends/tfhe-hpu-backend/src/ffi/v80/ami.rs
@@ -377,7 +377,7 @@ impl AmiDriver {
         if ack_str.is_empty() {
             0
         } else {
-            let ack_nb = ack_str.as_str().trim_ascii().parse::<u32>().unwrap();
+            let ack_nb = ack_str.as_str().lines().map(|line| line.trim_ascii().parse::<u32>().unwrap()).sum();
             tracing::trace!("Get value {ack_str} from {ami_proc_path} => {ack_nb}",);
             ack_nb
         }
diff --git a/setup_hpu.sh b/setup_hpu.sh
index d90a9cbe4..aa0649825 100644
--- a/setup_hpu.sh
+++ b/setup_hpu.sh
@@ -22,7 +22,7 @@ V80_PCIE_DEV="unselected"
 XILINX_VIVADO=${XILINX_VIVADO:-"/opt/amd/Vivado/2024.2"}
 
 # V80 bitstream refresh require insmod of ami.ko module
-AMI_PATH=${AMI_PATH:-"/opt/v80/ami/ef9249f"}
+AMI_PATH=${AMI_PATH:-"/opt/v80/ami/e55d02d"}
 
 # Parse user CLI ##############################################################
 opt_short="hc:l:p:"
diff --git a/tfhe-benchmark/benches/high_level_api/erc20.rs b/tfhe-benchmark/benches/high_level_api/erc20.rs
index 6ef836b17..166420025 100644
--- a/tfhe-benchmark/benches/high_level_api/erc20.rs
+++ b/tfhe-benchmark/benches/high_level_api/erc20.rs
@@ -663,7 +663,7 @@ fn hpu_bench_transfer_throughput_simd<FheType, F>(
         .len()
         / 3;
     let mut rng = thread_rng();
-    for num_elems in [2, 10] {
+    for num_elems in [2, 8] {
         let real_num_elems = num_elems * (hpu_simd_n as u64);
         group.throughput(Throughput::Elements(real_num_elems));
         let bench_id =