chore(hpu): make hpu integer bench fast by default

This commit is contained in:
pgardratzama
2025-09-10 13:28:27 +02:00
committed by Pierre Gardrat
parent 4ff0d6cac2
commit 757c2fc828
2 changed files with 28 additions and 4 deletions

View File

@@ -4,6 +4,9 @@ name: benchmark_hpu_integer
on:
workflow_dispatch:
inputs:
all_precisions:
description: "Run all precisions"
type: boolean
bench_type:
description: "Benchmarks type"
type: choice
@@ -19,6 +22,7 @@ env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
FAST_BENCH: TRUE
permissions: {}
@@ -102,6 +106,11 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
- name: Run benchmarks
run: |
make pull_hpu_files

View File

@@ -255,7 +255,10 @@ where
{
use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
let src = HpuHandle {
native: vec![from_amount, to_amount, amount].into_iter().flatten().collect(),
native: vec![from_amount, to_amount, amount]
.into_iter()
.flatten()
.collect(),
boolean: vec![],
imm: vec![],
};
@@ -392,7 +395,13 @@ fn bench_transfer_latency_simd<FheType, F>(
F: for<'a> Fn(&'a Vec<FheType>, &'a Vec<FheType>, &'a Vec<FheType>) -> Vec<FheType>,
{
use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD.format().unwrap().proto.src.len()/3;
let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD
.format()
.unwrap()
.proto
.src
.len()
/ 3;
let bench_id = format!("{bench_name}::{fn_name}::{type_name}");
c.bench_function(&bench_id, |b| {
let mut rng = thread_rng();
@@ -646,10 +655,16 @@ fn hpu_bench_transfer_throughput_simd<FheType, F>(
F: for<'a> Fn(&'a Vec<FheType>, &'a Vec<FheType>, &'a Vec<FheType>) -> Vec<FheType> + Sync,
{
use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD.format().unwrap().proto.src.len()/3;
let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD
.format()
.unwrap()
.proto
.src
.len()
/ 3;
let mut rng = thread_rng();
for num_elems in [2, 10] {
let real_num_elems = num_elems*(hpu_simd_n as u64);
let real_num_elems = num_elems * (hpu_simd_n as u64);
group.throughput(Throughput::Elements(real_num_elems));
let bench_id =
format!("{bench_name}::throughput::{fn_name}::{type_name}::{real_num_elems}_elems");