Compare commits

...

11 Commits

Author SHA1 Message Date
Agnes Leroy
b1e25c65a7 Fix Makefile 2026-02-13 09:48:40 +01:00
Agnes Leroy
cba4f2e6dd Add cost for 8xL40 2026-02-13 09:34:23 +01:00
Agnes Leroy
6199610865 Add decomp_sns_comp bench to summary 2026-02-13 09:34:23 +01:00
Agnes Leroy
aae9e6c9a5 Add 8xL40 to slab config 2026-02-13 09:34:23 +01:00
Agnes Leroy
6ecf2a59e2 Fix Makefile 2026-02-13 09:34:22 +01:00
Agnes Leroy
3169ee8093 chore(gpu): bench classic or multi-bit params for compress/decompress 2026-02-12 11:45:44 +01:00
David Testé
8119c2287b WIP: run classic an multi_bit benchs on gpu 2026-02-12 11:42:48 +01:00
David Testé
e4c7f83e17 WIP: use placeholder workflow to run benches 2026-02-10 16:09:26 +01:00
David Testé
349846bc11 WIP: add profile selection for GPU bench 2026-02-10 16:09:25 +01:00
David Testé
b1fb4b2ae0 WIP: adding missing DEX bench 2026-02-10 16:09:25 +01:00
David Testé
da3c55c50b WIP: implement bench workflow testing cpu and gpu 2026-02-10 16:09:22 +01:00
7 changed files with 412 additions and 55 deletions

126
.github/workflows/benchmark_summary.yml vendored Normal file
View File

@@ -0,0 +1,126 @@
# Run all benchmarks displayed in the internal documentation.
name: benchmark_summary
run-name: Benchmark Summary
on:
workflow_dispatch:
inputs:
run-cpu-benchmarks:
description: "Run CPU benchmarks"
type: boolean
default: true
run-gpu-benchmarks:
description: "Run GPU benchmarks"
type: boolean
default: true
gpu-profile:
description: "GPU Instance type"
required: true
default: "multi-h100-sxm5 (n3-H100-SXM5x8)"
type: choice
options:
- "l40 (n3-L40x1)"
- "4-l40 (n3-L40x4)"
- "multi-a100-nvlink (n3-A100x8-NVLink)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
run-hpu-benchmarks:
description: "Run HPU benchmarks"
type: boolean
default: true
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
parse-gpu-inputs:
name: benchmark_summary/parse-gpu-inputs
if: inputs.run-gpu-benchmarks
runs-on: ubuntu-latest
outputs:
profile: ${{ steps.parse_profile.outputs.profile }}
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
env:
INPUTS_PROFILE: ${{ inputs.gpu-profile }}
steps:
- name: Parse profile
id: parse_profile
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
PROFILE=$(echo "${INPUTS_PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
echo "profile=${PROFILE}" >> "${GITHUB_OUTPUT}"
- name: Parse hardware name
id: parse_hardware_name
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
NAME=$(echo "${INPUTS_PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
run-benchmarks-cpu:
name: benchmark_documentation/run-benchmarks-cpu-integer
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: summary
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu:
name: benchmark_documentation/run-benchmarks-gpu
uses: ./.github/workflows/benchmark_gpu_common.yml
if: inputs.run-gpu-benchmarks
needs: parse-gpu-inputs
with:
profile: ${{ needs.parse-gpu-inputs.outputs.profile }}
hardware_name: ${{ needs.parse-gpu-inputs.outputs.hardware_name }}
command: summary
bench_type: both
params_type: classical + multi_bit
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# TODO add make recipe for HPU benchmarks
# run-benchmarks-hpu:
# name: benchmark_documentation/run-benchmarks-hpu
# uses: ./.github/workflows/benchmark_hpu_common.yml
# if: inputs.run-hpu-benchmarks
# with:
# command: summary
# bench_type: both
# v80_pcie_dev: 24
# v80_serial_number: XFL12NWY3ZKG
# secrets:
# BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
# SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
# REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# JOB_SECRET: ${{ secrets.JOB_SECRET }}
# SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
# SLAB_URL: ${{ secrets.SLAB_URL }}
# SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}

View File

@@ -1,18 +1,127 @@
# Placeholder workflow file allowing running it without having to merge to main first
name: placeholder_workflow
run-name: Summary benchs tests
on:
workflow_dispatch:
inputs:
run-cpu-benchmarks:
description: "Run CPU benchmarks"
type: boolean
default: true
run-gpu-benchmarks:
description: "Run GPU benchmarks"
type: boolean
default: true
gpu-profile:
description: "GPU Instance type"
required: true
default: "multi-h100-sxm5 (n3-H100-SXM5x8)"
type: choice
options:
- "l40 (n3-L40x1)"
- "4-l40 (n3-L40x4)"
- "8-l40 (n3-L40x8)"
- "multi-a100-nvlink (n3-A100x8-NVLink)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
run-hpu-benchmarks:
description: "Run HPU benchmarks"
type: boolean
default: true
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
placeholder:
name: placeholder_workflow/placeholder
parse-gpu-inputs:
name: benchmark_summary/parse-gpu-inputs
if: inputs.run-gpu-benchmarks
runs-on: ubuntu-latest
outputs:
profile: ${{ steps.parse_profile.outputs.profile }}
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
env:
INPUTS_PROFILE: ${{ inputs.gpu-profile }}
steps:
- run: |
echo "Hello this is a Placeholder Workflow"
- name: Parse profile
id: parse_profile
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
PROFILE=$(echo "${INPUTS_PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
echo "profile=${PROFILE}" >> "${GITHUB_OUTPUT}"
- name: Parse hardware name
id: parse_hardware_name
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
NAME=$(echo "${INPUTS_PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
run-benchmarks-cpu:
name: benchmark_documentation/run-benchmarks-cpu-integer
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: summary
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu:
name: benchmark_documentation/run-benchmarks-gpu
uses: ./.github/workflows/benchmark_gpu_common.yml
if: inputs.run-gpu-benchmarks
needs: parse-gpu-inputs
with:
profile: ${{ needs.parse-gpu-inputs.outputs.profile }}
hardware_name: ${{ needs.parse-gpu-inputs.outputs.hardware_name }}
command: summary
bench_type: both
params_type: classical + multi_bit
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# TODO add make recipe for HPU benchmarks
# run-benchmarks-hpu:
# name: benchmark_documentation/run-benchmarks-hpu
# uses: ./.github/workflows/benchmark_hpu_common.yml
# if: inputs.run-hpu-benchmarks
# with:
# command: summary
# bench_type: both
# v80_pcie_dev: 24
# v80_serial_number: XFL12NWY3ZKG
# secrets:
# BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
# SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
# REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# JOB_SECRET: ${{ secrets.JOB_SECRET }}
# SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
# SLAB_URL: ${{ secrets.SLAB_URL }}
# SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}

102
Makefile
View File

@@ -1438,14 +1438,14 @@ bench_integer_hpu: install_rs_check_toolchain
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
bench_integer_compression: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
.PHONY: bench_integer_compression_gpu
bench_integer_compression_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1459,6 +1459,7 @@ bench_integer_compression_128b_gpu: install_rs_check_toolchain
.PHONY: bench_integer_zk_gpu
bench_integer_zk_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
@@ -1526,7 +1527,7 @@ bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
bench_integer_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,zk-pok,pbs-stats \
@@ -1780,6 +1781,101 @@ bench_hlapi_kvstore: install_rs_check_toolchain
--bench hlapi-kvstore \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
.PHONY: bench_summary # Run summary benchmarks
bench_summary: install_rs_check_toolchain
# Arithmetic operations: addition, multiplication, division, comparison
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi_unsigned \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::add|::mul|::gt|::div_rem'
# Noise squash
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::noise_squash::'
# ERC20
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,internal-keycache -p tfhe-benchmark -- '::transfer::overflow'
# DEX
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-dex \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::no_cmux::'
# ZK
# Proof is done on CPU node of the instance
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,zk-pok,pbs-stats \
-p tfhe-benchmark -- '::pke_zk_proof'
# Verify is done on GPUs
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,pbs-stats,zk-pok -p tfhe-benchmark --profile release_lto_off --
# Compression
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_summary_gpu # Run summary benchmarks on GPU
bench_summary_gpu: install_rs_check_toolchain
# Arithmetic operations: addition, multiplication, division, comparison
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=FAST_DEFAULT __TFHE_RS_BENCH_BIT_SIZES_SET=FAST __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::add|::mul|::gt|::div_rem'
# Noise squash
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::noise_squash::'
# Noise squash and compression
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::decomp_noise_squash_comp::'
# ERC20
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off -- '::transfer::overflow'
# DEX
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-dex \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::no_cmux::'
# ZK
# Proof is done on CPU node of the instance
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=fast_default __TFHE_RS_BENCH_BIT_SIZES_SET=fast \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,zk-pok,pbs-stats \
-p tfhe-benchmark -- '::pke_zk_proof'
# Verify is done on GPUs
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=fast_default __TFHE_RS_BENCH_BIT_SIZES_SET=fast \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --
# Compression
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_custom # Run benchmarks with a user-defined command
bench_custom: install_rs_check_toolchain

View File

@@ -12,6 +12,7 @@
"n3-H100x4": 6.08,
"n3-H100x2": 3.04,
"n3-L40x1": 0.80,
"n3-L40x8": 6.40,
"n3-H100-SXM5x8": 15.36,
"hpu_x1": 1.0,
"hpu_x2": 1.4,

View File

@@ -106,3 +106,10 @@ environment_name = "norway"
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
flavor_name = "n3-RTX-A4000x4"
user = "ubuntu"
[backend.hyperstack.8-l40]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
flavor_name = "n3-L40x8"
user = "ubuntu"

View File

@@ -1,6 +1,7 @@
use benchmark::params_aliases::*;
use benchmark::utilities::{
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, BitSizesSet, EnvConfig,
OperatorType,
};
use criterion::{black_box, criterion_group, Criterion, Throughput};
use rayon::prelude::*;
@@ -8,8 +9,35 @@ use std::cmp::max;
use tfhe::integer::ciphertext::CompressedCiphertextListBuilder;
use tfhe::integer::{ClientKey, RadixCiphertext};
use tfhe::keycache::NamedParam;
use tfhe::shortint::parameters::LweCiphertextCount;
use tfhe::shortint::MessageModulus;
use tfhe::{get_pbs_count, reset_pbs_count};
fn default_config(
lwe_per_glwe: &LweCiphertextCount,
message_modulus: &MessageModulus,
) -> Vec<usize> {
let env_config = EnvConfig::new();
match env_config.bit_sizes_set {
BitSizesSet::Fast => {
vec![64]
}
_ => {
vec![
2,
8,
16,
32,
64,
128,
256,
lwe_per_glwe.0 * message_modulus.0.ilog2() as usize,
]
}
}
}
fn cpu_glwe_packing(c: &mut Criterion) {
let bench_name = "integer::packing_compression";
let mut bench_group = c.benchmark_group(bench_name);
@@ -29,16 +57,7 @@ fn cpu_glwe_packing(c: &mut Criterion) {
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
for bit_size in [
2,
8,
16,
32,
64,
128,
256,
comp_param.lwe_per_glwe().0 * log_message_modulus,
] {
for bit_size in default_config(&comp_param.lwe_per_glwe(), &param.message_modulus) {
assert_eq!(bit_size % log_message_modulus, 0);
let num_blocks = bit_size / log_message_modulus;
@@ -159,17 +178,17 @@ fn cpu_glwe_packing(c: &mut Criterion) {
mod cuda {
use super::*;
use benchmark::utilities::cuda_integer_utils::cuda_local_streams;
use benchmark::utilities::{get_param_type, ParamType};
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
use tfhe::integer::compression_keys::CompressionPrivateKeys;
use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder;
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
use tfhe::integer::gpu::gen_keys_radix_gpu;
use tfhe::shortint::parameters::CompressionParameters;
use tfhe::shortint::PBSParameters;
#[derive(Clone)]
struct BenchConfig {
param: PBSParameters,
param: tfhe::shortint::AtomicPatternParameters,
comp_param: CompressionParameters,
bit_size: usize,
cks: ClientKey,
@@ -289,7 +308,7 @@ mod cuda {
write_to_json::<u64, _>(
&bench_id_pack,
(comp_param, param.into()),
(comp_param, param),
comp_param.name(),
"pack",
&OperatorType::Atomic,
@@ -452,7 +471,7 @@ mod cuda {
write_to_json::<u64, _>(
&bench_id_unpack,
(comp_param, param.into()),
(comp_param, param),
comp_param.name(),
"unpack",
&OperatorType::Atomic,
@@ -464,64 +483,62 @@ mod cuda {
}
fn gpu_glwe_packing(c: &mut Criterion) {
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let comp_param =
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
let (param, comp_param): (
tfhe::shortint::AtomicPatternParameters,
CompressionParameters,
) = match get_param_type() {
ParamType::Classical => (
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
_ => (
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
};
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
let mut config = BenchConfig {
param: tfhe::shortint::PBSParameters::MultiBitPBS(param),
param,
comp_param,
cks,
private_compression_key,
bit_size: 0,
};
for bit_size in [
2,
8,
16,
32,
64,
128,
256,
comp_param.lwe_per_glwe().0 * log_message_modulus,
] {
for bit_size in default_config(&comp_param.lwe_per_glwe(), &param.message_modulus()) {
config.bit_size = bit_size;
execute_gpu_glwe_packing(c, config.clone());
}
}
fn gpu_glwe_unpacking(c: &mut Criterion) {
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let comp_param =
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
let (param, comp_param): (
tfhe::shortint::AtomicPatternParameters,
CompressionParameters,
) = match get_param_type() {
ParamType::Classical => (
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
_ => (
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
),
};
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
let mut config = BenchConfig {
param: PBSParameters::MultiBitPBS(param),
param,
comp_param,
bit_size: 0,
cks,
private_compression_key,
};
for bit_size in [
2,
8,
16,
32,
64,
128,
256,
comp_param.lwe_per_glwe().0 * log_message_modulus,
] {
for bit_size in default_config(&comp_param.lwe_per_glwe(), &param.message_modulus()) {
config.bit_size = bit_size;
execute_gpu_glwe_unpacking(c, config.clone());
}

View File

@@ -466,6 +466,7 @@ fn ks_params_default_name(params: &ShortintKeySwitchingParameters) -> String {
named_params_impl!(CompressionParameters =>
COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
V1_6_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
; fallback => comp_params_default_name
);