mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
11 Commits
am/chore/m
...
al/bench_8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b1e25c65a7 | ||
|
|
cba4f2e6dd | ||
|
|
6199610865 | ||
|
|
aae9e6c9a5 | ||
|
|
6ecf2a59e2 | ||
|
|
3169ee8093 | ||
|
|
8119c2287b | ||
|
|
e4c7f83e17 | ||
|
|
349846bc11 | ||
|
|
b1fb4b2ae0 | ||
|
|
da3c55c50b |
126
.github/workflows/benchmark_summary.yml
vendored
Normal file
126
.github/workflows/benchmark_summary.yml
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
# Run all benchmarks displayed in the internal documentation.
|
||||
name: benchmark_summary
|
||||
|
||||
run-name: Benchmark Summary
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run-cpu-benchmarks:
|
||||
description: "Run CPU benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
run-gpu-benchmarks:
|
||||
description: "Run GPU benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
gpu-profile:
|
||||
description: "GPU Instance type"
|
||||
required: true
|
||||
default: "multi-h100-sxm5 (n3-H100-SXM5x8)"
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "4-l40 (n3-L40x4)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
|
||||
run-hpu-benchmarks:
|
||||
description: "Run HPU benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
|
||||
permissions: {}
|
||||
|
||||
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
|
||||
|
||||
jobs:
|
||||
parse-gpu-inputs:
|
||||
name: benchmark_summary/parse-gpu-inputs
|
||||
if: inputs.run-gpu-benchmarks
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
|
||||
env:
|
||||
INPUTS_PROFILE: ${{ inputs.gpu-profile }}
|
||||
steps:
|
||||
- name: Parse profile
|
||||
id: parse_profile
|
||||
run: |
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE=$(echo "${INPUTS_PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
|
||||
echo "profile=${PROFILE}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Parse hardware name
|
||||
id: parse_hardware_name
|
||||
run: |
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
NAME=$(echo "${INPUTS_PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
run-benchmarks-cpu:
|
||||
name: benchmark_documentation/run-benchmarks-cpu-integer
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
if: inputs.run-cpu-benchmarks
|
||||
with:
|
||||
command: summary
|
||||
bench_type: both
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-gpu:
|
||||
name: benchmark_documentation/run-benchmarks-gpu
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
if: inputs.run-gpu-benchmarks
|
||||
needs: parse-gpu-inputs
|
||||
with:
|
||||
profile: ${{ needs.parse-gpu-inputs.outputs.profile }}
|
||||
hardware_name: ${{ needs.parse-gpu-inputs.outputs.hardware_name }}
|
||||
command: summary
|
||||
bench_type: both
|
||||
params_type: classical + multi_bit
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
# TODO add make recipe for HPU benchmarks
|
||||
# run-benchmarks-hpu:
|
||||
# name: benchmark_documentation/run-benchmarks-hpu
|
||||
# uses: ./.github/workflows/benchmark_hpu_common.yml
|
||||
# if: inputs.run-hpu-benchmarks
|
||||
# with:
|
||||
# command: summary
|
||||
# bench_type: both
|
||||
# v80_pcie_dev: 24
|
||||
# v80_serial_number: XFL12NWY3ZKG
|
||||
# secrets:
|
||||
# BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
# SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
# REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
# JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
# SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
# SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
# SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
# SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
119
.github/workflows/placeholder_workflow.yml
vendored
119
.github/workflows/placeholder_workflow.yml
vendored
@@ -1,18 +1,127 @@
|
||||
# Placeholder workflow file allowing running it without having to merge to main first
|
||||
name: placeholder_workflow
|
||||
|
||||
run-name: Summary benchs tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run-cpu-benchmarks:
|
||||
description: "Run CPU benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
run-gpu-benchmarks:
|
||||
description: "Run GPU benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
gpu-profile:
|
||||
description: "GPU Instance type"
|
||||
required: true
|
||||
default: "multi-h100-sxm5 (n3-H100-SXM5x8)"
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "4-l40 (n3-L40x4)"
|
||||
- "8-l40 (n3-L40x8)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
|
||||
run-hpu-benchmarks:
|
||||
description: "Run HPU benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
|
||||
permissions: {}
|
||||
|
||||
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
|
||||
|
||||
jobs:
|
||||
placeholder:
|
||||
name: placeholder_workflow/placeholder
|
||||
parse-gpu-inputs:
|
||||
name: benchmark_summary/parse-gpu-inputs
|
||||
if: inputs.run-gpu-benchmarks
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
|
||||
env:
|
||||
INPUTS_PROFILE: ${{ inputs.gpu-profile }}
|
||||
steps:
|
||||
- run: |
|
||||
echo "Hello this is a Placeholder Workflow"
|
||||
- name: Parse profile
|
||||
id: parse_profile
|
||||
run: |
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE=$(echo "${INPUTS_PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
|
||||
echo "profile=${PROFILE}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Parse hardware name
|
||||
id: parse_hardware_name
|
||||
run: |
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
NAME=$(echo "${INPUTS_PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
run-benchmarks-cpu:
|
||||
name: benchmark_documentation/run-benchmarks-cpu-integer
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
if: inputs.run-cpu-benchmarks
|
||||
with:
|
||||
command: summary
|
||||
bench_type: both
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-gpu:
|
||||
name: benchmark_documentation/run-benchmarks-gpu
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
if: inputs.run-gpu-benchmarks
|
||||
needs: parse-gpu-inputs
|
||||
with:
|
||||
profile: ${{ needs.parse-gpu-inputs.outputs.profile }}
|
||||
hardware_name: ${{ needs.parse-gpu-inputs.outputs.hardware_name }}
|
||||
command: summary
|
||||
bench_type: both
|
||||
params_type: classical + multi_bit
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
# TODO add make recipe for HPU benchmarks
|
||||
# run-benchmarks-hpu:
|
||||
# name: benchmark_documentation/run-benchmarks-hpu
|
||||
# uses: ./.github/workflows/benchmark_hpu_common.yml
|
||||
# if: inputs.run-hpu-benchmarks
|
||||
# with:
|
||||
# command: summary
|
||||
# bench_type: both
|
||||
# v80_pcie_dev: 24
|
||||
# v80_serial_number: XFL12NWY3ZKG
|
||||
# secrets:
|
||||
# BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
# SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
# REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
# JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
# SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
# SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
# SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
# SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
102
Makefile
102
Makefile
@@ -1438,14 +1438,14 @@ bench_integer_hpu: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-glwe_packing_compression \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_integer_compression_gpu
|
||||
bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-glwe_packing_compression \
|
||||
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
@@ -1459,6 +1459,7 @@ bench_integer_compression_128b_gpu: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_zk_gpu
|
||||
bench_integer_zk_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
@@ -1526,7 +1527,7 @@ bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,zk-pok,pbs-stats \
|
||||
@@ -1780,6 +1781,101 @@ bench_hlapi_kvstore: install_rs_check_toolchain
|
||||
--bench hlapi-kvstore \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_summary # Run summary benchmarks
|
||||
bench_summary: install_rs_check_toolchain
|
||||
# Arithmetic operations: addition, multiplication, division, comparison
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi_unsigned \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::add|::mul|::gt|::div_rem'
|
||||
|
||||
# Noise squash
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::noise_squash::'
|
||||
|
||||
# ERC20
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache -p tfhe-benchmark -- '::transfer::overflow'
|
||||
|
||||
# DEX
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-dex \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::no_cmux::'
|
||||
|
||||
# ZK
|
||||
# Proof is done on CPU node of the instance
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,zk-pok,pbs-stats \
|
||||
-p tfhe-benchmark -- '::pke_zk_proof'
|
||||
# Verify is done on GPUs
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,pbs-stats,zk-pok -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
# Compression
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-glwe_packing_compression \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_summary_gpu # Run summary benchmarks on GPU
|
||||
bench_summary_gpu: install_rs_check_toolchain
|
||||
# Arithmetic operations: addition, multiplication, division, comparison
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=FAST_DEFAULT __TFHE_RS_BENCH_BIT_SIZES_SET=FAST __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::add|::mul|::gt|::div_rem'
|
||||
|
||||
# Noise squash
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::noise_squash::'
|
||||
|
||||
# Noise squash and compression
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::decomp_noise_squash_comp::'
|
||||
|
||||
# ERC20
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off -- '::transfer::overflow'
|
||||
|
||||
# DEX
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-dex \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::no_cmux::'
|
||||
|
||||
# ZK
|
||||
# Proof is done on CPU node of the instance
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=fast_default __TFHE_RS_BENCH_BIT_SIZES_SET=fast \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,zk-pok,pbs-stats \
|
||||
-p tfhe-benchmark -- '::pke_zk_proof'
|
||||
# Verify is done on GPUs
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=fast_default __TFHE_RS_BENCH_BIT_SIZES_SET=fast \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --
|
||||
|
||||
# Compression
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-glwe_packing_compression \
|
||||
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_custom # Run benchmarks with a user-defined command
|
||||
bench_custom: install_rs_check_toolchain
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
"n3-H100x4": 6.08,
|
||||
"n3-H100x2": 3.04,
|
||||
"n3-L40x1": 0.80,
|
||||
"n3-L40x8": 6.40,
|
||||
"n3-H100-SXM5x8": 15.36,
|
||||
"hpu_x1": 1.0,
|
||||
"hpu_x2": 1.4,
|
||||
|
||||
@@ -106,3 +106,10 @@ environment_name = "norway"
|
||||
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
flavor_name = "n3-RTX-A4000x4"
|
||||
user = "ubuntu"
|
||||
|
||||
[backend.hyperstack.8-l40]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
flavor_name = "n3-L40x8"
|
||||
user = "ubuntu"
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use benchmark::params_aliases::*;
|
||||
use benchmark::utilities::{
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, OperatorType,
|
||||
get_bench_type, throughput_num_threads, write_to_json, BenchmarkType, BitSizesSet, EnvConfig,
|
||||
OperatorType,
|
||||
};
|
||||
use criterion::{black_box, criterion_group, Criterion, Throughput};
|
||||
use rayon::prelude::*;
|
||||
@@ -8,8 +9,35 @@ use std::cmp::max;
|
||||
use tfhe::integer::ciphertext::CompressedCiphertextListBuilder;
|
||||
use tfhe::integer::{ClientKey, RadixCiphertext};
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::shortint::parameters::LweCiphertextCount;
|
||||
use tfhe::shortint::MessageModulus;
|
||||
use tfhe::{get_pbs_count, reset_pbs_count};
|
||||
|
||||
fn default_config(
|
||||
lwe_per_glwe: &LweCiphertextCount,
|
||||
message_modulus: &MessageModulus,
|
||||
) -> Vec<usize> {
|
||||
let env_config = EnvConfig::new();
|
||||
|
||||
match env_config.bit_sizes_set {
|
||||
BitSizesSet::Fast => {
|
||||
vec![64]
|
||||
}
|
||||
_ => {
|
||||
vec![
|
||||
2,
|
||||
8,
|
||||
16,
|
||||
32,
|
||||
64,
|
||||
128,
|
||||
256,
|
||||
lwe_per_glwe.0 * message_modulus.0.ilog2() as usize,
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
let bench_name = "integer::packing_compression";
|
||||
let mut bench_group = c.benchmark_group(bench_name);
|
||||
@@ -29,16 +57,7 @@ fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
|
||||
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
|
||||
|
||||
for bit_size in [
|
||||
2,
|
||||
8,
|
||||
16,
|
||||
32,
|
||||
64,
|
||||
128,
|
||||
256,
|
||||
comp_param.lwe_per_glwe().0 * log_message_modulus,
|
||||
] {
|
||||
for bit_size in default_config(&comp_param.lwe_per_glwe(), ¶m.message_modulus) {
|
||||
assert_eq!(bit_size % log_message_modulus, 0);
|
||||
let num_blocks = bit_size / log_message_modulus;
|
||||
|
||||
@@ -159,17 +178,17 @@ fn cpu_glwe_packing(c: &mut Criterion) {
|
||||
mod cuda {
|
||||
use super::*;
|
||||
use benchmark::utilities::cuda_integer_utils::cuda_local_streams;
|
||||
use benchmark::utilities::{get_param_type, ParamType};
|
||||
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
|
||||
use tfhe::integer::compression_keys::CompressionPrivateKeys;
|
||||
use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder;
|
||||
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
|
||||
use tfhe::integer::gpu::gen_keys_radix_gpu;
|
||||
use tfhe::shortint::parameters::CompressionParameters;
|
||||
use tfhe::shortint::PBSParameters;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct BenchConfig {
|
||||
param: PBSParameters,
|
||||
param: tfhe::shortint::AtomicPatternParameters,
|
||||
comp_param: CompressionParameters,
|
||||
bit_size: usize,
|
||||
cks: ClientKey,
|
||||
@@ -289,7 +308,7 @@ mod cuda {
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id_pack,
|
||||
(comp_param, param.into()),
|
||||
(comp_param, param),
|
||||
comp_param.name(),
|
||||
"pack",
|
||||
&OperatorType::Atomic,
|
||||
@@ -452,7 +471,7 @@ mod cuda {
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id_unpack,
|
||||
(comp_param, param.into()),
|
||||
(comp_param, param),
|
||||
comp_param.name(),
|
||||
"unpack",
|
||||
&OperatorType::Atomic,
|
||||
@@ -464,64 +483,62 @@ mod cuda {
|
||||
}
|
||||
|
||||
fn gpu_glwe_packing(c: &mut Criterion) {
|
||||
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let comp_param =
|
||||
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
|
||||
let (param, comp_param): (
|
||||
tfhe::shortint::AtomicPatternParameters,
|
||||
CompressionParameters,
|
||||
) = match get_param_type() {
|
||||
ParamType::Classical => (
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
|
||||
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
_ => (
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
|
||||
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
};
|
||||
|
||||
let cks = ClientKey::new(param);
|
||||
let private_compression_key = cks.new_compression_private_key(comp_param);
|
||||
|
||||
let mut config = BenchConfig {
|
||||
param: tfhe::shortint::PBSParameters::MultiBitPBS(param),
|
||||
param,
|
||||
comp_param,
|
||||
cks,
|
||||
private_compression_key,
|
||||
bit_size: 0,
|
||||
};
|
||||
for bit_size in [
|
||||
2,
|
||||
8,
|
||||
16,
|
||||
32,
|
||||
64,
|
||||
128,
|
||||
256,
|
||||
comp_param.lwe_per_glwe().0 * log_message_modulus,
|
||||
] {
|
||||
for bit_size in default_config(&comp_param.lwe_per_glwe(), ¶m.message_modulus()) {
|
||||
config.bit_size = bit_size;
|
||||
execute_gpu_glwe_packing(c, config.clone());
|
||||
}
|
||||
}
|
||||
|
||||
fn gpu_glwe_unpacking(c: &mut Criterion) {
|
||||
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
let comp_param =
|
||||
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
|
||||
let log_message_modulus = param.message_modulus.0.ilog2() as usize;
|
||||
let (param, comp_param): (
|
||||
tfhe::shortint::AtomicPatternParameters,
|
||||
CompressionParameters,
|
||||
) = match get_param_type() {
|
||||
ParamType::Classical => (
|
||||
BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
|
||||
BENCH_COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
_ => (
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into(),
|
||||
BENCH_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
),
|
||||
};
|
||||
|
||||
let cks = ClientKey::new(param);
|
||||
let private_compression_key = cks.new_compression_private_key(comp_param);
|
||||
|
||||
let mut config = BenchConfig {
|
||||
param: PBSParameters::MultiBitPBS(param),
|
||||
param,
|
||||
comp_param,
|
||||
bit_size: 0,
|
||||
cks,
|
||||
private_compression_key,
|
||||
};
|
||||
for bit_size in [
|
||||
2,
|
||||
8,
|
||||
16,
|
||||
32,
|
||||
64,
|
||||
128,
|
||||
256,
|
||||
comp_param.lwe_per_glwe().0 * log_message_modulus,
|
||||
] {
|
||||
for bit_size in default_config(&comp_param.lwe_per_glwe(), ¶m.message_modulus()) {
|
||||
config.bit_size = bit_size;
|
||||
execute_gpu_glwe_unpacking(c, config.clone());
|
||||
}
|
||||
|
||||
@@ -466,6 +466,7 @@ fn ks_params_default_name(params: &ShortintKeySwitchingParameters) -> String {
|
||||
|
||||
named_params_impl!(CompressionParameters =>
|
||||
COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
V1_6_COMP_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
; fallback => comp_params_default_name
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user