From 97690ab3bd66d017a66ef467e4e8a9b7c48e4235 Mon Sep 17 00:00:00 2001 From: Agnes Leroy Date: Mon, 28 Apr 2025 16:00:41 +0200 Subject: [PATCH] chore(gpu): write swap bench --- .github/workflows/benchmark_dex.yml | 143 +++++ .github/workflows/benchmark_gpu_dex.yml | 44 ++ .../workflows/benchmark_gpu_dex_common.yml | 201 +++++++ .../workflows/benchmark_gpu_dex_weekly.yml | 35 ++ Makefile | 12 + tfhe/Cargo.toml | 6 + tfhe/benches/high_level_api/dex.rs | 540 ++++++++++++++++++ tfhe/benches/high_level_api/erc20.rs | 56 +- tfhe/benches/utilities.rs | 8 + 9 files changed, 1015 insertions(+), 30 deletions(-) create mode 100644 .github/workflows/benchmark_dex.yml create mode 100644 .github/workflows/benchmark_gpu_dex.yml create mode 100644 .github/workflows/benchmark_gpu_dex_common.yml create mode 100644 .github/workflows/benchmark_gpu_dex_weekly.yml create mode 100644 tfhe/benches/high_level_api/dex.rs diff --git a/.github/workflows/benchmark_dex.yml b/.github/workflows/benchmark_dex.yml new file mode 100644 index 000000000..be5bde90e --- /dev/null +++ b/.github/workflows/benchmark_dex.yml @@ -0,0 +1,143 @@ +# Run all DEX benchmarks on an AWS instance and return parsed results to Slab CI bot. +name: DEX benchmarks + +on: + workflow_dispatch: + schedule: + # Weekly benchmarks will be triggered each Saturday at 5a.m. + - cron: '0 5 * * 6' + +env: + CARGO_TERM_COLOR: always + RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + RUST_BACKTRACE: "full" + RUST_MIN_STACK: "8388608" + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + +jobs: + setup-instance: + name: Setup instance (dex-benchmarks) + runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' || + (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') + outputs: + runner-name: ${{ steps.start-instance.outputs.label }} + steps: + - name: Start instance + id: start-instance + uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac + with: + mode: start + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + backend: aws + profile: bench + + dex-benchmarks: + name: Execute DEX benchmarks + needs: setup-instance + runs-on: ${{ needs.setup-instance.outputs.runner-name }} + concurrency: + group: ${{ github.workflow_ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + continue-on-error: true + timeout-minutes: 720 # 12 hours + steps: + - name: Checkout tfhe-rs repo with tags + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + with: + fetch-depth: 0 + persist-credentials: 'false' + token: ${{ secrets.REPO_CHECKOUT_TOKEN }} + + - name: Get benchmark details + run: | + { + echo "BENCH_DATE=$(date --iso-8601=seconds)"; + echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; + echo "COMMIT_HASH=$(git describe --tags --dirty)"; + } >> "${GITHUB_ENV}" + + - name: Install rust + uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 + with: + toolchain: nightly + + - name: Checkout Slab repo + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + with: + repository: zama-ai/slab + path: slab + persist-credentials: 'false' + token: ${{ 
secrets.REPO_CHECKOUT_TOKEN }} + + - name: Run benchmarks + run: | + make bench_hlapi_dex + + - name: Parse results + run: | + python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ + --database tfhe_rs \ + --hardware "hpc7a.96xlarge" \ + --project-version "${{ env.COMMIT_HASH }}" \ + --branch ${{ github.ref_name }} \ + --commit-date "${{ env.COMMIT_DATE }}" \ + --bench-date "${{ env.BENCH_DATE }}" \ + --walk-subdirs \ + --name-suffix avx512 + + - name: Parse PBS counts + run: | + python3 ./ci/benchmark_parser.py tfhe/dex_pbs_count.csv ${{ env.RESULTS_FILENAME }} \ + --object-sizes \ + --append-results + + - name: Upload parsed results artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: ${{ github.sha }}_dex + path: ${{ env.RESULTS_FILENAME }} + + - name: Send data to Slab + shell: bash + run: | + python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ + --slab-url "${{ secrets.SLAB_URL }}" + + - name: Slack Notification + if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }} + continue-on-error: true + uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_MESSAGE: "DEX benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" + + teardown-instance: + name: Teardown instance (dex-benchmarks) + if: ${{ always() && needs.setup-instance.result == 'success' }} + needs: [ setup-instance, dex-benchmarks ] + runs-on: ubuntu-latest + steps: + - name: Stop instance + id: stop-instance + uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac + with: + mode: stop + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + label: ${{ needs.setup-instance.outputs.runner-name }} + + - name: Slack Notification + if: ${{ failure() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_MESSAGE: "Instance teardown (dex-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_dex.yml b/.github/workflows/benchmark_gpu_dex.yml new file mode 100644 index 000000000..3bd21e398 --- /dev/null +++ b/.github/workflows/benchmark_gpu_dex.yml @@ -0,0 +1,44 @@ +# Run CUDA DEX benchmarks on a Hyperstack VM and return parsed results to Slab CI bot. 
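+# The "profile" input pairs a Slab profile with its hardware name, e.g. "l40 (n3-L40x1)":
+# the parse-inputs job below splits it into profile=l40 and hardware_name=n3-L40x1.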
+name: Cuda DEX benchmarks + +on: + workflow_dispatch: + inputs: + profile: + description: "Instance type" + required: true + type: choice + options: + - "l40 (n3-L40x1)" + - "single-h100 (n3-H100x1)" + - "2-h100 (n3-H100x2)" + - "4-h100 (n3-H100x4)" + - "multi-h100 (n3-H100x8)" + - "multi-h100-nvlink (n3-H100x8-NVLink)" + - "multi-h100-sxm5 (n3-H100x8-SXM5)" + +jobs: + parse-inputs: + runs-on: ubuntu-latest + outputs: + profile: ${{ steps.parse_profile.outputs.profile }} + hardware_name: ${{ steps.parse_hardware_name.outputs.name }} + steps: + - name: Parse profile + id: parse_profile + run: | + echo "profile=$(echo '${{ inputs.profile }}' | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}" + + - name: Parse hardware name + id: parse_hardware_name + run: | + echo "name=$(echo '${{ inputs.profile }}' | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}" + + run-benchmarks: + name: Run benchmarks + needs: parse-inputs + uses: ./.github/workflows/benchmark_gpu_dex_common.yml + with: + profile: ${{ needs.parse-inputs.outputs.profile }} + hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }} + secrets: inherit diff --git a/.github/workflows/benchmark_gpu_dex_common.yml b/.github/workflows/benchmark_gpu_dex_common.yml new file mode 100644 index 000000000..b2a6b34bb --- /dev/null +++ b/.github/workflows/benchmark_gpu_dex_common.yml @@ -0,0 +1,201 @@ +# Run DEX benchmarks on an instance with CUDA and return parsed results to Slab CI bot. +name: Cuda DEX benchmarks - common + +on: + workflow_call: + inputs: + backend: + type: string + default: hyperstack + profile: + type: string + required: true + hardware_name: + type: string + required: true + secrets: + REPO_CHECKOUT_TOKEN: + required: true + SLAB_ACTION_TOKEN: + required: true + SLAB_BASE_URL: + required: true + SLAB_URL: + required: true + JOB_SECRET: + required: true + SLACK_CHANNEL: + required: true + BOT_USERNAME: + required: true + SLACK_WEBHOOK: + required: true + +env: + CARGO_TERM_COLOR: always + RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json + PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv + ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + RUST_BACKTRACE: "full" + RUST_MIN_STACK: "8388608" + SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} + SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png + SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + +jobs: + setup-instance: + name: Setup instance (cuda-dex-benchmarks) + runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' || + (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') + outputs: + # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step. + # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance. + # Since the on-demand remote label is set before failure, we have to do the logical OR in this order, + # otherwise we'll try to run the next job on a non-existing on-demand instance. 
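+      # Example: if start-remote-instance fails and the profile is 'single-h100', the
+      # use-permanent-instance step can fall back to the permanent h100x1 runner group,
+      # which then wins the logical OR below; any other profile makes the job fail instead.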
+ runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }} + remote-instance-outcome: ${{ steps.start-remote-instance.outcome }} + steps: + - name: Start remote instance + id: start-remote-instance + continue-on-error: true + uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac + with: + mode: start + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + backend: ${{ inputs.backend }} + profile: ${{ inputs.profile }} + + - name: Acknowledge remote instance failure + if: steps.start-remote-instance.outcome == 'failure' && + inputs.profile != 'single-h100' + run: | + echo "Remote instance instance has failed to start (profile provided: '${{ inputs.profile }}')" + echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')" + exit 1 + + # This will allow to fallback on permanent instances running on Hyperstack. + - name: Use permanent remote instance + id: use-permanent-instance + if: env.SECRETS_AVAILABLE == 'true' && + steps.start-remote-instance.outcome == 'failure' && + inputs.profile == 'single-h100' + run: | + echo "runner_group=h100x1" >> "$GITHUB_OUTPUT" + + cuda-dex-benchmarks: + name: Cuda DEX benchmarks (${{ inputs.profile }}) + needs: setup-instance + runs-on: ${{ needs.setup-instance.outputs.runner-name }} + strategy: + fail-fast: false + # explicit include-based build matrix, of known valid options + matrix: + include: + - os: ubuntu-22.04 + cuda: "12.2" + gcc: 11 + steps: + - name: Checkout tfhe-rs repo with tags + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + with: + fetch-depth: 0 + persist-credentials: 'false' + token: ${{ secrets.REPO_CHECKOUT_TOKEN }} + + - name: Setup Hyperstack dependencies + if: needs.setup-instance.outputs.remote-instance-outcome == 'success' + uses: ./.github/actions/gpu_setup + with: + cuda-version: ${{ matrix.cuda }} + gcc-version: ${{ matrix.gcc }} + + - name: Get benchmark details + run: | + { + echo "BENCH_DATE=$(date --iso-8601=seconds)"; + echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"; + echo "COMMIT_HASH=$(git describe --tags --dirty)"; + } >> "${GITHUB_ENV}" + + - name: Install rust + uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 + with: + toolchain: nightly + + - name: Run benchmarks + run: | + make bench_hlapi_dex_gpu + + - name: Parse results + run: | + python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \ + --database tfhe_rs \ + --hardware "${{ inputs.hardware_name }}" \ + --backend gpu \ + --project-version "${{ env.COMMIT_HASH }}" \ + --branch ${{ github.ref_name }} \ + --commit-date "${{ env.COMMIT_DATE }}" \ + --bench-date "${{ env.BENCH_DATE }}" \ + --walk-subdirs \ + --name-suffix avx512 + + - name: Upload parsed results artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: ${{ github.sha }}_dex_${{ inputs.profile }} + path: ${{ env.RESULTS_FILENAME }} + + - name: Checkout Slab repo + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + with: + repository: zama-ai/slab + path: slab + persist-credentials: 'false' + token: ${{ secrets.REPO_CHECKOUT_TOKEN }} + + - name: Send data to Slab + shell: bash + run: | + python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \ + --slab-url "${{ secrets.SLAB_URL }}" + + slack-notify: + name: 
Slack Notification + needs: [ setup-instance, cuda-dex-benchmarks ] + runs-on: ubuntu-latest + if: ${{ always() && needs.cuda-dex-benchmarks.result != 'skipped' && failure() }} + continue-on-error: true + steps: + - name: Send message + uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 + env: + SLACK_COLOR: ${{ needs.cuda-dex-benchmarks.result }} + SLACK_MESSAGE: "Cuda DEX benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-dex-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})" + + teardown-instance: + name: Teardown instance (cuda-dex-${{ inputs.profile }}-benchmarks) + if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }} + needs: [ setup-instance, cuda-dex-benchmarks, slack-notify ] + runs-on: ubuntu-latest + steps: + - name: Stop instance + id: stop-instance + uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac + with: + mode: stop + github-token: ${{ secrets.SLAB_ACTION_TOKEN }} + slab-url: ${{ secrets.SLAB_BASE_URL }} + job-secret: ${{ secrets.JOB_SECRET }} + label: ${{ needs.setup-instance.outputs.runner-name }} + + - name: Slack Notification + if: ${{ failure() }} + continue-on-error: true + uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 + env: + SLACK_COLOR: ${{ job.status }} + SLACK_MESSAGE: "Instance teardown (cuda-dex-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})" diff --git a/.github/workflows/benchmark_gpu_dex_weekly.yml b/.github/workflows/benchmark_gpu_dex_weekly.yml new file mode 100644 index 000000000..720b82c24 --- /dev/null +++ b/.github/workflows/benchmark_gpu_dex_weekly.yml @@ -0,0 +1,35 @@ +# Run CUDA DEX benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot. +name: Cuda DEX weekly benchmarks + +on: + schedule: + # Weekly benchmarks will be triggered each Saturday at 9a.m. 
+ - cron: '0 9 * * 6' + +jobs: + run-benchmarks-1-h100: + name: Run benchmarks (1xH100) + if: github.repository == 'zama-ai/tfhe-rs' + uses: ./.github/workflows/benchmark_gpu_dex_common.yml + with: + profile: single-h100 + hardware_name: n3-H100x1 + secrets: inherit + + run-benchmarks-2-h100: + name: Run benchmarks (2xH100) + if: github.repository == 'zama-ai/tfhe-rs' + uses: ./.github/workflows/benchmark_gpu_dex_common.yml + with: + profile: 2-h100 + hardware_name: n3-H100x2 + secrets: inherit + + run-benchmarks-8-h100: + name: Run benchmarks (8xH100) + if: github.repository == 'zama-ai/tfhe-rs' + uses: ./.github/workflows/benchmark_gpu_dex_common.yml + with: + profile: multi-h100 + hardware_name: n3-H100x8 + secrets: inherit diff --git a/Makefile b/Makefile index 2ace19138..e13db7fe9 100644 --- a/Makefile +++ b/Makefile @@ -1293,6 +1293,18 @@ bench_hlapi_erc20_gpu: install_rs_check_toolchain --bench hlapi-erc20 \ --features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) -- +.PHONY: bench_hlapi_dex # Run benchmarks for ECR20 operations +bench_hlapi_dex: install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + --bench hlapi-dex \ + --features=integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) -- + +.PHONY: bench_hlapi_dex_gpu # Run benchmarks for ECR20 operations on GPU +bench_hlapi_dex_gpu: install_rs_check_toolchain + RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \ + --bench hlapi-dex \ + --features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) -- + .PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate bench_tfhe_zk_pok: install_rs_check_toolchain RUSTFLAGS="$(RUSTFLAGS)" \ diff --git a/tfhe/Cargo.toml b/tfhe/Cargo.toml index 992dee09f..dc6da59bb 100644 --- a/tfhe/Cargo.toml +++ b/tfhe/Cargo.toml @@ -254,6 +254,12 @@ path = "benches/high_level_api/erc20.rs" harness = false required-features = ["integer", "internal-keycache"] +[[bench]] +name = "hlapi-dex" +path = "benches/high_level_api/dex.rs" +harness = false +required-features = ["integer", "internal-keycache"] + [[bench]] name = "keygen" path = "benches/keygen/bench.rs" diff --git a/tfhe/benches/high_level_api/dex.rs b/tfhe/benches/high_level_api/dex.rs new file mode 100644 index 000000000..bccf3ef03 --- /dev/null +++ b/tfhe/benches/high_level_api/dex.rs @@ -0,0 +1,540 @@ +#[path = "../utilities.rs"] +mod utilities; + +#[cfg(feature = "gpu")] +use crate::utilities::configure_gpu; +use crate::utilities::{write_to_json, OperatorType}; + +use criterion::measurement::WallTime; +use criterion::{BenchmarkGroup, Criterion}; +use rand::prelude::*; +use rand::thread_rng; +use std::ops::{Add, Div, Mul, Sub}; +use tfhe::keycache::NamedParam; +use tfhe::prelude::*; +use tfhe::shortint::parameters::*; +#[cfg(not(feature = "gpu"))] +use tfhe::{set_server_key, CompressedServerKey}; +use tfhe::{ClientKey, ConfigBuilder, FheBool, FheUint128, FheUint64}; + +pub(crate) fn transfer_whitepaper( + from_amount: &FheType, + to_amount: &FheType, + amount: &FheType, +) -> (FheType, FheType) +where + FheType: Add + for<'a> FheOrd<&'a FheType>, + FheBool: IfThenElse, + for<'a> &'a FheType: Add + Sub, +{ + let has_enough_funds = (from_amount).ge(amount); + + let mut new_to_amount = to_amount + amount; + new_to_amount = has_enough_funds.if_then_else(&new_to_amount, to_amount); + + let mut new_from_amount = from_amount - amount; + new_from_amount = has_enough_funds.if_then_else(&new_from_amount, from_amount); + + 
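+    // Both balances go through the same comparison-driven cmux, so the ciphertexts are
+    // always rewritten and the result does not reveal whether the transfer succeeded.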
(new_from_amount, new_to_amount) +} + +#[allow(clippy::too_many_arguments)] +fn swap_request( + from_balance_0: &FheType, + from_balance_1: &FheType, + current_dex_balance_0: &FheType, + current_dex_balance_1: &FheType, + to_balance_0: &FheType, + to_balance_1: &FheType, + total_dex_token_0_in: &FheType, + total_dex_token_1_in: &FheType, + amount0: &FheType, + amount1: &FheType, +) -> (FheType, FheType, FheType, FheType) +where + FheType: Add + for<'a> FheOrd<&'a FheType> + Clone, + FheBool: IfThenElse, + for<'a> &'a FheType: Add + Sub, +{ + let (_, new_current_balance_0) = + transfer_whitepaper(from_balance_0, current_dex_balance_0, amount0); + let (_, new_current_balance_1) = + transfer_whitepaper(from_balance_1, current_dex_balance_1, amount1); + let sent0 = &new_current_balance_0 - current_dex_balance_0; + let sent1 = &new_current_balance_1 - current_dex_balance_1; + let pending_0_in = to_balance_0 + &sent0; + let pending_total_token_0_in = total_dex_token_0_in + &sent0; + let pending_1_in = to_balance_1 + &sent1; + let pending_total_token_1_in = total_dex_token_1_in + &sent1; + ( + pending_0_in, + pending_total_token_0_in, + pending_1_in, + pending_total_token_1_in, + ) +} + +#[allow(clippy::too_many_arguments)] +fn swap_claim( + pending_0_in: &FheType, + pending_1_in: &FheType, + total_dex_token_0_in: u64, + total_dex_token_1_in: u64, + total_dex_token_0_out: u64, + total_dex_token_1_out: u64, + old_balance_0: &FheType, + old_balance_1: &FheType, + current_dex_balance_0: &FheType, + current_dex_balance_1: &FheType, +) -> (FheType, FheType) +where + FheType: CastFrom + + for<'a> FheOrd<&'a FheType> + + CastFrom + + Clone + + Add, + BigFheType: CastFrom + Mul + Div, + FheBool: IfThenElse, + for<'a> &'a FheType: Add + Sub, +{ + let mut new_balance_0 = old_balance_0.clone(); + let mut new_balance_1 = old_balance_1.clone(); + if total_dex_token_1_in != 0 { + let big_pending_1_in = BigFheType::cast_from(pending_1_in.clone()); + let big_amount_0_out = + (big_pending_1_in * total_dex_token_0_out as u128) / total_dex_token_1_in as u128; + let amount_0_out = FheType::cast_from(big_amount_0_out); + let (_, new_balance_0_tmp) = + transfer_whitepaper(current_dex_balance_0, old_balance_0, &amount_0_out); + new_balance_0 = new_balance_0_tmp; + } + if total_dex_token_0_in != 0 { + let big_pending_0_in = BigFheType::cast_from(pending_0_in.clone()); + let big_amount_1_out = + (big_pending_0_in * total_dex_token_1_out as u128) / total_dex_token_0_in as u128; + let amount_1_out = FheType::cast_from(big_amount_1_out); + let (_, new_balance_1_tmp) = + transfer_whitepaper(current_dex_balance_1, old_balance_1, &amount_1_out); + new_balance_1 = new_balance_1_tmp; + } + + (new_balance_0, new_balance_1) +} + +#[cfg(feature = "pbs-stats")] +mod pbs_stats { + use super::*; + use std::fs::{File, OpenOptions}; + use std::io::Write; + use std::path::Path; + + fn write_result(file: &mut File, name: &str, value: usize) { + let line = format!("{name},{value}\n"); + let error_message = format!("cannot write {name} result into file"); + file.write_all(line.as_bytes()).expect(&error_message); + } + + pub fn print_swap_request_pbs_counts( + client_key: &ClientKey, + type_name: &str, + swap_request_func: F, + ) where + FheType: FheEncrypt, + F: for<'a> Fn( + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + ) -> (FheType, FheType, FheType, FheType), + { + let mut rng = thread_rng(); + + let from_balance_0 = 
FheType::encrypt(rng.gen::(), client_key); + let from_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let current_dex_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let current_dex_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let to_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let to_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let total_dex_token_0 = FheType::encrypt(rng.gen::(), client_key); + let total_dex_token_1 = FheType::encrypt(rng.gen::(), client_key); + let amount_0 = FheType::encrypt(rng.gen::(), client_key); + let amount_1 = FheType::encrypt(rng.gen::(), client_key); + + #[cfg(feature = "gpu")] + configure_gpu(client_key); + + tfhe::reset_pbs_count(); + let (_, _, _, _) = swap_request_func( + &from_balance_0, + &from_balance_1, + ¤t_dex_balance_0, + ¤t_dex_balance_1, + &to_balance_0, + &to_balance_1, + &total_dex_token_0, + &total_dex_token_1, + &amount_0, + &amount_1, + ); + let count = tfhe::get_pbs_count(); + + println!("ERC20 swap request/::{type_name}: {count} PBS"); + + let params = client_key.computation_parameters(); + + let test_name = if cfg!(feature = "gpu") { + format!("hlapi::cuda::dex::swap_request::pbs_count::{type_name}") + } else { + format!("hlapi::dex::swap_request::pbs_count::{type_name}") + }; + + let results_file = Path::new("dex_swap_request_pbs_count.csv"); + if !results_file.exists() { + File::create(results_file).expect("create results file failed"); + } + let mut file = OpenOptions::new() + .append(true) + .open(results_file) + .expect("cannot open results file"); + + write_result(&mut file, &test_name, count as usize); + + write_to_json::( + &test_name, + params, + params.name(), + "pbs-count", + &OperatorType::Atomic, + 0, + vec![], + ); + } + pub fn print_swap_claim_pbs_counts( + client_key: &ClientKey, + type_name: &str, + swap_claim_func: F, + ) where + FheType: FheEncrypt, + F: for<'a> Fn( + &'a FheType, + &'a FheType, + u64, + u64, + u64, + u64, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + ) -> (FheType, FheType), + { + let mut rng = thread_rng(); + + let pending_0_in = FheType::encrypt(rng.gen::(), client_key); + let pending_1_in = FheType::encrypt(rng.gen::(), client_key); + let total_dex_token_0_in = rng.gen::(); + let total_dex_token_1_in = rng.gen::(); + let total_dex_token_0_out = rng.gen::(); + let total_dex_token_1_out = rng.gen::(); + let old_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let old_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let current_dex_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let current_dex_balance_1 = FheType::encrypt(rng.gen::(), client_key); + + #[cfg(feature = "gpu")] + configure_gpu(client_key); + + tfhe::reset_pbs_count(); + let (_, _) = swap_claim_func( + &pending_0_in, + &pending_1_in, + total_dex_token_0_in, + total_dex_token_1_in, + total_dex_token_0_out, + total_dex_token_1_out, + &old_balance_0, + &old_balance_1, + ¤t_dex_balance_0, + ¤t_dex_balance_1, + ); + let count = tfhe::get_pbs_count(); + + println!("ERC20 swap claim/::{type_name}: {count} PBS"); + + let params = client_key.computation_parameters(); + + let test_name = if cfg!(feature = "gpu") { + format!("hlapi::cuda::dex::swap_claim::pbs_count::{type_name}") + } else { + format!("hlapi::dex::swap_claim::pbs_count::{type_name}") + }; + + let results_file = Path::new("dex_swap_claim_pbs_count.csv"); + if !results_file.exists() { + File::create(results_file).expect("create results file failed"); + } + let mut file = OpenOptions::new() + 
.append(true) + .open(results_file) + .expect("cannot open results file"); + + write_result(&mut file, &test_name, count as usize); + + write_to_json::( + &test_name, + params, + params.name(), + "pbs-count", + &OperatorType::Atomic, + 0, + vec![], + ); + } +} + +fn bench_swap_request_latency( + c: &mut BenchmarkGroup<'_, WallTime>, + client_key: &ClientKey, + bench_name: &str, + type_name: &str, + fn_name: &str, + swap_request_func: F, +) where + FheType: FheEncrypt, + F: for<'a> Fn( + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + ) -> (FheType, FheType, FheType, FheType), +{ + #[cfg(feature = "gpu")] + configure_gpu(client_key); + + let bench_id = format!("{bench_name}::{fn_name}::{type_name}"); + c.bench_function(&bench_id, |b| { + let mut rng = thread_rng(); + + let from_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let from_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let current_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let current_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let to_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let to_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let total_token_0 = FheType::encrypt(rng.gen::(), client_key); + let total_token_1 = FheType::encrypt(rng.gen::(), client_key); + let amount_0 = FheType::encrypt(rng.gen::(), client_key); + let amount_1 = FheType::encrypt(rng.gen::(), client_key); + + b.iter(|| { + let (_, _, _, _) = swap_request_func( + &from_balance_0, + &from_balance_1, + ¤t_balance_0, + ¤t_balance_1, + &to_balance_0, + &to_balance_1, + &total_token_0, + &total_token_1, + &amount_0, + &amount_1, + ); + }) + }); + + let params = client_key.computation_parameters(); + + write_to_json::( + &bench_id, + params, + params.name(), + "dex-swap-request", + &OperatorType::Atomic, + 64, + vec![], + ); +} + +fn bench_swap_claim_latency( + c: &mut BenchmarkGroup<'_, WallTime>, + client_key: &ClientKey, + bench_name: &str, + type_name: &str, + fn_name: &str, + swap_claim_func: F, +) where + FheType: FheEncrypt, + F: for<'a> Fn( + &'a FheType, + &'a FheType, + u64, + u64, + u64, + u64, + &'a FheType, + &'a FheType, + &'a FheType, + &'a FheType, + ) -> (FheType, FheType), +{ + #[cfg(feature = "gpu")] + configure_gpu(client_key); + + let bench_id = format!("{bench_name}::{fn_name}::{type_name}"); + c.bench_function(&bench_id, |b| { + let mut rng = thread_rng(); + + let pending_0_in = FheType::encrypt(rng.gen::(), client_key); + let pending_1_in = FheType::encrypt(rng.gen::(), client_key); + let total_token_0_in = rng.gen::(); + let total_token_1_in = rng.gen::(); + let total_token_0_out = rng.gen::(); + let total_token_1_out = rng.gen::(); + let old_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let old_balance_1 = FheType::encrypt(rng.gen::(), client_key); + let current_balance_0 = FheType::encrypt(rng.gen::(), client_key); + let current_balance_1 = FheType::encrypt(rng.gen::(), client_key); + + b.iter(|| { + let (_, _) = swap_claim_func( + &pending_0_in, + &pending_1_in, + total_token_0_in, + total_token_1_in, + total_token_0_out, + total_token_1_out, + &old_balance_0, + &old_balance_1, + ¤t_balance_0, + ¤t_balance_1, + ); + }) + }); + + let params = client_key.computation_parameters(); + + write_to_json::( + &bench_id, + params, + params.name(), + "dex-swap-claim", + &OperatorType::Atomic, + 64, + vec![], + ); +} + +#[cfg(feature = "pbs-stats")] +use 
crate::pbs_stats::print_swap_claim_pbs_counts; +#[cfg(feature = "pbs-stats")] +use crate::pbs_stats::print_swap_request_pbs_counts; + +#[cfg(not(feature = "gpu"))] +fn main() { + let params = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128; + + let config = ConfigBuilder::with_custom_parameters(params).build(); + let cks = ClientKey::generate(config); + let compressed_sks = CompressedServerKey::new(&cks); + + let sks = compressed_sks.decompress(); + + rayon::broadcast(|_| set_server_key(sks.clone())); + set_server_key(sks); + + let mut c = Criterion::default().sample_size(10).configure_from_args(); + + let bench_name = "hlapi::dex"; + + // FheUint64 PBS counts + // We don't run multiple times since every input is encrypted + // PBS count is always the same + #[cfg(feature = "pbs-stats")] + { + print_swap_request_pbs_counts(&cks, "FheUint64", swap_request::); + print_swap_claim_pbs_counts(&cks, "FheUint64", swap_claim::); + } + + // FheUint64 latency + { + let mut group = c.benchmark_group(bench_name); + bench_swap_request_latency( + &mut group, + &cks, + bench_name, + "FheUint64", + "swap_request", + swap_request::, + ); + bench_swap_claim_latency( + &mut group, + &cks, + bench_name, + "FheUint64", + "swap_claim", + swap_claim::, + ); + + group.finish(); + } + + c.final_summary(); +} + +#[cfg(feature = "gpu")] +fn main() { + let params = PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS; + + let config = ConfigBuilder::with_custom_parameters(params).build(); + let cks = ClientKey::generate(config); + + let mut c = Criterion::default().sample_size(10).configure_from_args(); + + let bench_name = "hlapi::cuda::dex"; + + // FheUint64 PBS counts + // We don't run multiple times since every input is encrypted + // PBS count is always the same + #[cfg(feature = "pbs-stats")] + { + print_swap_request_pbs_counts(&cks, "FheUint64", swap_request::); + print_swap_claim_pbs_counts(&cks, "FheUint64", swap_claim::); + } + + // FheUint64 latency + { + let mut group = c.benchmark_group(bench_name); + bench_swap_request_latency( + &mut group, + &cks, + bench_name, + "FheUint64", + "swap_request", + swap_request::, + ); + bench_swap_claim_latency( + &mut group, + &cks, + bench_name, + "FheUint64", + "swap_claim", + swap_claim::, + ); + + group.finish(); + } + + c.final_summary(); +} diff --git a/tfhe/benches/high_level_api/erc20.rs b/tfhe/benches/high_level_api/erc20.rs index 03fb28a2d..787e9414a 100644 --- a/tfhe/benches/high_level_api/erc20.rs +++ b/tfhe/benches/high_level_api/erc20.rs @@ -1,6 +1,8 @@ #[path = "../utilities.rs"] mod utilities; +#[cfg(feature = "gpu")] +use crate::utilities::configure_gpu; use crate::utilities::{write_to_json, OperatorType}; use criterion::measurement::WallTime; use criterion::{BenchmarkGroup, Criterion, Throughput}; @@ -18,7 +20,7 @@ use tfhe::{set_server_key, ClientKey, CompressedServerKey, ConfigBuilder, FheBoo /// Transfer as written in the original FHEvm white-paper, /// it uses a comparison to check if the sender has enough, /// and cmuxes based on the comparison result -fn transfer_whitepaper( +pub fn transfer_whitepaper( from_amount: &FheType, to_amount: &FheType, amount: &FheType, @@ -177,13 +179,6 @@ mod pbs_stats { } } -#[cfg(feature = "gpu")] -fn configure_gpu(client_key: &ClientKey) { - let compressed_sks = CompressedServerKey::new(client_key); - let sks = compressed_sks.decompress_to_gpu(); - rayon::broadcast(|_| set_server_key(sks.clone())); - set_server_key(sks); -} fn bench_transfer_latency( c: &mut BenchmarkGroup<'_, WallTime>, client_key: &ClientKey, @@ 
-383,7 +378,7 @@ fn main() { let mut c = Criterion::default().sample_size(10).configure_from_args(); - let bench_name = "hlapi::erc20::transfer"; + let bench_name = "hlapi::erc20"; // FheUint64 PBS counts // We don't run multiple times since every input is encrypted @@ -393,14 +388,14 @@ fn main() { print_transfer_pbs_counts( &cks, "FheUint64", - "whitepaper", + "transfer::whitepaper", transfer_whitepaper::, ); print_transfer_pbs_counts(&cks, "FheUint64", "no_cmux", transfer_no_cmux::); print_transfer_pbs_counts( &cks, "FheUint64", - "overflow", + "transfer::overflow", transfer_overflow::, ); print_transfer_pbs_counts(&cks, "FheUint64", "safe", transfer_safe::); @@ -414,7 +409,7 @@ fn main() { &cks, bench_name, "FheUint64", - "whitepaper", + "transfer::whitepaper", transfer_whitepaper::, ); bench_transfer_latency( @@ -422,7 +417,7 @@ fn main() { &cks, bench_name, "FheUint64", - "no_cmux", + "transfer::no_cmux", transfer_no_cmux::, ); bench_transfer_latency( @@ -430,7 +425,7 @@ fn main() { &cks, bench_name, "FheUint64", - "overflow", + "transfer::overflow", transfer_overflow::, ); bench_transfer_latency( @@ -438,7 +433,7 @@ fn main() { &cks, bench_name, "FheUint64", - "safe", + "transfer::safe", transfer_safe::, ); @@ -453,7 +448,7 @@ fn main() { &cks, bench_name, "FheUint64", - "whitepaper", + "transfer::whitepaper", transfer_whitepaper::, ); bench_transfer_throughput( @@ -461,7 +456,7 @@ fn main() { &cks, bench_name, "FheUint64", - "no_cmux", + "transfer::no_cmux", transfer_no_cmux::, ); bench_transfer_throughput( @@ -469,7 +464,7 @@ fn main() { &cks, bench_name, "FheUint64", - "overflow", + "transfer::overflow", transfer_overflow::, ); bench_transfer_throughput( @@ -477,9 +472,10 @@ fn main() { &cks, bench_name, "FheUint64", - "safe", + "transfer::safe", transfer_safe::, ); + group.finish(); } @@ -495,7 +491,7 @@ fn main() { let mut c = Criterion::default().sample_size(10).configure_from_args(); - let bench_name = "hlapi::cuda::erc20::transfer"; + let bench_name = "hlapi::cuda::erc20"; // FheUint64 PBS counts // We don't run multiple times since every input is encrypted @@ -505,14 +501,14 @@ fn main() { print_transfer_pbs_counts( &cks, "FheUint64", - "whitepaper", + "transfer::whitepaper", transfer_whitepaper::, ); print_transfer_pbs_counts(&cks, "FheUint64", "no_cmux", transfer_no_cmux::); print_transfer_pbs_counts( &cks, "FheUint64", - "overflow", + "transfer::overflow", transfer_overflow::, ); print_transfer_pbs_counts(&cks, "FheUint64", "safe", transfer_safe::); @@ -526,7 +522,7 @@ fn main() { &cks, bench_name, "FheUint64", - "whitepaper", + "transfer::whitepaper", transfer_whitepaper::, ); bench_transfer_latency( @@ -534,7 +530,7 @@ fn main() { &cks, bench_name, "FheUint64", - "no_cmux", + "transfer::no_cmux", transfer_no_cmux::, ); bench_transfer_latency( @@ -542,7 +538,7 @@ fn main() { &cks, bench_name, "FheUint64", - "overflow", + "transfer::overflow", transfer_overflow::, ); bench_transfer_latency( @@ -550,7 +546,7 @@ fn main() { &cks, bench_name, "FheUint64", - "safe", + "transfer::safe", transfer_safe::, ); @@ -565,7 +561,7 @@ fn main() { &cks, bench_name, "FheUint64", - "whitepaper", + "transfer::whitepaper", transfer_whitepaper::, ); cuda_bench_transfer_throughput( @@ -573,7 +569,7 @@ fn main() { &cks, bench_name, "FheUint64", - "no_cmux", + "transfer::no_cmux", transfer_no_cmux::, ); cuda_bench_transfer_throughput( @@ -581,7 +577,7 @@ fn main() { &cks, bench_name, "FheUint64", - "overflow", + "transfer::overflow", transfer_overflow::, ); cuda_bench_transfer_throughput( @@ 
-589,7 +585,7 @@ fn main() { &cks, bench_name, "FheUint64", - "safe", + "transfer::safe", transfer_safe::, ); group.finish(); diff --git a/tfhe/benches/utilities.rs b/tfhe/benches/utilities.rs index 0c2fe5a9c..ec2c62aa0 100644 --- a/tfhe/benches/utilities.rs +++ b/tfhe/benches/utilities.rs @@ -655,6 +655,7 @@ mod cuda_utils { use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams}; use tfhe::core_crypto::prelude::{Numeric, UnsignedInteger}; use tfhe::shortint::server_key::ModulusSwitchNoiseReductionKey; + use tfhe::{set_server_key, ClientKey, CompressedServerKey}; #[allow(dead_code)] pub const GPU_MAX_SUPPORTED_POLYNOMIAL_SIZE: usize = 16384; @@ -879,6 +880,13 @@ mod cuda_utils { } } + #[allow(dead_code)] + pub fn configure_gpu(client_key: &ClientKey) { + let compressed_sks = CompressedServerKey::new(client_key); + let sks = compressed_sks.decompress_to_gpu(); + rayon::broadcast(|_| set_server_key(sks.clone())); + set_server_key(sks); + } #[allow(unused_imports)] #[cfg(feature = "integer")] pub use cuda_integer_utils::*;
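
Note on the arithmetic being benchmarked: the sketch below restates the swap_request/swap_claim
math on plain u64/u128 values. It is only an illustrative reference (the function names, the
concrete numbers and the small main driver are not part of the patch); the benchmark runs the
same computation on FheUint64/FheUint128 ciphertexts, replacing the plain `if` with the
comparison-and-cmux guard from transfer_whitepaper. The encrypted version is driven by the new
`make bench_hlapi_dex` (CPU) and `make bench_hlapi_dex_gpu` (GPU) targets.

/// Whitepaper-style transfer: `amount` moves only if the sender can afford it.
fn transfer_plain(from: u64, to: u64, amount: u64) -> (u64, u64) {
    if from >= amount {
        (from - amount, to + amount)
    } else {
        (from, to)
    }
}

/// swap_request: the user pays `amount0`/`amount1` of each token into the DEX;
/// whatever was actually received is added to the per-user pending totals and
/// to the global "token in" totals.
fn swap_request_plain(
    from0: u64, from1: u64,
    dex0: u64, dex1: u64,
    pending0: u64, pending1: u64,
    total_in0: u64, total_in1: u64,
    amount0: u64, amount1: u64,
) -> (u64, u64, u64, u64) {
    let (_, new_dex0) = transfer_plain(from0, dex0, amount0);
    let (_, new_dex1) = transfer_plain(from1, dex1, amount1);
    let sent0 = new_dex0 - dex0; // amount0 if affordable, otherwise 0
    let sent1 = new_dex1 - dex1;
    (pending0 + sent0, total_in0 + sent0, pending1 + sent1, total_in1 + sent1)
}

/// swap_claim: each user receives a pro-rata share of the opposite token.
/// Returns the payout amounts; the benchmark then credits them to the user's
/// balances with another whitepaper transfer. The product is widened to u128
/// before dividing, which is why the encrypted version casts FheUint64 to
/// FheUint128 around the multiplication/division.
fn swap_claim_plain(
    pending0_in: u64, pending1_in: u64,
    total_in0: u64, total_in1: u64,
    total_out0: u64, total_out1: u64,
) -> (u64, u64) {
    let out0 = if total_in1 != 0 {
        (u128::from(pending1_in) * u128::from(total_out0) / u128::from(total_in1)) as u64
    } else {
        0
    };
    let out1 = if total_in0 != 0 {
        (u128::from(pending0_in) * u128::from(total_out1) / u128::from(total_in0)) as u64
    } else {
        0
    };
    (out0, out1)
}

fn main() {
    // A user deposits 40 of token 0 and 60 of token 1 into an empty pool.
    let (p0, t0, p1, t1) = swap_request_plain(100, 100, 0, 0, 0, 0, 0, 0, 40, 60);
    assert_eq!((p0, t0, p1, t1), (40, 40, 60, 60));

    // Having contributed half of the token-1 inflow, the user claims half of the
    // token-0 reserve paid out this round.
    let (out0, out1) = swap_claim_plain(0, 500, 1_000, 1_000, 2_000, 2_000);
    assert_eq!((out0, out1), (1_000, 0));
    println!("claim pays out {out0} of token 0 and {out1} of token 1");
}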