mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
2 Commits
am/chore/c
...
am/wip/ntt
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c60b2e3f55 | ||
|
|
eb3b5f1459 |
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
7
.github/workflows/aws_tfhe_fast_tests.yml
vendored
7
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -54,7 +54,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -133,7 +132,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -159,7 +158,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -270,7 +269,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
7
.github/workflows/aws_tfhe_integer_tests.yml
vendored
7
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -42,7 +42,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
@@ -74,7 +73,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -98,7 +97,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -139,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -42,7 +42,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
@@ -74,7 +73,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -98,7 +97,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -143,7 +142,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
7
.github/workflows/aws_tfhe_tests.yml
vendored
7
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -63,7 +63,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -142,7 +141,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -168,7 +167,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -250,7 +249,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
6
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_boolean.yml
vendored
6
.github/workflows/benchmark_boolean.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_core_crypto.yml
vendored
6
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_erc20.yml
vendored
6
.github/workflows/benchmark_erc20.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
5
.github/workflows/benchmark_gpu_4090.yml
vendored
5
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -116,7 +116,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -127,7 +126,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -167,7 +167,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
214
.github/workflows/benchmark_gpu_erc20.yml
vendored
214
.github/workflows/benchmark_gpu_erc20.yml
vendored
@@ -1,41 +1,195 @@
|
||||
# Run CUDA ERC20 benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
|
||||
name: Cuda ERC20 benchmarks
|
||||
# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: ERC20 GPU H100 benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
profile:
|
||||
description: "Instance type"
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 5a.m.
|
||||
- cron: '0 5 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-erc20-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Parse profile
|
||||
id: parse_profile
|
||||
run: |
|
||||
echo "profile=$(echo '${{ inputs.profile }}' | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}"
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
- name: Parse hardware name
|
||||
id: parse_hardware_name
|
||||
cuda-erc20-benchmarks:
|
||||
name: Execute GPU integer benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
echo "name=$(echo '${{ inputs.profile }}' | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}"
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
run-benchmarks:
|
||||
name: Run benchmarks
|
||||
needs: parse-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
profile: ${{ needs.parse-inputs.outputs.profile }}
|
||||
hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }}
|
||||
secrets: inherit
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_hlapi_erc20_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Parse PBS counts
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-erc20-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,35 +1,11 @@
|
||||
# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Cuda ERC20 benchmarks - common
|
||||
name: ERC20 GPU 2xH100 benchmarks
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
backend:
|
||||
type: string
|
||||
default: hyperstack
|
||||
profile:
|
||||
type: string
|
||||
required: true
|
||||
hardware_name:
|
||||
type: string
|
||||
required: true
|
||||
secrets:
|
||||
FHE_ACTIONS_TOKEN:
|
||||
required: true
|
||||
SLAB_ACTION_TOKEN:
|
||||
required: true
|
||||
SLAB_BASE_URL:
|
||||
required: true
|
||||
SLAB_URL:
|
||||
required: true
|
||||
JOB_SECRET:
|
||||
required: true
|
||||
SLACK_CHANNEL:
|
||||
required: true
|
||||
BOT_USERNAME:
|
||||
required: true
|
||||
SLACK_WEBHOOK:
|
||||
required: true
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 5a.m.
|
||||
- cron: '0 5 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -54,17 +30,17 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: ${{ inputs.backend }}
|
||||
profile: ${{ inputs.profile }}
|
||||
backend: hyperstack
|
||||
profile: 2-h100
|
||||
|
||||
cuda-erc20-benchmarks:
|
||||
name: Cuda ERC20 benchmarks (${{ inputs.profile }})
|
||||
name: Execute GPU integer benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -111,7 +87,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -147,7 +123,7 @@ jobs:
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "${{ inputs.hardware_name }}" \
|
||||
--hardware "n3-H100x2" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
@@ -156,6 +132,12 @@ jobs:
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Parse PBS counts
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
@@ -186,17 +168,17 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Cuda ERC20 benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "ERC20 2xH100 benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
|
||||
name: Teardown instance (cuda-erc20-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -210,4 +192,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-erc20-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
195
.github/workflows/benchmark_gpu_erc20_8h100.yml
vendored
Normal file
195
.github/workflows/benchmark_gpu_erc20_8h100.yml
vendored
Normal file
@@ -0,0 +1,195 @@
|
||||
# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: ERC20 GPU 8xH100 benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 5a.m.
|
||||
- cron: '0 5 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-erc20-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-h100
|
||||
|
||||
cuda-erc20-benchmarks:
|
||||
name: Execute GPU integer benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_hlapi_erc20_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x8" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Parse PBS counts
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }}
|
||||
SLACK_MESSAGE: "ERC20 8xH100 benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-erc20-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
35
.github/workflows/benchmark_gpu_erc20_weekly.yml
vendored
35
.github/workflows/benchmark_gpu_erc20_weekly.yml
vendored
@@ -1,35 +0,0 @@
|
||||
# Run CUDA ERC20 benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
|
||||
name: Cuda ERC20 weekly benchmarks
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 5a.m.
|
||||
- cron: '0 5 * * 6'
|
||||
|
||||
jobs:
|
||||
run-benchmarks-1-h100:
|
||||
name: Run benchmarks (1xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
profile: single-h100
|
||||
hardware_name: n3-H100x1
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-2-h100:
|
||||
name: Run benchmarks (2xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
profile: 2-h100
|
||||
hardware_name: n3-H100x2
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-8-h100:
|
||||
name: Run benchmarks (8xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
profile: multi-h100
|
||||
hardware_name: n3-H100x8
|
||||
secrets: inherit
|
||||
263
.github/workflows/benchmark_gpu_integer.yml
vendored
263
.github/workflows/benchmark_gpu_integer.yml
vendored
@@ -1,78 +1,213 @@
|
||||
# Run CUDA benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
|
||||
name: Cuda benchmarks
|
||||
# Run integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer GPU benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
profile:
|
||||
description: "Instance type"
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
command:
|
||||
description: "Benchmark command to run"
|
||||
type: choice
|
||||
default: integer_multi_bit
|
||||
options:
|
||||
- integer
|
||||
- integer_multi_bit
|
||||
- integer_compression
|
||||
- pbs
|
||||
- ks
|
||||
op_flavor:
|
||||
description: "Operations set to run"
|
||||
type: choice
|
||||
default: default
|
||||
options:
|
||||
- default
|
||||
- fast_default
|
||||
- unchecked
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
bench_type:
|
||||
description: "Benchmarks type"
|
||||
type: choice
|
||||
default: latency
|
||||
options:
|
||||
- latency
|
||||
- throughput
|
||||
- both
|
||||
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Parse profile
|
||||
id: parse_profile
|
||||
run: |
|
||||
echo "profile=$(echo '${{ inputs.profile }}' | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}"
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
- name: Parse hardware name
|
||||
id: parse_hardware_name
|
||||
cuda-integer-benchmarks:
|
||||
name: Execute GPU integer benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
echo "name=$(echo '${{ inputs.profile }}' | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}"
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
run-benchmarks:
|
||||
name: Run benchmarks
|
||||
needs: parse-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
with:
|
||||
profile: ${{ needs.parse-inputs.outputs.profile }}
|
||||
hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }}
|
||||
command: ${{ inputs.command }}
|
||||
op_flavor: ${{ inputs.op_flavor }}
|
||||
bench_type: ${{ inputs.bench_type }}
|
||||
all_precisions: ${{ inputs.all_precisions }}
|
||||
secrets: inherit
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu
|
||||
|
||||
- name: Parse benchmarks to csv
|
||||
run: |
|
||||
make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-integer-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-integer-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
206
.github/workflows/benchmark_gpu_integer_2H100_full.yml
vendored
Normal file
206
.github/workflows/benchmark_gpu_integer_2H100_full.yml
vendored
Normal file
@@ -0,0 +1,206 @@
|
||||
# Run integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer 2xH100 benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-full-2-gpu-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: 2-h100
|
||||
|
||||
cuda-integer-full-2-gpu-benchmarks:
|
||||
name: Execute 2xH100 integer benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [integer_multi_bit]
|
||||
op_flavor: [default]
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x2" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-full-2-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-integer-full-2-gpu-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-full-2-gpu-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU 2xH100 benchmarks finished with status: ${{ needs.cuda-integer-full-2-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-2-gpu-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-full-2-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-full-2-gpu-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
212
.github/workflows/benchmark_gpu_integer_full.yml
vendored
Normal file
212
.github/workflows/benchmark_gpu_integer_full.yml
vendored
Normal file
@@ -0,0 +1,212 @@
|
||||
# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer GPU full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-full-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-integer-full-benchmarks:
|
||||
name: Execute GPU integer benchmarks for all operations flavor
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [integer, integer_multi_bit]
|
||||
op_flavor: [default]
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
|
||||
|
||||
# Run these benchmarks only once
|
||||
- name: Run compression benchmarks with AVX512
|
||||
if: matrix.op_flavor == 'default' && matrix.command == 'integer'
|
||||
run: |
|
||||
make bench_integer_compression_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-full-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-integer-full-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-full-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ needs.cuda-integer-full-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-full-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-full-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
234
.github/workflows/benchmark_gpu_integer_multi_bit.yml
vendored
Normal file
234
.github/workflows/benchmark_gpu_integer_multi_bit.yml
vendored
Normal file
@@ -0,0 +1,234 @@
|
||||
# Run integer benchmarks with multi-bit cryptographic parameters on an instance and return parsed results to Slab CI bot.
|
||||
name: Integer GPU Multi-bit benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
type: boolean
|
||||
default: false
|
||||
fast_default:
|
||||
description: "Run only deduplicated default operations without scalar variants"
|
||||
type: boolean
|
||||
default: false
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
BENCH_OP_FLAVOR: default
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-multi-bit-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-integer-multi-bit-benchmarks:
|
||||
name: Execute GPU integer multi-bit benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run fast subset benchmarks
|
||||
if: inputs.fast_default
|
||||
run: |
|
||||
echo "BENCH_OP_FLAVOR=fast_default" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
make bench_unsigned_integer_multi_bit_gpu
|
||||
|
||||
- name: Parse benchmarks to csv
|
||||
run: |
|
||||
make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-integer-multi-bit-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-multi-bit-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU multi-bit benchmarks finished with status: ${{ needs.cuda-integer-multi-bit-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -1,47 +1,25 @@
|
||||
# Run integer benchmarks on CUDA instance and return parsed results to Slab CI bot.
|
||||
name: Cuda benchmarks - common
|
||||
# Run 64-bit multi-bit integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer multi GPU Multi-bit benchmarks
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
backend:
|
||||
type: string
|
||||
default: hyperstack
|
||||
profile:
|
||||
type: string
|
||||
required: true
|
||||
hardware_name:
|
||||
type: string
|
||||
required: true
|
||||
command: # Use a comma separated values to generate an array
|
||||
type: string
|
||||
required: true
|
||||
op_flavor: # Use a comma separated values to generate an array
|
||||
type: string
|
||||
required: true
|
||||
bench_type:
|
||||
type: string
|
||||
default: latency
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
type: boolean
|
||||
default: false
|
||||
secrets:
|
||||
FHE_ACTIONS_TOKEN:
|
||||
required: true
|
||||
SLAB_ACTION_TOKEN:
|
||||
required: true
|
||||
SLAB_BASE_URL:
|
||||
required: true
|
||||
SLAB_URL:
|
||||
required: true
|
||||
JOB_SECRET:
|
||||
required: true
|
||||
SLACK_CHANNEL:
|
||||
required: true
|
||||
BOT_USERNAME:
|
||||
required: true
|
||||
SLACK_WEBHOOK:
|
||||
required: true
|
||||
fast_default:
|
||||
description: "Run only deduplicated default operations without scalar variants"
|
||||
type: boolean
|
||||
default: false
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -54,82 +32,33 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
BENCH_OP_FLAVOR: default
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
command: ${{ steps.set_command.outputs.command }}
|
||||
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
|
||||
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
|
||||
steps:
|
||||
- name: Set single command
|
||||
if: ${{ !contains(inputs.command, ',')}}
|
||||
run: |
|
||||
echo "COMMAND=[\"${{ inputs.command }}\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set multiple commands
|
||||
if: ${{ contains(inputs.command, ',')}}
|
||||
run: |
|
||||
PARSED_COMMAND=$(echo "${{ inputs.command }}" | sed 's/[[:space:]]*,[[:space:]]*/\\", \\"/g')
|
||||
echo "COMMAND=[\"${PARSED_COMMAND}\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set single operations flavor
|
||||
if: ${{ !contains(inputs.op_flavor, ',')}}
|
||||
run: |
|
||||
echo "OP_FLAVOR=[\"${{ inputs.op_flavor }}\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set multiple operations flavors
|
||||
if: ${{ contains(inputs.op_flavor, ',')}}
|
||||
run: |
|
||||
PARSED_OP_FLAVOR=$(echo "${{ inputs.op_flavor }}" | sed 's/[[:space:]]*,[[:space:]]*/", "/g')
|
||||
echo "OP_FLAVOR=[\"${PARSED_OP_FLAVOR}\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set benchmark types
|
||||
run: |
|
||||
if [[ "${{ inputs.bench_type }}" == "both" ]]; then
|
||||
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
|
||||
else
|
||||
echo "BENCH_TYPE=[\"${{ inputs.bench_type }}\"]" >> "${GITHUB_ENV}"
|
||||
fi
|
||||
|
||||
- name: Set command output
|
||||
id: set_command
|
||||
run: |
|
||||
echo "command=${{ toJSON(env.COMMAND) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
needs: prepare-matrix
|
||||
name: Setup instance (cuda-integer-multi-bit-multi-gpu-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' }}
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: ${{ inputs.backend }}
|
||||
profile: ${{ inputs.profile }}
|
||||
backend: hyperstack
|
||||
profile: multi-h100
|
||||
|
||||
cuda-benchmarks:
|
||||
name: Cuda benchmarks (${{ inputs.profile }})
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
cuda-integer-multi-bit-multi-gpu-benchmarks:
|
||||
name: Execute multi GPU integer multi-bit benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
continue-on-error: true
|
||||
@@ -137,10 +66,6 @@ jobs:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
|
||||
op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
|
||||
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
@@ -181,7 +106,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -212,36 +137,47 @@ jobs:
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks
|
||||
- name: Should run fast subset benchmarks
|
||||
if: inputs.fast_default
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} BENCH_TYPE=${{ matrix.bench_type }} bench_${{ matrix.command }}_gpu
|
||||
echo "BENCH_OP_FLAVOR=fast_default" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
make bench_unsigned_integer_multi_bit_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "${{ inputs.hardware_name }}" \
|
||||
--hardware "n3-H100x8" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
@@ -252,26 +188,26 @@ jobs:
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-benchmarks ]
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-benchmarks.result != 'skipped' && failure() }}
|
||||
if: ${{ always() && needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer multi GPU multi-bit benchmarks finished with status: ${{ needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
name: Teardown instance (cuda-integer-multi-bit-multi-gpu-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -285,4 +221,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-multi-gpu-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
206
.github/workflows/benchmark_gpu_integer_multi_gpu_full.yml
vendored
Normal file
206
.github/workflows/benchmark_gpu_integer_multi_gpu_full.yml
vendored
Normal file
@@ -0,0 +1,206 @@
|
||||
# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer multi GPU full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-full-multi-gpu-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-h100
|
||||
|
||||
cuda-integer-full-multi-gpu-benchmarks:
|
||||
name: Execute multi GPU integer benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [integer_multi_bit]
|
||||
op_flavor: [default]
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x8" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-full-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-integer-full-multi-gpu-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-multi-gpu-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-full-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-full-multi-gpu-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -1,60 +0,0 @@
|
||||
# Run CUDA benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
|
||||
name: Cuda weekly benchmarks
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
jobs:
|
||||
run-benchmarks-1-h100:
|
||||
name: Run benchmarks (1xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
with:
|
||||
profile: single-h100
|
||||
hardware_name: n3-H100x1
|
||||
command: integer,integer_multi_bit
|
||||
op_flavor: default
|
||||
bench_type: latency
|
||||
all_precisions: true
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-2-h100:
|
||||
name: Run benchmarks (2xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
with:
|
||||
profile: 2-h100
|
||||
hardware_name: n3-H100x2
|
||||
command: integer_multi_bit
|
||||
op_flavor: default
|
||||
bench_type: latency
|
||||
all_precisions: true
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-8-h100:
|
||||
name: Run benchmarks (8xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
with:
|
||||
profile: multi-h100
|
||||
hardware_name: n3-H100x8
|
||||
command: integer_multi_bit
|
||||
op_flavor: default
|
||||
bench_type: latency
|
||||
all_precisions: true
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-l40:
|
||||
name: Run benchmarks (L40)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
with:
|
||||
profile: l40
|
||||
hardware_name: n3-L40x1
|
||||
command: integer_multi_bit,integer_compression,pbs,ks
|
||||
op_flavor: default
|
||||
bench_type: latency
|
||||
all_precisions: true
|
||||
secrets: inherit
|
||||
205
.github/workflows/benchmark_gpu_l40.yml
vendored
Normal file
205
.github/workflows/benchmark_gpu_l40.yml
vendored
Normal file
@@ -0,0 +1,205 @@
|
||||
# Run benchmarks on an L40 VM and return parsed results to Slab CI bot.
|
||||
name: Cuda benchmarks (L40)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-l40-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: l40
|
||||
|
||||
cuda-l40-benchmarks:
|
||||
name: Cuda benchmarks (L40)
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [integer_multi_bit]
|
||||
op_flavor: [default]
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
|
||||
|
||||
- name: Run compression benchmarks with AVX512
|
||||
run: |
|
||||
make bench_integer_compression_gpu
|
||||
|
||||
- name: Run PBS benchmarks
|
||||
run: |
|
||||
make bench_pbs_gpu
|
||||
|
||||
- name: Run KS benchmarks
|
||||
run: |
|
||||
make bench_ks_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-L40x1" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-l40-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-l40-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-l40-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Cuda benchmarks (L40) finished with status: ${{ needs.cuda-l40-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-l40-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-l40-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-l40-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
9
.github/workflows/benchmark_integer.yml
vendored
9
.github/workflows/benchmark_integer.yml
vendored
@@ -35,6 +35,7 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
@@ -90,7 +91,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +131,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -167,7 +168,7 @@ jobs:
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
@@ -197,7 +198,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_shortint.yml
vendored
6
.github/workflows/benchmark_shortint.yml
vendored
@@ -56,7 +56,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
17
.github/workflows/benchmark_tfhe_fft.yml
vendored
17
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -16,9 +16,6 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- tfhe-fft/**
|
||||
- .github/workflows/benchmark_tfhe_fft.yml
|
||||
schedule:
|
||||
# Job will be triggered each Thursday at 11p.m.
|
||||
- cron: '0 23 * * 4'
|
||||
@@ -32,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -50,7 +47,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -84,13 +81,13 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -113,7 +110,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-fft benchmarks failed. (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -126,7 +123,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -137,7 +134,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (fft-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
17
.github/workflows/benchmark_tfhe_ntt.yml
vendored
17
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -16,9 +16,6 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- tfhe-ntt/**
|
||||
- .github/workflows/benchmark_tfhe_ntt.yml
|
||||
schedule:
|
||||
# Job will be triggered each Friday at 11p.m.
|
||||
- cron: "0 23 * * 5"
|
||||
@@ -32,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -50,7 +47,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -84,13 +81,13 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -113,7 +110,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-ntt benchmarks failed. (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -126,7 +123,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -137,7 +134,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (ntt-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
11
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
11
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@c3a1bb2c992d77180ae65be6ae6c166cf40f857c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,7 +91,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -118,7 +118,8 @@ jobs:
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
@@ -155,7 +156,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
7
.github/workflows/benchmark_wasm_client.yml
vendored
7
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -36,7 +36,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -65,7 +64,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +98,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -200,7 +199,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
64
.github/workflows/benchmark_zk_pke.yml
vendored
64
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -4,14 +4,10 @@ name: PKE ZK benchmarks
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
bench_type:
|
||||
description: "Benchmarks type"
|
||||
type: choice
|
||||
default: latency
|
||||
options:
|
||||
- latency
|
||||
- throughput
|
||||
- both
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
push:
|
||||
branches:
|
||||
@@ -30,6 +26,7 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -43,7 +40,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -63,37 +59,10 @@ jobs:
|
||||
- tfhe/benches/integer/zk_pke.rs
|
||||
- .github/workflows/zk_pke_benchmark.yml
|
||||
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
|
||||
steps:
|
||||
- name: Set benchmark types
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if [[ "${{ inputs.bench_type }}" == "both" ]]; then
|
||||
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
|
||||
else
|
||||
echo "BENCH_TYPE=[\"${{ inputs.bench_type }}\"]" >> "${GITHUB_ENV}"
|
||||
fi
|
||||
|
||||
- name: Default benchmark type
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (pke-zk-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ should-run, prepare-matrix ]
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' &&
|
||||
@@ -104,7 +73,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -116,15 +85,11 @@ jobs:
|
||||
pke-zk-benchmarks:
|
||||
name: Execute PKE ZK benchmarks
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -141,7 +106,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -152,9 +117,14 @@ jobs:
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_TYPE=${{ matrix.bench_type }} bench_integer_zk
|
||||
make bench_integer_zk
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -168,7 +138,7 @@ jobs:
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Parse CRS sizes results
|
||||
run: |
|
||||
@@ -211,7 +181,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
2
.github/workflows/cargo_build_tfhe_fft.yml
vendored
2
.github/workflows/cargo_build_tfhe_fft.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
|
||||
2
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
2
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
|
||||
6
.github/workflows/cargo_test_fft.yml
vendored
6
.github/workflows/cargo_test_fft.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
matrix:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
cargo-tests-node-js:
|
||||
runs-on: "ubuntu-latest"
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
|
||||
- name: Test node js
|
||||
run: |
|
||||
|
||||
4
.github/workflows/cargo_test_ntt.yml
vendored
4
.github/workflows/cargo_test_ntt.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
|
||||
2
.github/workflows/ci_lint.yml
vendored
2
.github/workflows/ci_lint.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
make lint_workflow
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@38608ef4fb69adae7f1eac6eeb88e67b7d083bfd # v3.0.16
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
|
||||
10
.github/workflows/code_coverage.yml
vendored
10
.github/workflows/code_coverage.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
|
||||
uses: codecov/codecov-action@5c47607acb93fed5485fdbf7232e8a31425f672a
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
|
||||
uses: codecov/codecov-action@5c47607acb93fed5485fdbf7232e8a31425f672a
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/gpu_4090_tests.yml
vendored
2
.github/workflows/gpu_4090_tests.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
7
.github/workflows/gpu_fast_h100_tests.yml
vendored
7
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -31,7 +31,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -68,7 +67,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -121,7 +120,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -187,7 +186,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
7
.github/workflows/gpu_fast_tests.yml
vendored
7
.github/workflows/gpu_fast_tests.yml
vendored
@@ -30,7 +30,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -66,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -119,7 +118,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -185,7 +184,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/gpu_full_h100_tests.yml
vendored
6
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -139,7 +139,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -31,7 +31,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -68,7 +67,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -121,7 +120,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -190,7 +189,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
146
.github/workflows/gpu_integer_long_run_tests.yml
vendored
146
.github/workflows/gpu_integer_long_run_tests.yml
vendored
@@ -1,146 +0,0 @@
|
||||
name: AWS Long Run Tests on GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (gpu-tests)
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-tests:
|
||||
name: Long run GPU H100 tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
make test_integer_long_run_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests.result }}
|
||||
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (gpu-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (gpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
6
.github/workflows/gpu_pcc.yml
vendored
6
.github/workflows/gpu_pcc.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -171,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -171,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -38,7 +38,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
@@ -75,7 +74,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -129,7 +128,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -189,7 +188,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -171,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -171,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -125,7 +125,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -185,7 +185,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
94
.github/workflows/integer_long_run_tests.yml
vendored
94
.github/workflows/integer_long_run_tests.yml
vendored
@@ -1,94 +0,0 @@
|
||||
name: AWS Long Run Tests on CPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cpu-tests)
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
cpu-tests:
|
||||
name: Long run CPU tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
make test_integer_long_run
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CPU long run tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cpu-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
2
.github/workflows/m1_tests.yml
vendored
2
.github/workflows/m1_tests.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
2
.github/workflows/make_release.yml
vendored
2
.github/workflows/make_release.yml
vendored
@@ -46,7 +46,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
@@ -85,7 +84,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
|
||||
@@ -27,7 +27,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
|
||||
7
.github/workflows/make_release_cuda.yml
vendored
7
.github/workflows/make_release_cuda.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -64,14 +64,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -120,7 +119,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/make_release_tfhe_fft.yml
vendored
4
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
needs: verify_tag
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
|
||||
4
.github/workflows/make_release_tfhe_ntt.yml
vendored
4
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
needs: verify_tag
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
|
||||
@@ -27,7 +27,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Publish proc-macro crate
|
||||
env:
|
||||
|
||||
1
.github/workflows/make_release_zk_pok.yml
vendored
1
.github/workflows/make_release_zk_pok.yml
vendored
@@ -28,7 +28,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
|
||||
1
.github/workflows/sync_on_push.yml
vendored
1
.github/workflows/sync_on_push.yml
vendored
@@ -16,7 +16,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: git-sync
|
||||
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
|
||||
with:
|
||||
|
||||
@@ -19,14 +19,11 @@ exclude = [
|
||||
"utils/cargo-tfhe-lints"
|
||||
]
|
||||
[workspace.dependencies]
|
||||
aligned-vec = { version = "0.6", default-features = false }
|
||||
aligned-vec = { version = "0.5", default-features = false }
|
||||
bytemuck = "1.14.3"
|
||||
dyn-stack = { version = "0.11", default-features = false }
|
||||
itertools = "0.13"
|
||||
dyn-stack = { version = "0.10", default-features = false }
|
||||
num-complex = "0.4"
|
||||
pulp = { version = "0.20.0", default-features = false }
|
||||
rand = "0.8"
|
||||
rayon = "1"
|
||||
pulp = { version = "0.18.22", default-features = false }
|
||||
serde = { version = "1.0", default-features = false }
|
||||
wasm-bindgen = ">=0.2.86,<0.2.94"
|
||||
|
||||
|
||||
26
Makefile
26
Makefile
@@ -25,7 +25,6 @@ BACKWARD_COMPAT_DATA_BRANCH?=v0.4
|
||||
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
|
||||
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
|
||||
TFHE_SPEC:=tfhe
|
||||
WASM_PACK_VERSION="0.13.1"
|
||||
# We are kind of hacking the cut here, the version cannot contain a quote '"'
|
||||
WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
|
||||
WEB_RUNNER_DIR=web-test-runner
|
||||
@@ -117,8 +116,8 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
|
||||
|
||||
.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
|
||||
install_wasm_pack: install_rs_build_toolchain
|
||||
@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
|
||||
@wasm-pack --version > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.0 || \
|
||||
( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_node # Install last version of NodeJS via nvm
|
||||
@@ -586,11 +585,6 @@ test_integer_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
|
||||
.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_long_run_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
|
||||
|
||||
.PHONY: test_integer_compression
|
||||
test_integer_compression: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -771,12 +765,6 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_long_run # Run the long run tests for integer
|
||||
test_integer_long_run: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
|
||||
|
||||
|
||||
.PHONY: test_safe_serialization # Run the tests for safe serialization
|
||||
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -1077,7 +1065,7 @@ bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
@@ -1085,7 +1073,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
|
||||
bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
@@ -1093,7 +1081,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
|
||||
bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
@@ -1101,7 +1089,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
|
||||
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
@@ -1131,7 +1119,7 @@ bench_shortint_oprf: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
|
||||
bench_shortint_multi_bit: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench shortint-bench \
|
||||
|
||||
@@ -6,7 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rayon = { workspace = true }
|
||||
rayon = { version = "1.7.0"}
|
||||
|
||||
[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
|
||||
path = "../../tfhe"
|
||||
|
||||
@@ -27,23 +27,12 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
|
||||
std::abort(); \
|
||||
}
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index);
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index);
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index);
|
||||
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index);
|
||||
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
uint32_t cuda_is_available();
|
||||
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
@@ -35,8 +35,6 @@ enum CMP_ORDERING { IS_INFERIOR = 0, IS_EQUAL = 1, IS_SUPERIOR = 2 };
|
||||
|
||||
enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
|
||||
|
||||
enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };
|
||||
|
||||
extern "C" {
|
||||
void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -105,19 +103,17 @@ void cleanup_cuda_full_propagation(void *const *streams,
|
||||
|
||||
void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, bool const is_boolean_left, bool const is_boolean_right,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log,
|
||||
uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
int8_t **mem_ptr, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t pbs_base_log, uint32_t pbs_level, uint32_t ks_base_log,
|
||||
uint32_t ks_level, uint32_t grouping_factor, uint32_t num_blocks,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void const *radix_lwe_left, bool const is_bool_left,
|
||||
void const *radix_lwe_right, bool const is_bool_right, void *const *bsks,
|
||||
void *const *ksks, int8_t *mem_ptr, uint32_t polynomial_size,
|
||||
uint32_t num_blocks);
|
||||
void *radix_lwe_out, void const *radix_lwe_left,
|
||||
void const *radix_lwe_right, void *const *bsks, void *const *ksks,
|
||||
int8_t *mem_ptr, uint32_t polynomial_size, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_mult(void *const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -284,61 +280,23 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory);
|
||||
|
||||
void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory);
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
void *lwe_array, void *carry_out, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry);
|
||||
void *lwe_array, void *carry_out, void *input_carries, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cleanup_cuda_add_and_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t compute_overflow,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs_array, const void *rhs_array, void *overflow_block,
|
||||
const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow);
|
||||
|
||||
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
@@ -358,6 +316,25 @@ void cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void *radix_lwe_overflowed, void const *radix_lwe_left,
|
||||
void const *radix_lwe_right, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks_in_radix);
|
||||
|
||||
void cleanup_cuda_integer_radix_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_scalar_mul_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
@@ -397,6 +374,26 @@ void cleanup_cuda_integer_div_rem(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, int8_t signed_operation,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs, void const *rhs, void *overflowed, int8_t signed_operation,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks_in_radix);
|
||||
|
||||
void cleanup_signed_overflowing_add_or_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,7 +27,6 @@ void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void const *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
|
||||
@@ -28,7 +28,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
#endif
|
||||
|
||||
template <typename Torus>
|
||||
@@ -46,7 +46,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
void scratch_cuda_multi_bit_programmable_bootstrap(
|
||||
@@ -63,7 +63,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
uint64_t get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle(
|
||||
|
||||
@@ -255,7 +255,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
@@ -266,7 +266,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
#if (CUDA_ARCH >= 900)
|
||||
@@ -278,7 +278,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -69,7 +69,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride);
|
||||
|
||||
void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
@@ -78,7 +78,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride);
|
||||
|
||||
void cleanup_cuda_programmable_bootstrap(void *stream, uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
@@ -27,7 +27,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
|
||||
|
||||
@@ -2,30 +2,6 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cudaEvent_t event;
|
||||
check_cuda_error(cudaEventCreate(&event));
|
||||
return event;
|
||||
}
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventRecord(event, stream));
|
||||
}
|
||||
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
|
||||
}
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventDestroy(event));
|
||||
}
|
||||
|
||||
/// Unsafe function to create a CUDA stream, must check first that GPU exists
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
@@ -45,9 +21,6 @@ void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
// Determine if a CUDA device is available at runtime
|
||||
uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
|
||||
/// Unsafe function that will try to allocate even if gpu_index is invalid
|
||||
/// or if there's not enough memory. A safe wrapper around it must call
|
||||
/// cuda_check_valid_malloc() first
|
||||
|
||||
@@ -58,11 +58,9 @@ host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
radix_params.big_lwe_dimension, num_blocks);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
|
||||
nullptr, nullptr, mem_ptr->scp_mem, bsks,
|
||||
ksks, num_blocks);
|
||||
|
||||
host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
|
||||
mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
|
||||
|
||||
50
backends/tfhe-cuda-backend/cuda/src/integer/addition.cu
Normal file
50
backends/tfhe-cuda-backend/cuda/src/integer/addition.cu
Normal file
@@ -0,0 +1,50 @@
|
||||
#include "integer/addition.cuh"
|
||||
|
||||
void scratch_cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, int8_t signed_operation,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
SIGNED_OPERATION op = (signed_operation == 1) ? SIGNED_OPERATION::ADDITION
|
||||
: SIGNED_OPERATION::SUBTRACTION;
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_signed_overflowing_add_or_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_signed_overflowing_add_or_sub_memory<uint64_t> **)mem_ptr,
|
||||
num_blocks, op, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs, void const *rhs, void *overflowed, int8_t signed_operation,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_signed_overflowing_add_or_sub_memory<uint64_t> *)mem_ptr;
|
||||
SIGNED_OPERATION op = (signed_operation == 1) ? SIGNED_OPERATION::ADDITION
|
||||
: SIGNED_OPERATION::SUBTRACTION;
|
||||
|
||||
host_integer_signed_overflowing_add_or_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs), static_cast<uint64_t const *>(rhs),
|
||||
static_cast<uint64_t *>(overflowed), op, bsks, (uint64_t *const *)(ksks),
|
||||
mem, num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_signed_overflowing_add_or_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_signed_overflowing_add_or_sub_memory<uint64_t> *mem_ptr =
|
||||
(int_signed_overflowing_add_or_sub_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
149
backends/tfhe-cuda-backend/cuda/src/integer/addition.cuh
Normal file
149
backends/tfhe-cuda-backend/cuda/src/integer/addition.cuh
Normal file
@@ -0,0 +1,149 @@
|
||||
#ifndef TFHE_RS_ADDITION_CUH
|
||||
#define TFHE_RS_ADDITION_CUH
|
||||
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer/comparison.cuh"
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
#include "integer/negation.cuh"
|
||||
#include "integer/scalar_shifts.cuh"
|
||||
#include "linear_algebra.h"
|
||||
#include "pbs/programmable_bootstrap.h"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
template <typename Torus>
|
||||
void host_resolve_signed_overflow(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *result, Torus *last_block_inner_propagation,
|
||||
Torus const *last_block_input_carry, Torus *last_block_output_carry,
|
||||
int_resolve_signed_overflow_memory<Torus> *mem, void *const *bsks,
|
||||
Torus *const *ksks) {
|
||||
|
||||
auto x = mem->x;
|
||||
|
||||
Torus *d_clears =
|
||||
(Torus *)cuda_malloc_async(sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
cuda_set_value_async<Torus>(streams[0], gpu_indexes[0], d_clears, 2, 1);
|
||||
|
||||
// replace with host function call
|
||||
cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
|
||||
streams[0], gpu_indexes[0], x, last_block_output_carry, d_clears,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], last_block_inner_propagation,
|
||||
last_block_inner_propagation, x,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], last_block_inner_propagation,
|
||||
last_block_inner_propagation, last_block_input_carry,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
|
||||
host_apply_univariate_lut_kb<Torus>(streams, gpu_indexes, gpu_count, result,
|
||||
last_block_inner_propagation,
|
||||
mem->resolve_overflow_lut, ksks, bsks, 1);
|
||||
|
||||
cuda_drop_async(d_clears, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_signed_overflowing_add_or_sub_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int_signed_overflowing_add_or_sub_memory<Torus> **mem_ptr,
|
||||
uint32_t num_blocks, SIGNED_OPERATION op, int_radix_params params,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
*mem_ptr = new int_signed_overflowing_add_or_sub_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, op,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
/*
|
||||
* Addition - signed_operation = 1
|
||||
* Subtraction - signed_operation = -1
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_signed_overflowing_add_or_sub_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lhs, Torus const *rhs, Torus *overflowed,
|
||||
SIGNED_OPERATION op, void *const *bsks, uint64_t *const *ksks,
|
||||
int_signed_overflowing_add_or_sub_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
uint32_t big_lwe_dimension = radix_params.big_lwe_dimension;
|
||||
uint32_t big_lwe_size = big_lwe_dimension + 1;
|
||||
uint32_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
assert(radix_params.message_modulus >= 4 && radix_params.carry_modulus >= 4);
|
||||
|
||||
auto result = mem_ptr->result;
|
||||
auto neg_rhs = mem_ptr->neg_rhs;
|
||||
auto input_carries = mem_ptr->input_carries;
|
||||
auto output_carry = mem_ptr->output_carry;
|
||||
auto last_block_inner_propagation = mem_ptr->last_block_inner_propagation;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(result, lhs, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
// phase 1
|
||||
if (op == SIGNED_OPERATION::ADDITION) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], result, lhs, rhs,
|
||||
big_lwe_dimension, num_blocks);
|
||||
} else {
|
||||
host_integer_radix_negation<Torus>(
|
||||
streams, gpu_indexes, gpu_count, neg_rhs, rhs, big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], result, lhs, neg_rhs,
|
||||
big_lwe_dimension, num_blocks);
|
||||
}
|
||||
|
||||
// phase 2
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
host_propagate_single_carry<Torus>(
|
||||
mem_ptr->sub_streams_1, gpu_indexes, gpu_count, result, output_carry,
|
||||
input_carries, mem_ptr->scp_mem, bsks, ksks, num_blocks);
|
||||
host_generate_last_block_inner_propagation<Torus>(
|
||||
mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
last_block_inner_propagation, &lhs[(num_blocks - 1) * big_lwe_size],
|
||||
&rhs[(num_blocks - 1) * big_lwe_size], mem_ptr->las_block_prop_mem, bsks,
|
||||
ksks);
|
||||
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
// phase 3
|
||||
auto input_carry = &input_carries[(num_blocks - 1) * big_lwe_size];
|
||||
if (op == SIGNED_OPERATION::SUBTRACTION && num_blocks == 1) {
|
||||
// Quick fix for the case where the subtraction is done on a single block
|
||||
Torus *one_scalar =
|
||||
(Torus *)cuda_malloc_async(sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
cuda_set_value_async<Torus>(streams[0], gpu_indexes[0], one_scalar, 1, 1);
|
||||
create_trivial_radix<Torus>(
|
||||
streams[0], gpu_indexes[0], input_carry, one_scalar, big_lwe_dimension,
|
||||
1, 1, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
cuda_drop_async(one_scalar, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
host_resolve_signed_overflow<Torus>(
|
||||
streams, gpu_indexes, gpu_count, overflowed, last_block_inner_propagation,
|
||||
input_carry, output_carry, mem_ptr->resolve_overflow_mem, bsks, ksks);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lhs, result, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_ADDITION_CUH
|
||||
@@ -295,7 +295,7 @@ __host__ void host_integer_decompress(
|
||||
extracted_lwe = h_mem_ptr->tmp_extracted_lwe;
|
||||
|
||||
// In the case of extracting a single LWE these parameters are dummy
|
||||
uint32_t num_many_lut = 1;
|
||||
uint32_t lut_count = 1;
|
||||
uint32_t lut_stride = 0;
|
||||
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
|
||||
/// dimension to a big LWE dimension
|
||||
@@ -311,7 +311,7 @@ __host__ void host_integer_decompress(
|
||||
compression_params.small_lwe_dimension,
|
||||
encryption_params.polynomial_size, encryption_params.pbs_base_log,
|
||||
encryption_params.pbs_level, encryption_params.grouping_factor,
|
||||
num_radix_blocks, encryption_params.pbs_type, num_many_lut, lut_stride);
|
||||
num_radix_blocks, encryption_params.pbs_type, lut_count, lut_stride);
|
||||
} else {
|
||||
/// For multi GPU execution we create vectors of pointers for inputs and
|
||||
/// outputs
|
||||
@@ -338,7 +338,7 @@ __host__ void host_integer_decompress(
|
||||
compression_params.small_lwe_dimension,
|
||||
encryption_params.polynomial_size, encryption_params.pbs_base_log,
|
||||
encryption_params.pbs_level, encryption_params.grouping_factor,
|
||||
num_radix_blocks, encryption_params.pbs_type, num_many_lut, lut_stride);
|
||||
num_radix_blocks, encryption_params.pbs_type, lut_count, lut_stride);
|
||||
|
||||
/// Copy data back to GPU 0 and release vecs
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
|
||||
@@ -425,24 +425,11 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto do_overflowing_sub = [&](cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
uint32_t compute_borrow = 1;
|
||||
uint32_t uses_input_borrow = 0;
|
||||
auto first_indexes = mem_ptr->first_indexes_for_overflow_sub
|
||||
[merged_interesting_remainder.len - 1];
|
||||
auto second_indexes = mem_ptr->second_indexes_for_overflow_sub
|
||||
[merged_interesting_remainder.len - 1];
|
||||
auto scalar_indexes =
|
||||
mem_ptr
|
||||
->scalars_for_overflow_sub[merged_interesting_remainder.len - 1];
|
||||
mem_ptr->overflow_sub_mem->update_lut_indexes(
|
||||
streams, gpu_indexes, first_indexes, second_indexes, scalar_indexes,
|
||||
merged_interesting_remainder.len);
|
||||
host_integer_overflowing_sub<uint64_t>(
|
||||
host_integer_overflowing_sub_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
(uint64_t *)merged_interesting_remainder.data,
|
||||
interesting_divisor.data, subtraction_overflowed.data,
|
||||
(const Torus *)nullptr, mem_ptr->overflow_sub_mem, bsks, ksks,
|
||||
merged_interesting_remainder.len, compute_borrow, uses_input_borrow);
|
||||
subtraction_overflowed.data, merged_interesting_remainder.data,
|
||||
interesting_divisor.data, bsks, ksks, mem_ptr->overflow_sub_mem,
|
||||
merged_interesting_remainder.len);
|
||||
};
|
||||
|
||||
// fills:
|
||||
@@ -670,12 +657,10 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
int_mem_ptr->negated_quotient, quotient, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_quotient, nullptr, nullptr, int_mem_ptr->scp_mem_1,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
host_propagate_single_carry<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_quotient,
|
||||
nullptr, nullptr, int_mem_ptr->scp_mem_1,
|
||||
bsks, ksks, num_blocks);
|
||||
|
||||
host_integer_radix_negation(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_remainder,
|
||||
@@ -686,8 +671,7 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_remainder, nullptr, nullptr,
|
||||
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks, requested_flag,
|
||||
uses_carry);
|
||||
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks);
|
||||
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/negation.cuh"
|
||||
#include <linear_algebra.h>
|
||||
|
||||
void cuda_full_propagation_64_inplace(void *const *streams,
|
||||
@@ -50,8 +49,7 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory) {
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -61,94 +59,30 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
scratch_cuda_propagate_single_carry_kb_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_sc_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
requested_flag, uses_carry, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_propagate_single_carry_kb_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_sc_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
requested_flag, uses_carry, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t compute_overflow,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_overflowing_sub<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_borrow_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
compute_overflow, allocate_gpu_memory);
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
void *lwe_array, void *carry_out, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
nullptr, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), num_blocks);
|
||||
}
|
||||
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_add_and_propagate_single_carry<uint64_t>(
|
||||
void *lwe_array, void *carry_out, void *input_carries, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks) {
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs_array),
|
||||
static_cast<const uint64_t *>(rhs_array),
|
||||
static_cast<uint64_t *>(carry_out),
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
static_cast<uint64_t *>(input_carries),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs_array, const void *rhs_array, void *overflow_block,
|
||||
const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow) {
|
||||
|
||||
host_integer_overflowing_sub<uint64_t>(
|
||||
(cudaStream_t const *)streams, gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs_array), static_cast<uint64_t *>(lhs_array),
|
||||
static_cast<const uint64_t *>(rhs_array),
|
||||
static_cast<uint64_t *>(overflow_block),
|
||||
static_cast<const uint64_t *>(input_borrow),
|
||||
(int_borrow_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)ksks,
|
||||
num_blocks, compute_overflow, uses_input_borrow);
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
@@ -160,23 +94,6 @@ void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_add_and_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_sc_prop_memory<uint64_t> *mem_ptr =
|
||||
(int_sc_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_borrow_prop_memory<uint64_t> *mem_ptr =
|
||||
(int_borrow_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
@@ -225,14 +142,14 @@ void cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
host_apply_many_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -67,12 +67,11 @@ void generate_ids_update_degrees(int *terms_degree, size_t *h_lwe_idx_in,
|
||||
*/
|
||||
void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, bool const is_boolean_left, bool const is_boolean_right,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log,
|
||||
uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
int8_t **mem_ptr, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
uint32_t glwe_dimension, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t pbs_base_log, uint32_t pbs_level, uint32_t ks_base_log,
|
||||
uint32_t ks_level, uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
polynomial_size * glwe_dimension, lwe_dimension,
|
||||
@@ -89,8 +88,8 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
case 16384:
|
||||
scratch_cuda_integer_mult_radix_ciphertext_kb<uint64_t>(
|
||||
(cudaStream_t const *)(streams), gpu_indexes, gpu_count,
|
||||
(int_mul_memory<uint64_t> **)mem_ptr, is_boolean_left, is_boolean_right,
|
||||
num_radix_blocks, params, allocate_gpu_memory);
|
||||
(int_mul_memory<uint64_t> **)mem_ptr, num_radix_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
|
||||
@@ -127,66 +126,65 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
*/
|
||||
void cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void const *radix_lwe_left, bool const is_bool_left,
|
||||
void const *radix_lwe_right, bool const is_bool_right, void *const *bsks,
|
||||
void *const *ksks, int8_t *mem_ptr, uint32_t polynomial_size,
|
||||
uint32_t num_blocks) {
|
||||
void *radix_lwe_out, void const *radix_lwe_left,
|
||||
void const *radix_lwe_right, void *const *bsks, void *const *ksks,
|
||||
int8_t *mem_ptr, uint32_t polynomial_size, uint32_t num_blocks) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<256>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<const uint64_t *>(radix_lwe_left), is_bool_left,
|
||||
static_cast<const uint64_t *>(radix_lwe_right), is_bool_right, bsks,
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks,
|
||||
(uint64_t **)(ksks), (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 512:
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<512>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<const uint64_t *>(radix_lwe_left), is_bool_left,
|
||||
static_cast<const uint64_t *>(radix_lwe_right), is_bool_right, bsks,
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks,
|
||||
(uint64_t **)(ksks), (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<1024>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<const uint64_t *>(radix_lwe_left), is_bool_left,
|
||||
static_cast<const uint64_t *>(radix_lwe_right), is_bool_right, bsks,
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks,
|
||||
(uint64_t **)(ksks), (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<2048>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<const uint64_t *>(radix_lwe_left), is_bool_left,
|
||||
static_cast<const uint64_t *>(radix_lwe_right), is_bool_right, bsks,
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks,
|
||||
(uint64_t **)(ksks), (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<4096>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<const uint64_t *>(radix_lwe_left), is_bool_left,
|
||||
static_cast<const uint64_t *>(radix_lwe_right), is_bool_right, bsks,
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks,
|
||||
(uint64_t **)(ksks), (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<8192>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<const uint64_t *>(radix_lwe_left), is_bool_left,
|
||||
static_cast<const uint64_t *>(radix_lwe_right), is_bool_right, bsks,
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks,
|
||||
(uint64_t **)(ksks), (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<16384>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<const uint64_t *>(radix_lwe_left), is_bool_left,
|
||||
static_cast<const uint64_t *>(radix_lwe_right), is_bool_right, bsks,
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks,
|
||||
(uint64_t **)(ksks), (int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "helper_multi_gpu.h"
|
||||
#include "integer/cmux.cuh"
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
#include "linear_algebra.h"
|
||||
@@ -209,7 +208,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
auto small_lwe_size = small_lwe_dimension + 1;
|
||||
|
||||
// In the case of extracting a single LWE this parameters are dummy
|
||||
uint32_t num_many_lut = 1;
|
||||
uint32_t lut_count = 1;
|
||||
uint32_t lut_stride = 0;
|
||||
|
||||
if (num_radix_in_vec == 0)
|
||||
@@ -370,7 +369,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
glwe_dimension, small_lwe_dimension, polynomial_size,
|
||||
mem_ptr->params.pbs_base_log, mem_ptr->params.pbs_level,
|
||||
mem_ptr->params.grouping_factor, total_count,
|
||||
mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
mem_ptr->params.pbs_type, lut_count, lut_stride);
|
||||
} else {
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
@@ -418,7 +417,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
glwe_dimension, small_lwe_dimension, polynomial_size,
|
||||
mem_ptr->params.pbs_base_log, mem_ptr->params.pbs_level,
|
||||
mem_ptr->params.grouping_factor, total_count,
|
||||
mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
mem_ptr->params.pbs_type, lut_count, lut_stride);
|
||||
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, new_blocks, lwe_after_pbs_vec,
|
||||
@@ -454,8 +453,7 @@ template <typename Torus, class params>
|
||||
__host__ void host_integer_mult_radix_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, uint64_t *radix_lwe_out, uint64_t const *radix_lwe_left,
|
||||
bool const is_bool_left, uint64_t const *radix_lwe_right,
|
||||
bool const is_bool_right, void *const *bsks, uint64_t *const *ksks,
|
||||
uint64_t const *radix_lwe_right, void *const *bsks, uint64_t *const *ksks,
|
||||
int_mul_memory<Torus> *mem_ptr, uint32_t num_blocks) {
|
||||
|
||||
auto glwe_dimension = mem_ptr->params.glwe_dimension;
|
||||
@@ -466,20 +464,6 @@ __host__ void host_integer_mult_radix_kb(
|
||||
|
||||
int big_lwe_dimension = glwe_dimension * polynomial_size;
|
||||
|
||||
if (is_bool_right) {
|
||||
zero_out_if<Torus>(streams, gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_left, radix_lwe_right, mem_ptr->zero_out_mem,
|
||||
mem_ptr->zero_out_predicate_lut, bsks, ksks, num_blocks);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_bool_left) {
|
||||
zero_out_if<Torus>(streams, gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_right, radix_lwe_left, mem_ptr->zero_out_mem,
|
||||
mem_ptr->zero_out_predicate_lut, bsks, ksks, num_blocks);
|
||||
return;
|
||||
}
|
||||
|
||||
// 'vector_result_lsb' contains blocks from all possible right shifts of
|
||||
// radix_lwe_left, only nonzero blocks are kept
|
||||
int lsb_vector_block_count = num_blocks * (num_blocks + 1) / 2;
|
||||
@@ -578,26 +562,19 @@ __host__ void host_integer_mult_radix_kb(
|
||||
terms_degree, bsks, ksks, mem_ptr->sum_ciphertexts_mem, num_blocks,
|
||||
2 * num_blocks, mem_ptr->luts_array);
|
||||
|
||||
uint32_t block_modulus = message_modulus * carry_modulus;
|
||||
uint32_t num_bits_in_block = log2_int(block_modulus);
|
||||
|
||||
auto scp_mem_ptr = mem_ptr->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
auto scp_mem_ptr = mem_ptr->sum_ciphertexts_mem->scp_mem;
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count,
|
||||
radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_mult_radix_ciphertext_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_mul_memory<Torus> **mem_ptr,
|
||||
bool const is_boolean_left, bool const is_boolean_right,
|
||||
uint32_t num_radix_blocks, int_radix_params params,
|
||||
bool allocate_gpu_memory) {
|
||||
*mem_ptr = new int_mul_memory<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
is_boolean_left, is_boolean_right,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
|
||||
@@ -12,3 +12,49 @@ void cuda_negate_integer_radix_ciphertext_64(
|
||||
static_cast<const uint64_t *>(lwe_array_in), lwe_dimension,
|
||||
lwe_ciphertext_count, message_modulus, carry_modulus);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_overflowing_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_overflowing_sub_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void *radix_lwe_overflowed, void const *radix_lwe_left,
|
||||
void const *radix_lwe_right, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_overflowing_sub_memory<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_overflowing_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_overflowed),
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
mem, num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_overflowing_sub_memory<uint64_t> *mem_ptr =
|
||||
(int_overflowing_sub_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ __host__ void scratch_cuda_integer_overflowing_sub_kb(
|
||||
*mem_ptr = new int_overflowing_sub_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, allocate_gpu_memory);
|
||||
}
|
||||
/*
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_overflowing_sub_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
@@ -113,39 +113,4 @@ __host__ void host_integer_overflowing_sub_kb(
|
||||
mem_ptr, bsks, ksks, num_blocks);
|
||||
}
|
||||
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_overflowing_sub(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_out_array, Torus *lhs_array,
|
||||
const Torus *rhs_array, Torus *overflow_block, const Torus *input_borrow,
|
||||
int_borrow_prop_memory<uint64_t> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow) {
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
// We need to recalculate the num_groups, because on the division the number
|
||||
// of num_blocks changes
|
||||
uint32_t block_modulus =
|
||||
radix_params.message_modulus * radix_params.carry_modulus;
|
||||
uint32_t num_bits_in_block = log2_int(block_modulus);
|
||||
uint32_t grouping_size = num_bits_in_block;
|
||||
uint32_t num_groups = (num_blocks + grouping_size - 1) / grouping_size;
|
||||
|
||||
auto stream = (cudaStream_t *)streams;
|
||||
host_unchecked_sub_with_correcting_term<Torus>(
|
||||
stream[0], gpu_indexes[0], static_cast<Torus *>(lwe_out_array),
|
||||
static_cast<Torus *>(lhs_array), static_cast<const Torus *>(rhs_array),
|
||||
radix_params.big_lwe_dimension, num_blocks, radix_params.message_modulus,
|
||||
radix_params.carry_modulus, radix_params.message_modulus - 1);
|
||||
|
||||
host_single_borrow_propagate<Torus>(
|
||||
streams, gpu_indexes, gpu_count, static_cast<Torus *>(lwe_out_array),
|
||||
static_cast<Torus *>(overflow_block),
|
||||
static_cast<const Torus *>(input_borrow),
|
||||
(int_borrow_prop_memory<Torus> *)mem_ptr, bsks, (Torus **)(ksks),
|
||||
num_blocks, num_groups, compute_overflow, uses_input_borrow);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -292,7 +292,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
Torus const *sign_block =
|
||||
lwe_array_in + (total_num_radix_blocks - 1) * big_lwe_size;
|
||||
|
||||
auto sign_bit_pos = (int)log2_int(message_modulus) - 1;
|
||||
auto sign_bit_pos = (int)std::log2(message_modulus) - 1;
|
||||
|
||||
auto scalar_last_leaf_with_respect_to_zero_lut_f =
|
||||
[sign_handler_f, sign_bit_pos,
|
||||
|
||||
@@ -54,7 +54,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
uint32_t lwe_size = input_lwe_dimension + 1;
|
||||
uint32_t lwe_size_bytes = lwe_size * sizeof(T);
|
||||
uint32_t msg_bits = log2_int(message_modulus);
|
||||
uint32_t msg_bits = (uint32_t)std::log2(message_modulus);
|
||||
uint32_t num_ciphertext_bits = msg_bits * num_radix_blocks;
|
||||
|
||||
T *preshifted_buffer = mem->preshifted_buffer;
|
||||
@@ -112,12 +112,10 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
terms_degree, bsks, ksks, mem->sum_ciphertexts_vec_mem,
|
||||
num_radix_blocks, j, nullptr);
|
||||
|
||||
auto scp_mem_ptr = mem->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<T>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_radix_blocks, requested_flag, uses_carry);
|
||||
auto scp_mem_ptr = mem->sum_ciphertexts_vec_mem->scp_mem;
|
||||
host_propagate_single_carry<T>(streams, gpu_indexes, gpu_count, lwe_array,
|
||||
nullptr, nullptr, scp_mem_ptr, bsks, ksks,
|
||||
num_radix_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
size_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
size_t num_bits_in_message = (size_t)log2_int(message_modulus);
|
||||
size_t num_bits_in_message = (size_t)log2(message_modulus);
|
||||
size_t total_num_bits = num_bits_in_message * num_blocks;
|
||||
n = n % total_num_bits;
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
size_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
size_t num_bits_in_block = (size_t)log2_int(message_modulus);
|
||||
size_t num_bits_in_block = (size_t)log2(message_modulus);
|
||||
size_t total_num_bits = num_bits_in_block * num_blocks;
|
||||
shift = shift % total_num_bits;
|
||||
|
||||
@@ -141,7 +141,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
size_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
size_t num_bits_in_block = (size_t)log2_int(message_modulus);
|
||||
size_t num_bits_in_block = (size_t)log2(message_modulus);
|
||||
size_t total_num_bits = num_bits_in_block * num_blocks;
|
||||
shift = shift % total_num_bits;
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
uint32_t gpu_count, Torus *lwe_array, Torus const *lwe_shift,
|
||||
int_shift_and_rotate_buffer<Torus> *mem, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
uint32_t bits_per_block = log2_int(mem->params.message_modulus);
|
||||
uint32_t bits_per_block = std::log2(mem->params.message_modulus);
|
||||
uint32_t total_nb_bits = bits_per_block * num_radix_blocks;
|
||||
if (total_nb_bits == 0)
|
||||
return;
|
||||
@@ -55,7 +55,7 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
// then the behaviour of shifting won't be the same
|
||||
// if shift >= total_nb_bits compared to when total_nb_bits
|
||||
// is a power of two, as will 'capture' more bits in `shift_bits`
|
||||
uint32_t max_num_bits_that_tell_shift = log2_int(total_nb_bits);
|
||||
uint32_t max_num_bits_that_tell_shift = std::log2(total_nb_bits);
|
||||
if (!is_power_of_two(total_nb_bits))
|
||||
max_num_bits_that_tell_shift += 1;
|
||||
// Extracts bits and put them in the bit index 2 (=> bit number 3)
|
||||
|
||||
@@ -57,7 +57,6 @@ void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
static_cast<const uint64_t *>(lwe_array_in_2),
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the addition of a u32 input LWE ciphertext vector with a u32
|
||||
* plaintext vector. See the equivalent operation on u64 data for more details.
|
||||
|
||||
@@ -82,46 +82,6 @@ __host__ void host_addition(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void pack_for_overflowing_ops(T *output, T const *input_1,
|
||||
T const *input_2, uint32_t num_entries,
|
||||
uint32_t message_modulus) {
|
||||
|
||||
int tid = threadIdx.x;
|
||||
int index = blockIdx.x * blockDim.x + tid;
|
||||
if (index < num_entries) {
|
||||
// Here we take advantage of the wrapping behaviour of uint
|
||||
output[index] = input_1[index] * message_modulus + input_2[index];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_pack_for_overflowing_ops(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *output,
|
||||
T const *input_1, T const *input_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
pack_for_overflowing_ops<T><<<grid, thds, 0, stream>>>(
|
||||
&output[(input_lwe_ciphertext_count - 1) * lwe_size],
|
||||
&input_1[(input_lwe_ciphertext_count - 1) * lwe_size],
|
||||
&input_2[(input_lwe_ciphertext_count - 1) * lwe_size], lwe_size,
|
||||
message_modulus);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void subtraction(T *output, T const *input_1, T const *input_2,
|
||||
uint32_t num_entries) {
|
||||
|
||||
@@ -92,7 +92,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t grouping_factor,
|
||||
uint32_t input_lwe_ciphertext_count, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
switch (sizeof(Torus)) {
|
||||
case sizeof(uint32_t):
|
||||
@@ -126,7 +126,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
current_lwe_array_in, current_lwe_input_indexes,
|
||||
bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_inputs_on_gpu,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -165,7 +165,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
current_lwe_array_in, current_lwe_input_indexes,
|
||||
bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
|
||||
polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_inputs_on_gpu, num_many_lut, lut_stride);
|
||||
num_inputs_on_gpu, lut_count, lut_stride);
|
||||
}
|
||||
break;
|
||||
case CLASSICAL:
|
||||
@@ -194,7 +194,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
current_lwe_array_in, current_lwe_input_indexes,
|
||||
bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_inputs_on_gpu,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -45,7 +45,7 @@ __global__ void device_programmable_bootstrap_cg(
|
||||
const double2 *__restrict__ bootstrapping_key, double2 *join_buffer,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, int8_t *device_mem,
|
||||
uint64_t device_memory_size_per_block, uint32_t num_many_lut,
|
||||
uint64_t device_memory_size_per_block, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
grid_group grid = this_grid();
|
||||
@@ -152,8 +152,8 @@ __global__ void device_programmable_bootstrap_cg(
|
||||
// but we do the computation at block 0 to avoid waiting for extra blocks,
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.z * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -168,8 +168,8 @@ __global__ void device_programmable_bootstrap_cg(
|
||||
}
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -235,7 +235,7 @@ __host__ void host_programmable_bootstrap_cg(
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
// With SM each block corresponds to either the mask or body, no need to
|
||||
// duplicate data for each
|
||||
@@ -273,7 +273,7 @@ __host__ void host_programmable_bootstrap_cg(
|
||||
kernel_args[10] = &base_log;
|
||||
kernel_args[11] = &level_count;
|
||||
kernel_args[12] = &d_mem;
|
||||
kernel_args[14] = &num_many_lut;
|
||||
kernel_args[14] = &lut_count;
|
||||
kernel_args[15] = &lut_stride;
|
||||
|
||||
if (max_shared_memory < partial_sm) {
|
||||
|
||||
@@ -32,8 +32,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
uint32_t level_count, uint32_t grouping_factor, uint32_t lwe_offset,
|
||||
uint32_t lwe_chunk_size, uint32_t keybundle_size_per_input,
|
||||
int8_t *device_mem, uint64_t device_memory_size_per_block,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
grid_group grid = this_grid();
|
||||
|
||||
// We use shared memory for the polynomials that are used often during the
|
||||
@@ -135,8 +134,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
// default
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.z * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -154,8 +153,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -294,7 +293,7 @@ __host__ void execute_cg_external_product_loop(
|
||||
Torus const *lwe_output_indexes, pbs_buffer<Torus, MULTI_BIT> *buffer,
|
||||
uint32_t num_samples, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t lwe_offset, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t lwe_offset, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
uint64_t full_sm =
|
||||
@@ -344,7 +343,7 @@ __host__ void execute_cg_external_product_loop(
|
||||
kernel_args[16] = &chunk_size;
|
||||
kernel_args[17] = &keybundle_size_per_input;
|
||||
kernel_args[18] = &d_mem;
|
||||
kernel_args[20] = &num_many_lut;
|
||||
kernel_args[20] = &lut_count;
|
||||
kernel_args[21] = &lut_stride;
|
||||
|
||||
dim3 grid_accumulate(level_count, glwe_dimension + 1, num_samples);
|
||||
@@ -380,7 +379,7 @@ __host__ void host_cg_multi_bit_programmable_bootstrap(
|
||||
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
|
||||
@@ -398,7 +397,7 @@ __host__ void host_cg_multi_bit_programmable_bootstrap(
|
||||
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, lwe_array_out, lwe_output_indexes, buffer,
|
||||
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, base_log, level_count, lwe_offset, num_many_lut,
|
||||
grouping_factor, base_log, level_count, lwe_offset, lut_count,
|
||||
lut_stride);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -123,7 +123,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
@@ -133,7 +133,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<512>>(
|
||||
@@ -141,7 +141,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<1024>>(
|
||||
@@ -149,7 +149,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<2048>>(
|
||||
@@ -157,7 +157,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<4096>>(
|
||||
@@ -165,7 +165,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<8192>>(
|
||||
@@ -173,7 +173,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<16384>>(
|
||||
@@ -181,7 +181,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (classical PBS): unsupported polynomial size. "
|
||||
@@ -380,7 +380,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
@@ -390,7 +390,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_cg<Torus, Degree<512>>(
|
||||
@@ -398,7 +398,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_cg<Torus, Degree<1024>>(
|
||||
@@ -406,7 +406,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<2048>>(
|
||||
@@ -414,7 +414,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<4096>>(
|
||||
@@ -422,7 +422,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<8192>>(
|
||||
@@ -430,7 +430,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<16384>>(
|
||||
@@ -438,7 +438,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (classical PBS): unsupported polynomial size. "
|
||||
@@ -455,7 +455,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
@@ -465,7 +465,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap<Torus, Degree<512>>(
|
||||
@@ -473,7 +473,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap<Torus, Degree<1024>>(
|
||||
@@ -481,7 +481,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<2048>>(
|
||||
@@ -489,7 +489,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<4096>>(
|
||||
@@ -497,7 +497,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -505,7 +505,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -513,7 +513,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (classical PBS): unsupported polynomial size. "
|
||||
@@ -531,7 +531,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
if (base_log > 32)
|
||||
PANIC("Cuda error (classical PBS): base log should be <= 32")
|
||||
@@ -551,7 +551,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
static_cast<const uint32_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
#else
|
||||
PANIC("Cuda error (PBS): TBC pbs is not supported.")
|
||||
@@ -566,7 +566,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
static_cast<const uint32_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case DEFAULT:
|
||||
cuda_programmable_bootstrap_lwe_ciphertext_vector<uint32_t>(
|
||||
@@ -578,7 +578,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
static_cast<const uint32_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (PBS): unknown pbs variant.")
|
||||
@@ -653,11 +653,9 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride) {
|
||||
if (base_log > 64)
|
||||
PANIC("Cuda error (classical PBS): base log should be <= 64")
|
||||
if ((glwe_dimension + 1) * level_count > 8)
|
||||
PANIC("Cuda error (multi-bit PBS): (k + 1)*l should be <= 8")
|
||||
|
||||
pbs_buffer<uint64_t, CLASSICAL> *buffer =
|
||||
(pbs_buffer<uint64_t, CLASSICAL> *)mem_ptr;
|
||||
@@ -674,7 +672,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
#else
|
||||
PANIC("Cuda error (PBS): TBC pbs is not supported.")
|
||||
@@ -689,7 +687,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
case PBS_VARIANT::DEFAULT:
|
||||
cuda_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
@@ -701,7 +699,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (PBS): unknown pbs variant.")
|
||||
@@ -729,7 +727,7 @@ template void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector<uint64_t>(
|
||||
uint64_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint64_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
@@ -739,7 +737,7 @@ template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
uint64_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint64_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void scratch_cuda_programmable_bootstrap_cg<uint64_t>(
|
||||
@@ -760,7 +758,7 @@ template void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector<uint32_t>(
|
||||
uint32_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint32_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint32_t>(
|
||||
@@ -770,7 +768,7 @@ template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint32_t>(
|
||||
uint32_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint32_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void scratch_cuda_programmable_bootstrap_cg<uint32_t>(
|
||||
@@ -799,7 +797,7 @@ template void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector<uint32_t>(
|
||||
uint32_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint32_t, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
template void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector<uint64_t>(
|
||||
void *stream, uint32_t gpu_index, uint64_t *lwe_array_out,
|
||||
@@ -808,7 +806,7 @@ template void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector<uint64_t>(
|
||||
uint64_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint64_t, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride);
|
||||
template void scratch_cuda_programmable_bootstrap_tbc<uint32_t>(
|
||||
void *stream, uint32_t gpu_index,
|
||||
|
||||
@@ -142,7 +142,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
uint32_t lwe_iteration, uint32_t lwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
int8_t *device_mem, uint64_t device_memory_size_per_block,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
// We use shared memory for the polynomials that are used often during the
|
||||
// bootstrap, since shared memory is kept in L1 cache and accessing it is
|
||||
@@ -217,8 +217,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
// but we do the computation at block 0 to avoid waiting for extra blocks,
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.x * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -233,8 +233,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
}
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -412,8 +412,8 @@ __host__ void execute_step_two(
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, int8_t *d_mem, int lwe_iteration, uint64_t partial_sm,
|
||||
uint64_t partial_dm, uint64_t full_sm, uint64_t full_dm,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint64_t partial_dm, uint64_t full_sm, uint64_t full_dm, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
int max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
cudaSetDevice(gpu_index);
|
||||
@@ -426,21 +426,21 @@ __host__ void execute_step_two(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
bootstrapping_key, global_accumulator, global_join_buffer,
|
||||
lwe_iteration, lwe_dimension, polynomial_size, base_log,
|
||||
level_count, d_mem, full_dm, num_many_lut, lut_stride);
|
||||
level_count, d_mem, full_dm, lut_count, lut_stride);
|
||||
} else if (max_shared_memory < full_sm) {
|
||||
device_programmable_bootstrap_step_two<Torus, params, PARTIALSM>
|
||||
<<<grid, thds, partial_sm, stream>>>(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
bootstrapping_key, global_accumulator, global_join_buffer,
|
||||
lwe_iteration, lwe_dimension, polynomial_size, base_log,
|
||||
level_count, d_mem, partial_dm, num_many_lut, lut_stride);
|
||||
level_count, d_mem, partial_dm, lut_count, lut_stride);
|
||||
} else {
|
||||
device_programmable_bootstrap_step_two<Torus, params, FULLSM>
|
||||
<<<grid, thds, full_sm, stream>>>(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
bootstrapping_key, global_accumulator, global_join_buffer,
|
||||
lwe_iteration, lwe_dimension, polynomial_size, base_log,
|
||||
level_count, d_mem, 0, num_many_lut, lut_stride);
|
||||
level_count, d_mem, 0, lut_count, lut_stride);
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
@@ -456,7 +456,7 @@ __host__ void host_programmable_bootstrap(
|
||||
pbs_buffer<Torus, CLASSICAL> *pbs_buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
// With SM each block corresponds to either the mask or body, no need to
|
||||
@@ -493,7 +493,7 @@ __host__ void host_programmable_bootstrap(
|
||||
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
|
||||
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
@@ -76,7 +76,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
@@ -84,7 +84,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
@@ -92,7 +92,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<2048>>(
|
||||
@@ -100,7 +100,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<4096>>(
|
||||
@@ -108,7 +108,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -116,7 +116,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -124,7 +124,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported polynomial size. Supported "
|
||||
@@ -142,7 +142,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
@@ -151,7 +151,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
@@ -159,7 +159,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
@@ -167,7 +167,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<2048>>(
|
||||
@@ -175,7 +175,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<4096>>(
|
||||
@@ -183,7 +183,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -191,7 +191,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -199,7 +199,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported polynomial size. Supported "
|
||||
@@ -215,13 +215,11 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
if (base_log > 64)
|
||||
PANIC("Cuda error (multi-bit PBS): base log should be <= 64")
|
||||
if ((glwe_dimension + 1) * level_count > 8)
|
||||
PANIC("Cuda error (multi-bit PBS): (k + 1)*l should be <= 8")
|
||||
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *buffer =
|
||||
(pbs_buffer<uint64_t, MULTI_BIT> *)mem_ptr;
|
||||
@@ -238,7 +236,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const uint64_t *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
#else
|
||||
PANIC("Cuda error (multi-bit PBS): TBC pbs is not supported.")
|
||||
@@ -253,7 +251,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const uint64_t *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case PBS_VARIANT::DEFAULT:
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
@@ -265,7 +263,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const uint64_t *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported implementation variant.")
|
||||
@@ -467,7 +465,7 @@ uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
|
||||
#if CUDA_ARCH < 900
|
||||
// We pick a smaller divisor on GPUs other than H100, so 256-bit integer
|
||||
// multiplication can run
|
||||
int log2_max_num_pbs = log2_int(max_num_pbs);
|
||||
int log2_max_num_pbs = std::log2(max_num_pbs);
|
||||
if (log2_max_num_pbs > 13)
|
||||
ith_divisor = log2_max_num_pbs - 11;
|
||||
#endif
|
||||
@@ -501,7 +499,7 @@ cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
|
||||
template void scratch_cuda_cg_multi_bit_programmable_bootstrap<uint64_t>(
|
||||
void *stream, uint32_t gpu_index,
|
||||
@@ -518,7 +516,7 @@ cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
|
||||
template bool
|
||||
has_support_to_cuda_programmable_bootstrap_tbc_multi_bit<uint64_t>(
|
||||
@@ -590,7 +588,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
if (base_log > 32)
|
||||
PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
|
||||
@@ -602,7 +600,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
@@ -610,7 +608,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
@@ -618,7 +616,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 2048: {
|
||||
int num_sms = 0;
|
||||
@@ -631,14 +629,14 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log,
|
||||
level_count, num_samples, num_many_lut, lut_stride);
|
||||
level_count, num_samples, lut_count, lut_stride);
|
||||
else
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, Degree<2048>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log,
|
||||
level_count, num_samples, num_many_lut, lut_stride);
|
||||
level_count, num_samples, lut_count, lut_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -648,7 +646,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -656,7 +654,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -664,7 +662,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
num_samples, lut_count, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported polynomial size. Supported "
|
||||
@@ -687,5 +685,5 @@ cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
#endif
|
||||
|
||||
@@ -253,7 +253,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t grouping_factor, uint32_t iteration, uint32_t lwe_offset,
|
||||
uint32_t lwe_chunk_size, int8_t *device_mem,
|
||||
uint64_t device_memory_size_per_block, uint32_t num_many_lut,
|
||||
uint64_t device_memory_size_per_block, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
// We use shared memory for the polynomials that are used often during the
|
||||
// bootstrap, since shared memory is kept in L1 cache and accessing it is
|
||||
@@ -326,8 +326,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
// but we do the computation at block 0 to avoid waiting for extra blocks,
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, global_slice);
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.x * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -342,8 +342,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
}
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, global_slice, 0);
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -591,14 +591,12 @@ execute_step_one(cudaStream_t stream, uint32_t gpu_index,
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__host__ void
|
||||
execute_step_two(cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus const *lwe_output_indexes,
|
||||
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t num_samples,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, int32_t grouping_factor,
|
||||
uint32_t level_count, uint32_t j, uint32_t lwe_offset,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
__host__ void execute_step_two(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus const *lwe_output_indexes, pbs_buffer<Torus, MULTI_BIT> *buffer,
|
||||
uint32_t num_samples, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, int32_t grouping_factor, uint32_t level_count,
|
||||
uint32_t j, uint32_t lwe_offset, uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
uint64_t full_sm_accumulate_step_two =
|
||||
@@ -623,7 +621,7 @@ execute_step_two(cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
global_accumulator, global_accumulator_fft, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, level_count, grouping_factor, j,
|
||||
lwe_offset, lwe_chunk_size, d_mem, full_sm_accumulate_step_two,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
else
|
||||
device_multi_bit_programmable_bootstrap_accumulate_step_two<Torus, params,
|
||||
FULLSM>
|
||||
@@ -632,7 +630,7 @@ execute_step_two(cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
global_accumulator, global_accumulator_fft, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
grouping_factor, j, lwe_offset, lwe_chunk_size, d_mem, 0,
|
||||
num_many_lut, lut_stride);
|
||||
lut_count, lut_stride);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
@@ -645,7 +643,7 @@ __host__ void host_multi_bit_programmable_bootstrap(
|
||||
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
|
||||
@@ -669,8 +667,7 @@ __host__ void host_multi_bit_programmable_bootstrap(
|
||||
execute_step_two<Torus, params>(
|
||||
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
|
||||
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, level_count, j, lwe_offset, num_many_lut,
|
||||
lut_stride);
|
||||
grouping_factor, level_count, j, lwe_offset, lut_count, lut_stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,8 +45,8 @@ __global__ void device_programmable_bootstrap_tbc(
|
||||
const double2 *__restrict__ bootstrapping_key, double2 *join_buffer,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, int8_t *device_mem,
|
||||
uint64_t device_memory_size_per_block, bool support_dsm,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint64_t device_memory_size_per_block, bool support_dsm, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
cluster_group cluster = this_cluster();
|
||||
|
||||
@@ -158,8 +158,8 @@ __global__ void device_programmable_bootstrap_tbc(
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.z * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -175,8 +175,8 @@ __global__ void device_programmable_bootstrap_tbc(
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -261,7 +261,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
|
||||
auto supports_dsm =
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
|
||||
@@ -317,7 +317,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
lwe_array_in, lwe_input_indexes, bootstrapping_key, buffer_fft,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, d_mem, full_dm,
|
||||
supports_dsm, num_many_lut, lut_stride));
|
||||
supports_dsm, lut_count, lut_stride));
|
||||
} else if (max_shared_memory < full_sm + minimum_sm_tbc) {
|
||||
config.dynamicSmemBytes = partial_sm + minimum_sm_tbc;
|
||||
|
||||
@@ -326,7 +326,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
lwe_array_in, lwe_input_indexes, bootstrapping_key, buffer_fft,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, d_mem,
|
||||
partial_dm, supports_dsm, num_many_lut, lut_stride));
|
||||
partial_dm, supports_dsm, lut_count, lut_stride));
|
||||
} else {
|
||||
config.dynamicSmemBytes = full_sm + minimum_sm_tbc;
|
||||
|
||||
@@ -335,7 +335,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
lwe_array_in, lwe_input_indexes, bootstrapping_key, buffer_fft,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, d_mem, 0,
|
||||
supports_dsm, num_many_lut, lut_stride));
|
||||
supports_dsm, lut_count, lut_stride));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user