Compare commits

..

2 Commits

Author SHA1 Message Date
Thomas Montaigu
313f835f1f chore(shortint): add ks32 & prod params to tests
Make KS32 params the default, add them to tests,
and add a PARAM_PROD param alias which is still the KS-PBS
so that it's still tested.

Prod params are tested in a separate workflow to avoid growing the
time tests take.

BREAKING CHANGE: `PARAM_MESSAGE_2_CARRY_2` now returns `KeySwitch32PBSParameters`
instead of `ClassicPBSParameters`.
2026-02-09 11:43:42 +01:00
Arthur Meyre
9d14581d99 chore: remove integer test filter which is not relevant anymore 2026-02-09 11:42:58 +01:00
409 changed files with 19306 additions and 47923 deletions

View File

@@ -68,12 +68,6 @@ runs:
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
sha256sum -c checksum
sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
# Disable unattended-upgrades to avoid lock issues
sudo systemctl disable --now unattended-upgrades
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt update
sudo apt -y install cuda-toolkit-"${TOOLKIT_VERSION}"

View File

@@ -14,7 +14,9 @@ env:
SLACKIFY_MARKDOWN: true
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -30,11 +32,38 @@ permissions:
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: aws_tfhe_backward_compat_tests/setup-instance
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
github.event_name != 'push'
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
backward-compat-tests:
name: aws_tfhe_backward_compat_tests/backward-compat-tests (bpr)
if: (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
github.event_name != 'push'
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-small"
needs: [ setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
@@ -54,7 +83,7 @@ jobs:
- name: Retrieve data from cache
id: retrieve-data-cache
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
@@ -83,7 +112,7 @@ jobs:
- name: Store data in cache
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
@@ -105,3 +134,27 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: aws_tfhe_backward_compat_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, backward-compat-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -15,7 +15,9 @@ env:
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -132,15 +134,41 @@ jobs:
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
fast-tests:
name: Fast CPU tests
needs: should-run
setup-instance:
name: aws_tfhe_fast_tests/setup-instance
if: github.event_name == 'workflow_dispatch' ||
(github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
needs: should-run
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
fast-tests:
name: Fast CPU tests
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: true
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-big"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -191,7 +219,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
with:
path: |
~/.nvm
@@ -204,7 +232,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -261,3 +289,27 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: aws_tfhe_fast_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, fast-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -17,7 +17,9 @@ env:
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
NO_BIG_PARAMS: FALSE
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -69,18 +71,44 @@ jobs:
- tfhe/src/integer/**
- .github/workflows/aws_tfhe_integer_tests.yml
unsigned-integer-tests:
name: aws_tfhe_integer_tests/unsigned-integer-tests
setup-instance:
name: aws_tfhe_integer_tests/setup-instance
needs: should-run
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
unsigned-integer-tests:
name: aws_tfhe_integer_tests/unsigned-integer-tests
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-big"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 480 # 8 hours
steps:
- name: Checkout tfhe-rs
@@ -130,3 +158,27 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: aws_tfhe_integer_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [setup-instance, unsigned-integer-tests]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -13,7 +13,8 @@ env:
SLACKIFY_MARKDOWN: true
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -34,7 +35,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +100,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -1,5 +1,5 @@
# Compile and test zk-cuda-backend
name: gpu_zk_tests
# Run a small subset of tests to ensure quick feedback.
name: aws_tfhe_param_prod_tests
env:
CARGO_TERM_COLOR: always
@@ -17,12 +17,13 @@ env:
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
permissions:
contents: read
@@ -31,15 +32,22 @@ permissions:
jobs:
should-run:
name: gpu_zk_tests/should-run
name: aws_tfhe_param_prod_tests/should-run
if: (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
versionable_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.versionable_any_changed }}
shortint_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.shortint_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
with:
fetch-depth: 0
persist-credentials: 'false'
@@ -50,21 +58,34 @@ jobs:
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:
dependencies:
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/zk-cuda-backend/**
- tfhe/src/integer/gpu/zk/**
- tfhe-csprng/**
- tfhe-fft/**
- tfhe-zk-pok/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_zk_tests.yml'
- ci/slab.toml
- utils/tfhe-versionable/**
- utils/tfhe-versionable-derive/**
versionable:
- utils/tfhe-versionable/**
- utils/tfhe-versionable-derive/**
shortint:
- tfhe/src/core_crypto/**
- tfhe/src/shortint/**
- name: Aggregate file changes
id: aggregated-changes
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
steps.changed-files.outputs.versionable_any_changed == 'true' ||
steps.changed-files.outputs.core_crypto_any_changed == 'true' ||
steps.changed-files.outputs.shortint_any_changed == 'true' )
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
setup-instance:
name: gpu_zk_tests/setup-instance
needs: should-run
name: aws_tfhe_param_prod_tests/setup-instance
if: github.event_name == 'workflow_dispatch' ||
needs.should-run.outputs.gpu_test == 'true'
(github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
needs: should-run
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
@@ -72,14 +93,14 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: gpu-test
backend: aws
profile: cpu-big
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
@@ -88,85 +109,60 @@ jobs:
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
cuda-tests-linux:
name: gpu_zk_tests/cuda-tests-linux
param-prod-tests:
name: aws_tfhe_param_prod_tests/param-prod-tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
- name: Gen Keys if required
if: needs.should-run.outputs.shortint_test == 'true'
run: |
nvidia-cuda-mps-control -d
make gen_key_cache
- name: Run zk-cuda-backend integration tests
- name: Run shortint tests
if: needs.should-run.outputs.shortint_test == 'true'
run: |
make test_zk_cuda_backend
make test_zk_pok_gpu
make test_integer_zk_gpu
make test_integer_zk_experimental_gpu
slack-notify:
name: gpu_zk_tests/slack-notify
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
continue-on-error: true
steps:
BIG_TESTS_INSTANCE=TRUE FAST_TESTS=FALSE make test_param_prod_shortint_ci
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'
- name: Slack Notification
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "ZK GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: gpu_zk_tests/teardown-instance
name: aws_tfhe_param_prod_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
needs: [ setup-instance, param-prod-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -175,8 +171,8 @@ jobs:
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (param-prod-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -17,7 +17,9 @@ env:
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
NO_BIG_PARAMS: FALSE
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -70,18 +72,44 @@ jobs:
- tfhe/src/integer/**
- .github/workflows/aws_tfhe_signed_integer_tests.yml
signed-integer-tests:
name: aws_tfhe_signed_integer_tests/signed-integer-tests
setup-instance:
name: aws_tfhe_signed_integer_tests/setup-instance
needs: should-run
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
signed-integer-tests:
name: aws_tfhe_signed_integer_tests/signed-integer-tests
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-big"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -134,3 +162,27 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: aws_tfhe_signed_integer_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [setup-instance, signed-integer-tests]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -14,7 +14,9 @@ env:
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -141,15 +143,43 @@ jobs:
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
cpu-tests:
name: aws_tfhe_tests/cpu-tests
needs: should-run
setup-instance:
name: aws_tfhe_tests/setup-instance
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.any_file_changed == 'true')
needs: should-run
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
cpu-tests:
name: aws_tfhe_tests/cpu-tests
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow_ref }}_${{github.event_name}}
cancel-in-progress: true
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-big"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -239,3 +269,27 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: aws_tfhe_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cpu-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -13,7 +13,9 @@ env:
SLACKIFY_MARKDOWN: true
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,53 +29,39 @@ permissions:
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: aws_tfhe_wasm_tests/should-run
if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved')
setup-instance:
name: aws_tfhe_wasm_tests/setup-instance
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
wasm_test: ${{ github.event_name == 'workflow_dispatch' ||
steps.changed-files.outputs.wasm_any_changed }}
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
with:
files_yaml: |
wasm:
- Cargo.toml
- tfhe/Cargo.toml
- tfhe-csprng/**
- tfhe-fft/**
- tfhe-zk-pok/**
- tfhe/src/core_crypto/**
- tfhe/src/shortint/**
- tfhe/src/integer/**
- tfhe/src/high_level_api/**
- tfhe/src/js_on_wasm_api/**
- tfhe/js_on_wasm_tests/**
- tfhe/web_wasm_parallel_tests/**
- utils/tfhe-versionable/**
- .github/workflows/aws_tfhe_wasm_tests.yml
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
wasm-tests:
name: aws_tfhe_wasm_tests/wasm-tests
needs: should-run
if: github.event_name == 'workflow_dispatch' ||
(contains(github.event.label.name, 'approved') && needs.should-run.outputs.wasm_test == 'true')
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}_${{github.event_name}}
cancel-in-progress: true
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-small"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -92,7 +80,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
with:
path: |
~/.nvm
@@ -105,7 +93,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -149,3 +137,27 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: aws_tfhe_wasm_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, wasm-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -14,7 +14,6 @@ on:
- signed_integer
- integer_compression
- integer_zk
- msm_zk
- shortint
- shortint_oprf
- hlapi_unsigned
@@ -22,7 +21,6 @@ on:
- hlapi_erc20
- hlapi_dex
- hlapi_noise_squash
- hlapi_kvstore
- tfhe_zk_pok
- boolean
- pbs

View File

@@ -126,7 +126,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -261,7 +261,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -33,7 +33,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -137,7 +137,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -36,7 +36,7 @@ jobs:
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: integer,hlapi_erc20
command: integer
op_flavor: fast_default
bench_type: both
precisions_set: documentation
@@ -50,40 +50,6 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-cpu-zk-server:
name: benchmark_documentation/run-benchmarks-cpu-zk-server
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: integer_zk
op_flavor: default
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-cpu-zk-client:
name: benchmark_documentation/run-benchmarks-cpu-zk-client
uses: ./.github/workflows/benchmark_wasm_client_common.yml
if: inputs.run-cpu-benchmarks
with:
browser: chrome
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu-integer:
name: benchmark_documentation/run-benchmarks-gpu-integer
uses: ./.github/workflows/benchmark_gpu_common.yml
@@ -91,7 +57,7 @@ jobs:
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: integer_multi_bit,hlapi_erc20
command: integer_multi_bit
op_flavor: fast_default
bench_type: both
precisions_set: documentation
@@ -110,7 +76,7 @@ jobs:
uses: ./.github/workflows/benchmark_hpu_common.yml
if: inputs.run-hpu-benchmarks
with:
command: integer,hlapi_erc20
command: integer
op_flavor: default
bench_type: both
precisions_set: documentation
@@ -172,7 +138,6 @@ jobs:
inputs.generate-svgs }}
needs: [
run-benchmarks-cpu-integer, run-benchmarks-gpu-integer, run-benchmarks-hpu-integer,
run-benchmarks-cpu-zk-server, run-benchmarks-cpu-zk-client,
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto
]
uses: ./.github/workflows/generate_svgs.yml

View File

@@ -31,8 +31,6 @@ on:
- pbs128
- ks
- ks_pbs
- tfhe_zk_pok
- msm_zk
- integer_zk
- integer_aes
- integer_aes256

View File

@@ -134,7 +134,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -324,7 +324,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -94,7 +94,7 @@ jobs:
steps:
- name: Start remote instance
id: start-remote-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -197,7 +197,7 @@ jobs:
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
- name: Cache cargo
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2
with:
path: |
~/.cargo/registry
@@ -207,14 +207,14 @@ jobs:
restore-keys: ${{ runner.os }}-cargo-
- name: Login to GitHub Container Registry
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Chainguard Registry
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
with:
registry: cgr.dev
username: ${{ secrets.CGR_USERNAME }}
@@ -326,7 +326,7 @@ jobs:
steps:
- name: Stop remote instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -1,28 +1,279 @@
# Run CUDA benchmarks on Hyperstack VM and return parsed results to Slab CI bot.
# Run CUDA benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
name: benchmark_gpu_weekly
run-name: GPU weekly benchmarks
on:
schedule:
# Weekly schedules are separated in several groups to avoid spawning too many the machines at once thus risking resource shortages.
# Group 1
# -------
# Weekly benchmarks will be triggered each Saturday at 1a.m.
- cron: '0 1 * * 6'
# Group 2
# -------
# Weekly benchmarks will be triggered each Sunday at 1a.m.
- cron: '0 1 * * 0'
# Group 3
# -------
# Weekly benchmarks will be triggered each Sunday at 9p.m.
- cron: '0 9 * * 0'
permissions: {}
# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow
jobs:
run-benchmarks-8-h100-sxm5-summary:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-summary
prepare-inputs:
name: benchmark_cpu_weekly/prepare-inputs
if: github.repository == 'zama-ai/tfhe-rs'
runs-on: ubuntu-latest
outputs:
is_weekly_bench_group_1: ${{ steps.check_bench_group_1.outputs.is_weekly_bench_group_1 }}
is_weekly_bench_group_2: ${{ steps.check_bench_group_2.outputs.is_weekly_bench_group_2 }}
is_weekly_bench_group_3: ${{ steps.check_bench_group_3.outputs.is_weekly_bench_group_3 }}
steps:
- name: Check is weekly bench group 1
id: check_bench_group_1
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "is_weekly_bench_group_1=${{ github.event.schedule == '0 1 * * 6' }}" >> "${GITHUB_OUTPUT}"
- name: Check is weekly bench group 2
id: check_bench_group_2
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "is_weekly_bench_group_2=${{ github.event.schedule == '0 1 * * 0' }}" >> "${GITHUB_OUTPUT}"
- name: Check is weekly bench group 3
id: check_bench_group_3
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "is_weekly_bench_group_3=${{ github.event.schedule == '0 9 * * 0' }}" >> "${GITHUB_OUTPUT}"
run-benchmarks-8-h100-sxm5-integer:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: summary
command: integer_multi_bit
op_flavor: default
bench_type: both
precisions_set: fast
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-8-h100-sxm5-integer-compression:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-compression
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: integer_compression
op_flavor: default
bench_type: both
precisions_set: fast
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-8-h100-sxm5-integer-zk-aes:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-zk-aes
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: integer_zk,integer_aes,integer_aes256
op_flavor: default
bench_type: both
precisions_set: fast
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-8-h100-sxm5-noise-squash:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-noise-squash
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: hlapi_noise_squash
op_flavor: default
bench_type: both
precisions_set: fast
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-1-h100-core-crypto:
name: benchmark_gpu_weekly/run-benchmarks-1-h100-core-crypto (1xH100)
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: single-h100
hardware_name: n3-H100x1
command: pbs,pbs128,ks,ks_pbs
bench_type: latency
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# -----------------------------------------------------
# ERC20 benchmarks
# -----------------------------------------------------
run-benchmarks-1-h100-erc20:
name: benchmark_gpu_weekly/run-benchmarks-1-h100-erc20
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: single-h100
hardware_name: n3-H100x1
command: hlapi_erc20
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-2-h100-erc20:
name: benchmark_gpu_weekly/run-benchmarks-2-h100-erc20
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: 2-h100
hardware_name: n3-H100x2
command: hlapi_erc20
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-8-h100-erc20:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-erc20
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100
hardware_name: n3-H100-SXM5x8
command: hlapi_erc20
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# -----------------------------------------------------
# DEX benchmarks
# -----------------------------------------------------
run-benchmarks-1-h100-dex:
name: benchmark_gpu_weekly/run-benchmarks-1-h100-dex
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: single-h100
hardware_name: n3-H100x1
command: hlapi_dex
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-2-h100-dex:
name: benchmark_gpu_weekly/run-benchmarks-2-h100-dex
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: 2-h100
hardware_name: n3-H100x2
command: hlapi_dex
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-8-h100-dex:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-dex
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100
hardware_name: n3-H100-SXM5x8
command: hlapi_dex
bench_type: both
params_type: classical + multi_bit
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -143,7 +143,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -383,7 +383,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -1,136 +0,0 @@
# Run all benchmarks displayed in the internal documentation.
name: benchmark_summary
run-name: Benchmark Summary
on:
workflow_dispatch:
inputs:
run-cpu-benchmarks:
description: "Run CPU benchmarks"
type: boolean
default: true
run-gpu-benchmarks:
description: "Run GPU benchmarks"
type: boolean
default: true
gpu-profile:
description: "GPU Instance type"
required: true
default: "multi-h100-sxm5 (n3-H100-SXM5x8)"
type: choice
options:
- "l40 (n3-L40x1)"
- "4-l40 (n3-L40x4)"
- "8-l40 (n3-L40x8)"
- "multi-a100-nvlink (n3-A100x8-NVLink)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
bench_type:
description: "Benchmarks type"
type: choice
default: both
options:
- latency
- throughput
- both
run-hpu-benchmarks:
description: "Run HPU benchmarks"
type: boolean
default: true
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
parse-gpu-inputs:
name: benchmark_summary/parse-gpu-inputs
if: inputs.run-gpu-benchmarks
runs-on: ubuntu-latest
outputs:
profile: ${{ steps.parse_profile.outputs.profile }}
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
env:
INPUTS_PROFILE: ${{ inputs.gpu-profile }}
steps:
- name: Parse profile
id: parse_profile
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
PROFILE=$(echo "${INPUTS_PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
echo "profile=${PROFILE}" >> "${GITHUB_OUTPUT}"
- name: Parse hardware name
id: parse_hardware_name
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
NAME=$(echo "${INPUTS_PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
run-benchmarks-cpu:
name: benchmark_documentation/run-benchmarks-cpu-integer
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: summary
bench_type: ${{ inputs.bench_type }}
params_type: classical + multi_bit
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu:
name: benchmark_documentation/run-benchmarks-gpu
uses: ./.github/workflows/benchmark_gpu_common.yml
if: inputs.run-gpu-benchmarks
needs: parse-gpu-inputs
with:
profile: ${{ needs.parse-gpu-inputs.outputs.profile }}
hardware_name: ${{ needs.parse-gpu-inputs.outputs.hardware_name }}
command: summary
bench_type: ${{ inputs.bench_type }}
params_type: classical + multi_bit
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# TODO add make recipe for HPU benchmarks
# run-benchmarks-hpu:
# name: benchmark_documentation/run-benchmarks-hpu
# uses: ./.github/workflows/benchmark_hpu_common.yml
# if: inputs.run-hpu-benchmarks
# with:
# command: summary
# bench_type: ${{ inputs.bench_type }}
# v80_pcie_dev: 24
# v80_serial_number: XFL12NWY3ZKG
# secrets:
# BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
# SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
# REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# JOB_SECRET: ${{ secrets.JOB_SECRET }}
# SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
# SLAB_URL: ${{ secrets.SLAB_URL }}
# SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
# SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}

View File

@@ -40,7 +40,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -137,7 +137,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -40,7 +40,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -137,7 +137,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -58,19 +58,171 @@ jobs:
- tfhe/web_wasm_parallel_tests/**
- .github/workflows/wasm_client_benchmark.yml
run-benchmarks-cpu-zk-client:
name: benchmark_documentation/run-benchmarks-cpu-zk-client
uses: ./.github/workflows/benchmark_wasm_client_common.yml
needs: should-run
setup-instance:
name: benchmark_wasm_client/setup-instance
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.wasm_bench)
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
needs: should-run
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
wasm-client-benchmarks:
name: benchmark_wasm_client/wasm-client-benchmarks
needs: setup-instance
if: needs.setup-instance.result != 'skipped'
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
max-parallel: 1
matrix:
browser: [ chrome, firefox ]
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Get Node version
run: |
echo "NODE_VERSION=$(make node_version)" >> "${GITHUB_ENV}"
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
with:
path: |
~/.nvm
~/.npm
key: node-${{ env.NODE_VERSION }}
- name: Install Node
if: steps.node-cache.outputs.cache-hit != 'true'
run: |
make install_node
- name: Node cache save
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
~/.nvm
~/.npm
key: node-${{ env.NODE_VERSION }}
- name: Install web resources
run: |
make install_"${BROWSER}"_browser
make install_"${BROWSER}"_web_driver
env:
BROWSER: ${{ matrix.browser }}
- name: Run benchmarks
run: |
make bench_web_js_api_parallel_"${BROWSER}"_ci
env:
BROWSER: ${{ matrix.browser }}
- name: Run benchmarks (unsafe coop)
run: |
make bench_web_js_api_unsafe_coop_"${BROWSER}"_ci
env:
BROWSER: ${{ matrix.browser }}
- name: Parse results
run: |
make parse_wasm_benchmarks
python3 ./ci/benchmark_parser.py tfhe-benchmark/wasm_pk_gen.csv "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "m6i.4xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--key-gen
rm tfhe-benchmark/wasm_pk_gen.csv
env:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "WASM benchmarks (${{ matrix.browser }}) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_wasm_client/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, wasm-client-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (wasm-client-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,234 +0,0 @@
# Run WASM client benchmarks on an instance and return parsed results to Slab CI bot.
name: benchmark_wasm_client_common
on:
workflow_call:
inputs:
browser:
type: string # Use comma separated values to generate an array
default: chrome,firefox
secrets:
REPO_CHECKOUT_TOKEN:
required: true
SLAB_ACTION_TOKEN:
required: true
SLAB_BASE_URL:
required: true
SLAB_URL:
required: true
JOB_SECRET:
required: true
SLACK_CHANNEL:
required: true
BOT_USERNAME:
required: true
SLACK_WEBHOOK:
required: true
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members and GitHub can trigger this workflow
jobs:
prepare-matrix:
name: benchmark_wasm_client_common/prepare-matrix
runs-on: ubuntu-latest
outputs:
browser: ${{ steps.set_matrix_arg.outputs.browser }}
steps:
- name: Parse user inputs
shell: python
env:
INPUTS_BROWSER: ${{ inputs.browser }}
run: |
import os
inputs_browser = os.environ["INPUTS_BROWSER"]
env_file = os.environ["GITHUB_ENV"]
split_browser = inputs_browser.replace(" ", "").split(",")
with open(env_file, "a") as f:
f.write(f"""BROWSER=["{'", "'.join(split_browser)}"]\n""")
- name: Set martix arguments output
id: set_matrix_arg
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "browser=${{ toJSON(env.BROWSER) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_wasm_client_common/setup-instance
needs: prepare-matrix
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
wasm-client-benchmarks:
name: benchmark_wasm_client_common/wasm-client-benchmarks
needs: [ prepare-matrix, setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
max-parallel: 1
matrix:
browser: ${{ fromJSON(needs.prepare-matrix.outputs.browser) }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Get Node version
run: |
echo "NODE_VERSION=$(make node_version)" >> "${GITHUB_ENV}"
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
~/.nvm
~/.npm
key: node-${{ env.NODE_VERSION }}
- name: Install Node
if: steps.node-cache.outputs.cache-hit != 'true'
run: |
make install_node
- name: Node cache save
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
~/.nvm
~/.npm
key: node-${{ env.NODE_VERSION }}
- name: Install web resources
run: |
make install_"${BROWSER}"_browser
make install_"${BROWSER}"_web_driver
env:
BROWSER: ${{ matrix.browser }}
- name: Run benchmarks
run: |
make bench_web_js_api_parallel_"${BROWSER}"_ci
env:
BROWSER: ${{ matrix.browser }}
- name: Run benchmarks (unsafe coop)
run: |
make bench_web_js_api_unsafe_coop_"${BROWSER}"_ci
env:
BROWSER: ${{ matrix.browser }}
- name: Parse results
run: |
make parse_wasm_benchmarks
python3 ./ci/benchmark_parser.py tfhe-benchmark/wasm_pk_gen.csv "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "m6i.4xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--key-gen
rm tfhe-benchmark/wasm_pk_gen.csv
env:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "WASM benchmarks (${{ matrix.browser }}) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_wasm_client_common/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, wasm-client-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (wasm-client-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -57,7 +57,9 @@ env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
LINELINT_VERSION: 0.0.6
LINELINT_CHECKSUM: "16b70fb7b471d6f95cbdc0b4e5dc2b0ac9e84ba9ecdc488f7bdf13df823aca4b"
@@ -67,10 +69,37 @@ permissions:
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
prepare-matrix:
name: cargo_build_common/prepare-matrix
setup-instance:
name: cargo_build_common/setup-instance
if: inputs.run-pcc-cpu-batch || inputs.run-pcc-hpu || inputs.run-build || inputs.run-build-layers || inputs.run-build-tfhe-full || inputs.run-build-c-api
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
run_attempt: ${{ github.run_attempt }} # On a re-run with a successful previous run for this job, the run_attempt will not be incremented
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
prepare-matrix:
name: cargo_build_common/prepare-matrix
runs-on: ubuntu-latest
needs: setup-instance
outputs:
runners: ${{ steps.set_matrix_runners.outputs.runners }}
steps:
@@ -78,12 +107,12 @@ jobs:
shell: python
env:
INPUTS_EXTRA_RUNNERS_TO_USE: ${{ inputs.extra-runners-to-use }}
REMOTE_RUNNER: "runs-on=${{ github.run_id }}/runner=cpu-small"
REMOTE_RUNNER_LABEL: ${{ needs.setup-instance.outputs.runner-name }}
run: |
import os
inputs_extra_runners = os.environ["INPUTS_EXTRA_RUNNERS_TO_USE"]
remote_runner_label = os.environ["REMOTE_RUNNER"]
remote_runner_label = os.environ["REMOTE_RUNNER_LABEL"]
env_file = os.environ["GITHUB_ENV"]
runners = [remote_runner_label, ]
@@ -101,7 +130,7 @@ jobs:
builds:
name: cargo_build_common/builds
needs: prepare-matrix
needs: [ setup-instance, prepare-matrix ]
runs-on: ${{ matrix.runner }}
strategy:
matrix:
@@ -130,35 +159,6 @@ jobs:
chmod +x linelint-linux-amd64
ln -s "$(pwd)/linelint-linux-amd64" /usr/local/bin/linelint
- name: Get Node version
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
run: |
echo "NODE_VERSION=$(make node_version)" >> "${GITHUB_ENV}"
- name: Node cache restoration
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
id: node-cache
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
~/.nvm
~/.npm
key: node-${{ env.NODE_VERSION }}
- name: Install Node
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
run: |
make install_node
- name: Node cache save
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2' && steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
~/.nvm
~/.npm
key: node-${{ env.NODE_VERSION }}
- name: Run pcc checks batch
if: inputs.run-pcc-cpu-batch
run: |
@@ -230,3 +230,29 @@ jobs:
if: ${{ always() }}
run: | # zizmor: ignore[template-injection] this context variable is safe
echo "result=${{ job.status }}" >> "${GITHUB_OUTPUT}"
teardown-instance:
name: cargo_build_common/teardown-instance
if: ${{ always() &&
needs.setup-instance.result == 'success' &&
github.run_attempt == needs.setup-instance.outputs.run_attempt }} # Only run if setup-instance has been executed during this run attempt
needs: [setup-instance, builds]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cargo-builds) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -63,7 +63,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -146,7 +146,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -18,7 +18,7 @@ jobs:
- name: Check first line
uses: gsactions/commit-message-checker@16fa2d5de096ae0d35626443bcd24f1e756cafee
with:
pattern: '^((feat|fix|chore|refactor|style|test|docs|doc|perf)(\([\w\-_]+\))?\!?\:) .+$'
pattern: '^((feat|fix|chore|refactor|style|test|docs|doc)(\([\w\-_]+\))?\!?\:) .+$'
flags: "gs"
error: 'Your first line has to contain a commit type and scope like "feat(my_feature): msg".'
excludeDescription: "true" # optional: this excludes the description body of a pull request

View File

@@ -43,14 +43,14 @@ jobs:
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
- name: Check workflows security
uses: zizmorcore/zizmor-action@0dce2577a4760a2749d8cfb7a84b7d5585ebcb7d # v0.5.0
uses: zizmorcore/zizmor-action@135698455da5c3b3e55f73f4419e481ab68cdd95 # v0.4.1
with:
advanced-security: 'false' # Print results directly in logs
persona: pedantic
version: ${{ steps.get_zizmor.outputs.version }}
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@d5d20e15f2736816ee0e001ba8b24b54d9ffcff4 # v5.0.0
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6124774845927d14c601359ab8138699fa5b70c3 # v4.0.1
with:
allowlist: |
slsa-framework/slsa-github-generator

View File

@@ -23,12 +23,30 @@ permissions:
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
setup-instance:
name: code_coverage/setup-instance
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
code-coverage-tests:
name: code_coverage/code-coverage-tests
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}_${{ github.event_name }}
cancel-in-progress: true
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-small"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 5760 # 4 days
steps:
- name: Checkout tfhe-rs
@@ -103,3 +121,26 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Code coverage finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: code_coverage/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, code-coverage-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (code-coverage-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -10,10 +10,10 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,44 +27,39 @@ permissions:
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: csprng_randomness_tests/should-run
if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved')
setup-instance:
name: csprng_randomness_tests/setup-instance
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
csprng_test: ${{ github.event_name == 'workflow_dispatch' ||
steps.changed-files.outputs.csprng_any_changed }}
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
with:
files_yaml: |
csprng:
- Cargo.toml
- tfhe/Cargo.toml
- tfhe-csprng/**
- utils/tfhe-versionable/**
- .github/workflows/csprng_randomness_tests.yml
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
csprng-randomness-tests:
name: csprng_randomness_tests/csprng-randomness-tests
needs: should-run
if: github.event_name == 'workflow_dispatch' ||
(contains(github.event.label.name, 'approved') && needs.should-run.outputs.csprng_test == 'true')
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}_${{ github.sha }}_${{ github.event_name }}
cancel-in-progress: true
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-small"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -81,18 +76,34 @@ jobs:
run: |
make dieharder_csprng
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: csprng_randomness_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, csprng-randomness-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -9,9 +9,6 @@ on:
type: string
layer:
type: string
bench_subset:
type: string
default: all
pbs_kind: # Valid values are 'classical', 'multi_bit' or 'any'
type: string
grouping_factor: # Valid values are 2, 3, or 4
@@ -19,9 +16,6 @@ on:
default: 4
bench_type: # Valid values are 'latency', 'throughput'
type: string
name_suffix:
type: string
default: _mean_avx512
backend_comparison:
type: boolean
default: false
@@ -66,8 +60,6 @@ jobs:
--pbs-kind "${PBS_KIND}" \
--grouping-factor "${GROUPING_FACTOR}" \
--bench-type "${BENCH_TYPE}" \
--bench-subset "${BENCH_SUBSET}" \
--name-suffix "${NAME_SUFFIX}" \
--time-span-days "${TIME_SPAN}"
env:
OUTPUT_FILENAME: ${{ inputs.output_filename }}
@@ -78,8 +70,6 @@ jobs:
PBS_KIND: ${{ inputs.pbs_kind }}
GROUPING_FACTOR: ${{ inputs.grouping_factor }}
BENCH_TYPE: ${{ inputs.bench_type }}
BENCH_SUBSET: ${{ inputs.bench_subset }}
NAME_SUFFIX: ${{ inputs.name_suffix }}
TIME_SPAN: ${{ inputs.time_span_days }}
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
@@ -89,7 +79,7 @@ jobs:
if: inputs.backend_comparison == false
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_subset_${{inputs.bench_subset}}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
# This will upload all the file generated
path: ${{ inputs.output_filename }}*.svg
retention-days: 60

View File

@@ -51,7 +51,7 @@ jobs:
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
cpu-integer-throughput-table:
name: generate_documentation_svgs/cpu-integer-throughput-table
name: generate_documentation_svgs/cpu-integer-latency-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
@@ -150,124 +150,6 @@ jobs:
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# -----------------------------------------------------------
# ZK benchmarks tables
# -----------------------------------------------------------
cpu-zk-server-latency-table:
name: generate_documentation_svgs/cpu-zk-server-latency-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: hpc7a.96xlarge
layer: integer
bench_subset: zk
pbs_kind: classical
bench_type: latency
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-zk-benchmark-latency
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
cpu-zk-server-throughput-table:
name: generate_documentation_svgs/cpu-zk-server-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: hpc7a.96xlarge
layer: integer
bench_subset: zk
pbs_kind: classical
bench_type: throughput
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-zk-benchmark-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
cpu-zk-client-latency-table:
name: generate_documentation_svgs/cpu-zk-client-latency-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: m6i.4xlarge
layer: wasm
bench_subset: zk
pbs_kind: classical
bench_type: latency
name_suffix: _chrome_mean
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-zk-wasm-benchmark-latency
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# -----------------------------------------------------------
# ERC20 benchmarks tables
# -----------------------------------------------------------
cpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/cpu-erc20-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: hpc7a.96xlarge
layer: hlapi
bench_subset: erc20
pbs_kind: classical
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-hlapi-erc20-benchmark-latency-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/gpu-erc20-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-H100-SXM5x8
layer: hlapi
bench_subset: erc20
pbs_kind: multi_bit
grouping_factor: 4
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-hlapi-erc20-benchmark-h100x8-sxm5-latency-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
hpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/hpu-erc20-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-hpu-svgs
with:
backend: hpu
hardware_name: hpu_x1
layer: hlapi
bench_subset: erc20
pbs_kind: classical
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: hpu-hlapi-erc20-benchmark-hpux1-latency-throughput.svg
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# -----------------------------------------------------------
# PBS benchmarks tables
# -----------------------------------------------------------

View File

@@ -43,7 +43,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -93,11 +93,6 @@ jobs:
- name: Find tools
run: |
# Disable unattended-upgrades to avoid lock issues
sudo systemctl disable --now unattended-upgrades
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt update && sudo apt install -y valgrind
find /usr -executable -name "compute-sanitizer"
which valgrind
@@ -111,10 +106,6 @@ jobs:
run: |
make test_high_level_api_gpu_valgrind
- name: Run CUDA backend racecheck tests
run: |
make test_cuda_backend_race_check
slack-notify:
name: gpu_code_validation_tests/slack-notify
needs: [ setup-instance, cuda-tests-linux ]
@@ -146,7 +137,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -64,6 +64,8 @@ jobs:
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_core_h100_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_core_h100_tests/setup-instance
@@ -84,7 +86,7 @@ jobs:
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -183,7 +185,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -64,6 +64,8 @@ jobs:
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_fast_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_fast_tests/setup-instance
@@ -77,7 +79,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -182,7 +184,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -124,7 +124,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -66,6 +66,7 @@ jobs:
- 'tfhe/docs/**/**.md'
- '.github/workflows/**_multi_gpu_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_full_multi_gpu_tests/setup-instance
@@ -80,7 +81,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -186,7 +187,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -65,6 +65,8 @@ jobs:
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_hlapi_h100_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_hlapi_h100_tests/setup-instance
@@ -85,7 +87,7 @@ jobs:
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -191,7 +193,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -38,7 +38,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -112,7 +112,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -31,50 +31,18 @@ permissions:
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_memory_sanitizer/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
with:
files_yaml: |
gpu:
- Cargo.toml
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- tfhe/src/core_crypto/gpu/**
- tfhe/src/integer/gpu/**
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- '.github/workflows/gpu_memory_sanitizer.yml'
setup-instance:
name: gpu_memory_sanitizer/setup-instance
needs: should-run
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved')
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -166,7 +134,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -31,50 +31,18 @@ permissions:
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_memory_sanitizer_h100/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
with:
files_yaml: |
gpu:
- Cargo.toml
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- tfhe/src/core_crypto/gpu/**
- tfhe/src/integer/gpu/**
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- '.github/workflows/gpu_memory_sanitizer_h100.yml'
setup-instance:
name: gpu_memory_sanitizer/setup-instance
needs: should-run
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved')
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -166,7 +134,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -38,7 +38,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -79,7 +79,7 @@ jobs:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install CUDA and other dependencies
- name: Install CUDA
if: env.SECRETS_AVAILABLE == 'false'
shell: bash
run: |
@@ -90,12 +90,6 @@ jobs:
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
sha256sum -c checksum
sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
# Disable unattended-upgrades to avoid lock issues
sudo systemctl disable --now unattended-upgrades
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt update
sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
env:
@@ -136,17 +130,6 @@ jobs:
run: |
make pcc_gpu
- name: Run semgrep and lint checks on CUDA code
run: |
# Disable unattended-upgrades to avoid lock issues
sudo systemctl disable --now unattended-upgrades
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt update
sudo apt -y install python3-venv
make semgrep_and_lint_gpu_code
- name: Check build with hpu enabled
run: |
make clippy_gpu_hpu
@@ -176,7 +159,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -66,6 +66,7 @@ jobs:
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_classic_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_signed_integer_classic_tests/setup-instance
@@ -80,7 +81,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -169,7 +170,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -66,6 +66,7 @@ jobs:
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_h100_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_signed_integer_h100_tests/setup-instance
@@ -86,7 +87,7 @@ jobs:
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -183,7 +184,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -67,6 +67,7 @@ jobs:
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_signed_integer_tests/setup-instance
@@ -81,7 +82,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -178,7 +179,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -66,6 +66,7 @@ jobs:
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_unsigned_integer_classic_tests/setup-instance
@@ -80,7 +81,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -169,7 +170,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -66,6 +66,7 @@ jobs:
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_unsigned_integer_h100_tests/setup-instance
@@ -86,7 +87,7 @@ jobs:
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -183,7 +184,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -67,6 +67,7 @@ jobs:
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_unsigned_integer_tests/setup-instance
@@ -81,7 +82,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -178,7 +179,7 @@ jobs:
- name: Stop instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -13,7 +13,9 @@ env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
@@ -50,13 +52,39 @@ jobs:
- backends/tfhe-hpu-backend/**
- mockups/tfhe-hpu-mockup/**
cargo-tests-hpu:
name: hpu_hlapi_tests/cargo-tests-hpu (bpr)
setup-instance:
name: hpu_hlapi_tests/setup-instance
needs: should-run
if:
needs.should-run.outputs.hpu_test == 'true' &&
((github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||github.event_name == 'pull_request')
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-big"
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
cargo-tests-hpu:
name: hpu_hlapi_tests/cargo-tests-hpu (bpr)
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
@@ -79,3 +107,27 @@ jobs:
just -f mockups/tfhe-hpu-mockup/Justfile BUILD_PROFILE=release mockup &
make HPU_CONFIG=sim test_high_level_api_hpu
make HPU_CONFIG=sim test_user_doc_hpu
teardown-instance:
name: hpu_hlapi_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [setup-instance, cargo-tests-hpu]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (hpu_hlapi_tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -24,14 +24,32 @@ permissions: {}
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
cpu-tests:
name: integer_long_run_tests/cpu-tests
setup-instance:
name: integer_long_run_tests/setup-instance
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
cpu-tests:
name: integer_long_run_tests/cpu-tests
needs: [ setup-instance ]
concurrency:
group: ${{ github.workflow_ref }}_${{github.event_name}}
cancel-in-progress: true
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-big"
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 4320 # 72 hours
steps:
- name: Checkout tfhe-rs
@@ -56,3 +74,26 @@ jobs:
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CPU long run tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: integer_long_run_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cpu-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -37,7 +37,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -222,7 +222,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -92,7 +92,7 @@ jobs:
- name: Publish web package
if: ${{ inputs.push_web_package }}
uses: JS-DevTools/npm-publish@0fd2f4369c5d6bcfcde6091a7c527d810b9b5c3f
uses: JS-DevTools/npm-publish@4ce4bd0f334d5316473155078da1955d42148494
with:
package: tfhe/pkg/package.json
dry-run: ${{ inputs.dry_run }}
@@ -109,7 +109,7 @@ jobs:
- name: Publish Node package
if: ${{ inputs.push_node_package }}
uses: JS-DevTools/npm-publish@0fd2f4369c5d6bcfcde6091a7c527d810b9b5c3f
uses: JS-DevTools/npm-publish@4ce4bd0f334d5316473155078da1955d42148494
with:
package: tfhe/pkg/package.json
dry-run: ${{ inputs.dry_run }}

View File

@@ -5,8 +5,9 @@ env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
SAGEMATH_VERSION: 10.8
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
on:
pull_request:
@@ -25,12 +26,38 @@ permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members and GitHub can trigger this workflow
jobs:
params-curves-security-check:
name: parameters_check/params-curves-security-check
setup-instance:
name: parameters_check/setup-instance
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
github.event_name != 'push'
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-small"
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-small
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
params-curves-security-check:
name: parameters_check/params-curves-security-check
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -47,39 +74,14 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
repository: malb/lattice-estimator
path: lattice-estimator
path: lattice_estimator
ref: '352ddaf4a288a0543f5d9eb588d2f89c7acec463'
persist-credentials: 'false'
- name: Restore Sagemath image from cache
id: docker-cache
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
restore-keys: sagemath-image-
- name: Load cached Docker sagemath image
if: steps.docker-cache.outputs.cache-hit == 'true'
- name: Install Sage
run: |
docker load -i /tmp/sagemath_image/sagemath.tar
- name: Pull Docker sagemath image
if: steps.docker-cache.outputs.cache-hit != 'true'
run: |
docker pull sagemath/sagemath:"${VERSION}"
mkdir -p /tmp/sagemath_image
docker save sagemath/sagemath:"${VERSION}" -o /tmp/sagemath_image/sagemath.tar
env:
VERSION: ${{ env.SAGEMATH_VERSION }}
- name: Store Sagemath image in cache
if: steps.docker-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
sudo apt update
sudo apt install -y sagemath
- name: Collect parameters
run: |
@@ -93,9 +95,7 @@ jobs:
- name: Perform security check
run: |
docker run \
-v "${PWD}":/repo_src \
sagemath/sagemath:10.8 /bin/bash /repo_src/scripts/execute_lattice_estimator.sh
PYTHONPATH=lattice_estimator sage ci/lattice_estimator.sage
- name: Get time elapsed
if: ${{ always() }}
@@ -127,3 +127,27 @@ jobs:
SLACK_MESSAGE: "Security check for parameters finished with status: ${{ job.status }} (analysis took: ${{ env.TIME_ELAPSED }} mins). (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
teardown-instance:
name: parameters_check/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [setup-instance, params-curves-security-check]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@d4580322fc216877c48ac2987df9573ffd03476c # v1.5.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (params-curves-security-check) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,18 +1,179 @@
# Placeholder workflow file allowing running it without having to merge to main first
name: placeholder_workflow
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
permissions: {}
permissions:
contents: read
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
placeholder:
name: placeholder_workflow/placeholder
should-run:
name: aws_tfhe_param_prod_tests/should-run
if: (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
versionable_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.versionable_any_changed }}
shortint_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.shortint_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
steps:
- run: |
echo "Hello this is a Placeholder Workflow"
- name: Checkout tfhe-rs
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
dependencies:
- tfhe/Cargo.toml
- tfhe-csprng/**
- tfhe-fft/**
- tfhe-zk-pok/**
- utils/tfhe-versionable/**
- utils/tfhe-versionable-derive/**
versionable:
- utils/tfhe-versionable/**
- utils/tfhe-versionable-derive/**
shortint:
- tfhe/src/core_crypto/**
- tfhe/src/shortint/**
- name: Aggregate file changes
id: aggregated-changes
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
steps.changed-files.outputs.versionable_any_changed == 'true' ||
steps.changed-files.outputs.core_crypto_any_changed == 'true' ||
steps.changed-files.outputs.shortint_any_changed == 'true' )
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
setup-instance:
name: aws_tfhe_param_prod_tests/setup-instance
if: github.event_name == 'workflow_dispatch' ||
(github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
needs: should-run
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: cpu-big
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
param-prod-tests:
name: aws_tfhe_param_prod_tests/param-prod-tests
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Gen Keys if required
if: needs.should-run.outputs.shortint_test == 'true'
run: |
make gen_key_cache
- name: Run shortint tests
if: needs.should-run.outputs.shortint_test == 'true'
run: |
BIG_TESTS_INSTANCE=TRUE FAST_TESTS=FALSE make test_param_prod_shortint_ci
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: aws_tfhe_param_prod_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, param-prod-tests ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (param-prod-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -18,7 +18,7 @@ jobs:
issues: read # Needed to fetch all issues
pull-requests: write # Needed to write message and close the PR
steps:
- uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
- uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v10.1.1
with:
stale-pr-message: 'This PR is unverified and has been open for 2 days, it will now be closed. If you want to contribute please sign the CLA as indicated by the bot.'
days-before-stale: 2

1
.gitignore vendored
View File

@@ -25,7 +25,6 @@ dieharder_run.log
# Cuda local build
backends/tfhe-cuda-backend/cuda/cmake-build-debug/
backends/tfhe-cuda-backend/cuda/build/
# WASM tests
tfhe/web_wasm_parallel_tests/server.PID

View File

@@ -9,7 +9,7 @@ ignore:
- tfhe/web_wasm_parallel_tests/dist
- keys
- coverage
- utils/tfhe-lints/tests/*/main.stderr
- utils/tfhe-lints/ui/main.stderr
- utils/tfhe-backward-compat-data/**/*.ron # ron files are autogenerated
rules:

View File

@@ -2,37 +2,35 @@
# i.e. the `core_crypto` dir is owned and needs owner approval/review, but not the `gpu` sub dir
# See https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#example-of-a-codeowners-file
/backends/tfhe-cuda-backend/ @zama-ai/gpu
/backends/zk-cuda-backend/ @zama-ai/gpu
/backends/tfhe-cuda-backend/ @agnesLeroy
/backends/tfhe-hpu-backend/ @zama-ai/hardware
/tfhe/examples/hpu @zama-ai/hardware
/tfhe/src/core_crypto/ @IceTDrinker @mayeul-zama
/tfhe/src/core_crypto/gpu @zama-ai/gpu
/tfhe/src/core_crypto/ @IceTDrinker
/tfhe/src/core_crypto/gpu @agnesLeroy
/tfhe/src/core_crypto/hpu @zama-ai/hardware
/tfhe/src/shortint/ @mayeul-zama @nsarlin-zama
/tfhe/src/integer/ @tmontaigu @nsarlin-zama
/tfhe/src/integer/gpu @zama-ai/gpu
/tfhe/src/integer/ @tmontaigu
/tfhe/src/integer/gpu @agnesLeroy
/tfhe/src/integer/hpu @zama-ai/hardware
/tfhe/src/high_level_api/ @tmontaigu @nsarlin-zama
/tfhe/src/high_level_api/ @tmontaigu
/tfhe-zk-pok/ @nsarlin-zama @tmontaigu
/tfhe-zk-pok/src/gpu @zama-ai/gpu
/tfhe-zk-pok/ @nsarlin-zama
/tfhe-benchmark/ @soonum @SouchonTheo
/tfhe-benchmark/ @soonum
/utils/ @nsarlin-zama @SouchonTheo
/utils/ @nsarlin-zama
/Makefile @IceTDrinker @soonum
/mockups/tfhe-hpu-mockup @zama-ai/hardware
/.github/ @soonum @SouchonTheo
/ci/ @soonum @SouchonTheo
/scripts/ @soonum @SouchonTheo
/.github/ @soonum
/ci/ @soonum
/scripts/ @soonum
/CODEOWNERS @IceTDrinker @nsarlin-zama
/CODEOWNERS @IceTDrinker

View File

@@ -17,7 +17,7 @@ Start by [forking](https://docs.github.com/en/pull-requests/collaborating-with-p
- **Performance**: For optimal performance, it is highly recommended to run **TFHE-rs** code in release mode with cargo's `--release` flag.
{% endhint %}
To get more details about the library, please refer to the [documentation](https://docs.zama.org/tfhe-rs).
To get more details about the library, please refer to the [documentation](https://docs.zama.ai/tfhe-rs).
## 2. Creating a new branch

View File

@@ -9,12 +9,10 @@ members = [
"tasks",
"tfhe-csprng",
"backends/tfhe-cuda-backend",
"backends/zk-cuda-backend",
"backends/tfhe-hpu-backend",
"utils/tfhe-versionable",
"utils/tfhe-versionable-derive",
"utils/tfhe-backward-compat-data",
"utils/tfhe-backward-compat-data/crates/add_new_version",
"utils/param_dedup",
"tests",
"mockups/tfhe-hpu-mockup",
@@ -28,8 +26,6 @@ rust-version = "1.91.1"
[workspace.dependencies]
aligned-vec = { version = "0.6", default-features = false }
ark-ec = "0.5.0"
ark-ff = "0.5.0"
bytemuck = "1.24"
dyn-stack = { version = "0.13", default-features = false }
itertools = "0.14"
@@ -41,11 +37,7 @@ serde = { version = "1.0", default-features = false }
wasm-bindgen = "0.2.101"
getrandom = "0.2.8"
# The project maintainers consider that this is the last version of the 1.3 branch, any newer version should not be trusted
bindgen = "0.71"
bincode = "=1.3.3"
cmake = "0.1"
pkg-config = "0.3"
clap = { version = "4.5", features = ["derive"] }
[profile.bench]
lto = "fat"

269
Makefile
View File

@@ -1,7 +1,4 @@
SHELL:=$(shell /usr/bin/env which bash)
# Enable stop on error, no undefined variables
# the c flag is to run the script inline
.SHELLFLAGS := -eu -c
OS:=$(shell uname)
RS_CHECK_TOOLCHAIN:=$(shell cat nightly-toolchain.txt | tr -d '\n')
CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
@@ -58,9 +55,6 @@ REGEX_PATTERN?=''
# tfhe-cuda-backend
TFHECUDA_SRC=backends/tfhe-cuda-backend/cuda
TFHECUDA_BUILD=$(TFHECUDA_SRC)/build
ZKCUDA_SRC=backends/zk-cuda-backend/cuda
ZKCUDA_BUILD=$(ZKCUDA_SRC)/build
ZKCUDARS_SRC=backends/zk-cuda-backend/src
# tfhe-hpu-backend
HPU_CONFIG=v80
@@ -270,23 +264,12 @@ install_mlc:
cargo install mlc --locked || \
( echo "Unable to install mlc, unknown error." && exit 1 )
fmt: FMT_CHECK =
.PHONY: fmt # Format rust code
fmt: fmt_internal
check_fmt: FMT_CHECK = --check
.PHONY: check_fmt # Check rust code format
check_fmt: fmt_internal
.PHONY: fmt_internal # internal recipe for fmt
fmt_internal: install_rs_check_toolchain
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt $(FMT_CHECK)
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C utils/tfhe-lints fmt $(FMT_CHECK)
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C apps/trivium fmt $(FMT_CHECK)
for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
echo "fmt $$crate"; \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate fmt $(FMT_CHECK); \
done
fmt: install_rs_check_toolchain
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C $(BACKWARD_COMPAT_DATA_DIR) fmt
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C utils/tfhe-lints fmt
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C apps/trivium fmt
.PHONY: fmt_js # Format javascript code
fmt_js: check_nvm_installed
@@ -296,24 +279,10 @@ fmt_js: check_nvm_installed
$(MAKE) -C tfhe/web_wasm_parallel_tests fmt && \
$(MAKE) -C tfhe/js_on_wasm_tests fmt
.PHONY: semgrep_lint_setup_venv # Create venv and install Python dependencies for GPU lint checks
semgrep_lint_setup_venv:
python3 -m venv venv
venv/bin/pip install -r scripts/gpu-lint-requirements.txt
.PHONY: semgrep_and_lint_gpu_code # Run semgrep and lint checks on CUDA backend code
semgrep_and_lint_gpu_code: semgrep_lint_setup_venv
find "$(TFHECUDA_SRC)" -name '*.h' -o -name '*.cuh' -o -name '*.cu' \
| grep -v '/cmake-build-debug/' \
| grep -v '/build/' \
| xargs venv/bin/semgrep --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
venv/bin/python3 "scripts/check_scratch_cleanup.py"
.PHONY: fmt_gpu # Format rust and cuda code
fmt_gpu: install_rs_check_toolchain
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
cd "$(TFHECUDA_SRC)" && ./format_tfhe_cuda_backend.sh
cd "$(ZKCUDA_SRC)" && ./format_zk_cuda_backend.sh
.PHONY: fmt_c_tests # Format c tests
fmt_c_tests:
@@ -323,6 +292,13 @@ fmt_c_tests:
fmt_toml: install_taplo
taplo fmt
.PHONY: check_fmt # Check rust code format
check_fmt: install_rs_check_toolchain
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C $(BACKWARD_COMPAT_DATA_DIR) fmt --check
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C utils/tfhe-lints fmt --check
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C apps/trivium fmt --check
.PHONY: check_fmt_c_tests # Check C tests format
check_fmt_c_tests:
find tfhe/c_api_tests/ -regex '.*\.\(cpp\|hpp\|cu\|c\|h\)' -exec clang-format --dry-run --Werror -style=file {} \;
@@ -331,7 +307,6 @@ check_fmt_c_tests:
check_fmt_gpu: install_rs_check_toolchain
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check
cd "$(TFHECUDA_SRC)" && ./format_tfhe_cuda_backend.sh -c
cd "$(ZKCUDA_SRC)" && ./format_zk_cuda_backend.sh -c
.PHONY: check_fmt_js # Check javascript code format
check_fmt_js: check_nvm_installed
@@ -353,14 +328,14 @@ check_typos: install_typos_checker
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
clippy_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats,extended-types,zk-pok \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types,zk-pok \
--all-targets \
-p tfhe -- --no-deps -D warnings
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
check_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
--all-targets \
-p tfhe
@@ -374,7 +349,7 @@ clippy_hpu: install_rs_check_toolchain
.PHONY: clippy_gpu_hpu # Run clippy lints on tfhe with "gpu" and "hpu" enabled
clippy_gpu_hpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,hpu,pbs-stats,extended-types,zk-pok \
--features=boolean,shortint,integer,internal-keycache,gpu,hpu,pbs-stats,extended-types,zk-pok \
--all-targets \
-p tfhe -- --no-deps -D warnings
@@ -467,7 +442,7 @@ clippy_rustdoc_gpu: install_rs_check_toolchain
fi && \
CARGO_TERM_QUIET=true CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu,gpu-experimental-zk \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
-p tfhe -- --nocapture
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
@@ -543,10 +518,11 @@ clippy_param_dedup: install_rs_check_toolchain
.PHONY: clippy_backward_compat_data # Run clippy lints on tfhe-backward-compat-data
clippy_backward_compat_data: install_rs_check_toolchain # the toolchain is selected with toolchain.toml
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-backward-compat-data -- --no-deps -D warnings
@# Some old crates are x86 specific, only run in that case
@if uname -a | grep -q x86; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
-C $(BACKWARD_COMPAT_DATA_DIR) clippy --all --all-targets \
-- --no-deps -D warnings; \
for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
echo "checking $$crate"; \
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
@@ -575,8 +551,6 @@ clippy_core clippy_tfhe_csprng
clippy_cuda_backend: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-cuda-backend -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p zk-cuda-backend -- --no-deps -D warnings
.PHONY: clippy_hpu_backend # Run clippy lints on the tfhe-hpu-backend
clippy_hpu_backend: install_rs_check_toolchain
@@ -670,7 +644,7 @@ build_c_api: install_rs_check_toolchain
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
build_c_api_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu,gpu-experimental-zk \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu \
-p tfhe
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
@@ -748,28 +722,8 @@ test_cuda_backend:
"$(MAKE)" -j "$(CPU_COUNT)" && \
"$(MAKE)" test
.PHONY: test_cuda_backend_race_check # Build and run selected CUDA backend tests with Compute Sanitizer racecheck
test_cuda_backend_race_check:
mkdir -p "$(TFHECUDA_BUILD)" && \
cd "$(TFHECUDA_BUILD)" && \
cmake .. -DCMAKE_BUILD_TYPE=Release -DTFHE_CUDA_BACKEND_BUILD_TESTS=ON && \
"$(MAKE)" -j "$(CPU_COUNT)" test_tfhe_cuda_backend && \
compute-sanitizer --tool racecheck --target-processes all ./tests_and_benchmarks/tests/test_tfhe_cuda_backend \
--gtest_filter="*ClassicalProgrammableBootstrap*:*MultiBitProgrammableBootstrap*"
.PHONY: test_zk_cuda_backend # Run the internal tests of the CUDA ZK backend
test_zk_cuda_backend:
mkdir -p "$(ZKCUDA_BUILD)" && \
cd "$(ZKCUDA_BUILD)" && \
cmake .. -DCMAKE_BUILD_TYPE=Release -DZK_CUDA_BACKEND_BUILD_TESTS=ON && \
"$(MAKE)" -j "$(CPU_COUNT)" && \
"$(MAKE)" test
cd "$(ZKCUDARS_SRC)" && \
cargo test --release
.PHONY: test_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend test_zk_cuda_backend
test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
test_core_crypto_gpu:
@@ -809,7 +763,7 @@ test_integer_hl_test_gpu_check_warnings:
--features=integer,internal-keycache,gpu-debug,zk-pok -vv -p tfhe &> /tmp/gpu_compile_output
WARNINGS=$$(cat /tmp/gpu_compile_output | grep ": warning #" | grep "\[tfhe-cuda-backend" | grep -v "inline qualifier" || true) && \
if [[ "$${WARNINGS}" != "" ]]; then \
echo "FAILING BECAUSE CUDA COMPILATION WARNINGS WERE DETECTED: " && \
echo "FAILING BECAUSE CUDA COMPILATION WARNINGS WERE DETECTED: " && \
echo "$${WARNINGS}" && exit 1; \
fi
@@ -966,6 +920,14 @@ test_shortint_ci: install_cargo_nextest
./scripts/shortint-tests.sh \
--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "tfhe"
.PHONY: test_param_prod_shortint_ci # Run the tests for shortint ci
test_param_prod_shortint_ci: install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/shortint-tests.sh \
--cargo-profile "$(CARGO_PROFILE)" --run-prod-only --tfhe-package "tfhe"
.PHONY: test_shortint_multi_bit_ci # Run the tests for shortint ci running only multibit tests
test_shortint_multi_bit_ci: install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
@@ -1205,31 +1167,12 @@ test_tfhe_csprng_big_endian: install_cargo_cross
RUSTFLAGS="" cross test --profile $(CARGO_PROFILE) \
-p tfhe-csprng --target=powerpc64-unknown-linux-gnu
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
test_zk_pok:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
-p tfhe-zk-pok --features experimental
.PHONY: test_zk_pok_gpu # Run tfhe-zk-pok GPU-accelerated tests
test_zk_pok_gpu:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
-p tfhe-zk-pok --features experimental,gpu-experimental -- gpu
.PHONY: test_integer_zk_gpu # Run tfhe-zk-pok tests
test_integer_zk_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile release \
--features=integer,zk-pok,gpu -p tfhe -- \
integer::gpu::zk::
.PHONY: test_integer_zk_experimental_gpu # Run tfhe-zk-pok tests
test_integer_zk_experimental_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile release \
--features=integer,zk-pok,gpu,gpu-experimental-zk -p tfhe -- \
integer::gpu::zk::
.PHONY: test_zk_cuda # Run all GPU MSM integration tests (CPU vs GPU comparison + integration test)
test_zk_cuda: install_rs_check_toolchain test_zk_cuda_backend test_zk_pok_gpu test_integer_zk_gpu test_integer_zk_experimental_gpu
.PHONY: test_zk_wasm_x86_compat_ci
test_zk_wasm_x86_compat_ci: check_nvm_installed
source ~/.nvm/nvm.sh && \
@@ -1503,14 +1446,14 @@ bench_integer_hpu: install_rs_check_toolchain
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
bench_integer_compression: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
.PHONY: bench_integer_compression_gpu
bench_integer_compression_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1522,47 +1465,26 @@ bench_integer_compression_128b_gpu: install_rs_check_toolchain
--bench glwe_packing_compression_128b-integer-bench \
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --
.PHONY: bench_msm_zk
bench_msm_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-msm \
--features=zk-pok -p tfhe-benchmark --profile release --
.PHONY: bench_msm_zk_gpu
bench_msm_zk_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-msm \
--features=gpu,gpu-experimental-zk,zk-pok -p tfhe-benchmark --profile release --
.PHONY: bench_integer_zk_gpu
bench_integer_zk_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --profile release --
.PHONY: bench_integer_zk_experimental_gpu
bench_integer_zk_experimental_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats,zk-pok -p tfhe-benchmark --profile release --
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_integer_aes_gpu # Run benchmarks for AES on GPU backend
bench_integer_aes_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-aes \
--features=integer,internal-keycache,gpu -p tfhe-benchmark --profile release_lto_off --
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_integer_aes256_gpu # Run benchmarks for AES256 on GPU backend
bench_integer_aes256_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-aes256 \
--features=integer,internal-keycache,gpu -p tfhe-benchmark --profile release_lto_off --
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_integer_trivium_gpu # Run benchmarks for trivium on GPU backend
bench_integer_trivium_gpu: install_rs_check_toolchain
@@ -1612,7 +1534,7 @@ bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
bench_integer_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,zk-pok,pbs-stats \
@@ -1799,14 +1721,14 @@ bench_hlapi_erc20: install_rs_check_toolchain
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
bench_hlapi_erc20_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_erc20_gpu_classical # Run benchmarks for ERC20 operations on GPU with classical parameters
bench_hlapi_erc20_gpu_classical: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=classical \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1845,13 +1767,6 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench -p tfhe-zk-pok --
.PHONY: bench_tfhe_zk_pok_gpu # Run benchmarks for the tfhe_zk_pok crate using GPU acceleration
bench_tfhe_zk_pok_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--package tfhe-zk-pok \
--features=gpu-experimental --profile release
.PHONY: bench_hlapi_noise_squash # Run benchmarks for noise squash operation
bench_hlapi_noise_squash: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
@@ -1866,108 +1781,6 @@ bench_hlapi_noise_squash_gpu: install_rs_check_toolchain
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_kvstore # Run benchmarks for Key-Value Store operations
bench_hlapi_kvstore: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-kvstore \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
.PHONY: bench_summary # Run summary benchmarks
bench_summary: install_rs_check_toolchain
# Arithmetic operations: addition, multiplication, division, comparison
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi_unsigned \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::add|::mul|::gt|::div_rem'
# Noise squash
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::noise_squash::'
# Noise squash and compression
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::decomp_noise_squash_comp::'
# ERC20
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,internal-keycache -p tfhe-benchmark -- '::transfer::overflow'
# DEX
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-dex \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::no_cmux::'
# ZK
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,zk-pok,pbs-stats \
-p tfhe-benchmark --
# Compression
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
.PHONY: bench_summary_gpu # Run summary benchmarks on GPU
bench_summary_gpu: install_rs_check_toolchain
# Arithmetic operations: addition, multiplication, division, comparison
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=FAST_DEFAULT __TFHE_RS_BENCH_BIT_SIZES_SET=FAST __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::add|::mul|::gt|::div_rem'
# Noise squash
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::noise_squash::'
# Noise squash and compression
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::decomp_noise_squash_comp::'
# ERC20
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off -- '::transfer::overflow'
# DEX
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-dex \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::no_cmux::'
# ZK
# Proof is done on CPU node of the instance
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=fast_default __TFHE_RS_BENCH_BIT_SIZES_SET=fast \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,zk-pok,pbs-stats \
-p tfhe-benchmark -- '::pke_zk_proof'
# Verify is done on GPUs
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=fast_default __TFHE_RS_BENCH_BIT_SIZES_SET=fast \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --
# Compression
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=FAST \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-glwe_packing_compression \
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_custom # Run benchmarks with a user-defined command
bench_custom: install_rs_check_toolchain
@@ -2094,7 +1907,7 @@ pcc_batch_1:
pcc_batch_2:
$(call run_recipe_with_details,clippy)
$(call run_recipe_with_details,clippy_all_targets)
$(call run_recipe_with_details,check_fmt_js) # This needs to stay there, CI pipeline rely on this recipe to conditionally install Node
$(call run_recipe_with_details,check_fmt_js)
$(call run_recipe_with_details,clippy_test_vectors)
$(call run_recipe_with_details,check_test_vectors)

View File

@@ -10,7 +10,7 @@
<hr/>
<p align="center">
<a href="https://github.com/zama-ai/tfhe-rs-handbook/blob/main/tfhe-rs-handbook.pdf"> 📃 Read Handbook</a> |<a href="https://docs.zama.org/tfhe-rs"> 📒 Documentation</a> | <a href="https://www.zama.org/community-channels"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
<a href="https://github.com/zama-ai/tfhe-rs-handbook/blob/main/tfhe-rs-handbook.pdf"> 📃 Read Handbook</a> |<a href="https://docs.zama.ai/tfhe-rs"> 📒 Documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
</p>
@@ -47,7 +47,7 @@ production-ready library for all the advanced features of TFHE.
- **Ciphertext and server key compression** for efficient data transfer
- **Full Rust API, C bindings to the Rust High-Level API, and client-side JavaScript API using WASM**.
*Learn more about TFHE-rs features in the [documentation](https://docs.zama.org/tfhe-rs).*
*Learn more about TFHE-rs features in the [documentation](https://docs.zama.ai/tfhe-rs/readme).*
<br></br>
## Table of Contents
@@ -149,7 +149,7 @@ To run this code, use the following command:
> Note that when running code that uses `TFHE-rs`, it is highly recommended
to run in release mode with cargo's `--release` flag to have the best performance possible.
*Find an example with more explanations in [this part of the documentation](https://docs.zama.org/tfhe-rs/get-started/quick-start)*
*Find an example with more explanations in [this part of the documentation](https://docs.zama.ai/tfhe-rs/get-started/quick-start)*
<p align="right">
<a href="#about" > ↑ Back to top </a>
@@ -163,25 +163,25 @@ to run in release mode with cargo's `--release` flag to have the best performanc
A document containing scientific and technical details about algorithms implemented into the library is available here: [TFHE-rs: A (Practical) Handbook](https://github.com/zama-ai/tfhe-rs-handbook/blob/main/tfhe-rs-handbook.pdf).
### TFHE deep dive
- [TFHE Deep Dive - Part I - Ciphertext types](https://www.zama.org/post/tfhe-deep-dive-part-1)
- [TFHE Deep Dive - Part II - Encodings and linear leveled operations](https://www.zama.org/post/tfhe-deep-dive-part-2)
- [TFHE Deep Dive - Part III - Key switching and leveled multiplications](https://www.zama.org/post/tfhe-deep-dive-part-3)
- [TFHE Deep Dive - Part IV - Programmable Bootstrapping](https://www.zama.org/post/tfhe-deep-dive-part-4)
- [TFHE Deep Dive - Part I - Ciphertext types](https://www.zama.ai/post/tfhe-deep-dive-part-1)
- [TFHE Deep Dive - Part II - Encodings and linear leveled operations](https://www.zama.ai/post/tfhe-deep-dive-part-2)
- [TFHE Deep Dive - Part III - Key switching and leveled multiplications](https://www.zama.ai/post/tfhe-deep-dive-part-3)
- [TFHE Deep Dive - Part IV - Programmable Bootstrapping](https://www.zama.ai/post/tfhe-deep-dive-part-4)
<br></br>
### Tutorials
- [Video tutorial: Implement signed integers using TFHE-rs](https://www.zama.org/post/video-tutorial-implement-signed-integers-sing-tfhe-rs)
- [Homomorphic parity bit](https://docs.zama.org/tfhe-rs/tutorials/parity-bit)
- [Homomorphic case changing on Ascii string](https://docs.zama.org/tfhe-rs/tutorials/ascii-fhe-string)
- [Boolean SHA256 with TFHE-rs](https://www.zama.org/post/boolean-sha256-tfhe-rs)
- [Dark market with TFHE-rs](https://www.zama.org/post/dark-market-tfhe-rs)
- [Regular expression engine with TFHE-rs](https://www.zama.org/post/regex-engine-tfhe-rs)
- [[Video tutorial] Implement signed integers using TFHE-rs ](https://www.zama.ai/post/video-tutorial-implement-signed-integers-sing-tfhe-rs)
- [Homomorphic parity bit](https://docs.zama.ai/tfhe-rs/tutorials/parity-bit)
- [Homomorphic case changing on Ascii string](https://docs.zama.ai/tfhe-rs/tutorials/ascii-fhe-string)
- [Boolean SHA256 with TFHE-rs](https://www.zama.ai/post/boolean-sha256-tfhe-rs)
- [Dark market with TFHE-rs](https://www.zama.ai/post/dark-market-tfhe-rs)
- [Regular expression engine with TFHE-rs](https://www.zama.ai/post/regex-engine-tfhe-rs)
*Explore more useful resources in [TFHE-rs tutorials](https://docs.zama.org/tfhe-rs/tutorials) and [Awesome Zama repo](https://github.com/zama-ai/awesome-zama)*
*Explore more useful resources in [TFHE-rs tutorials](https://docs.zama.ai/tfhe-rs/tutorials) and [Awesome Zama repo](https://github.com/zama-ai/awesome-zama)*
<br></br>
### Documentation
Full, comprehensive documentation is available here: [https://docs.zama.org/tfhe-rs](https://docs.zama.org/tfhe-rs).
Full, comprehensive documentation is available here: [https://docs.zama.ai/tfhe-rs](https://docs.zama.ai/tfhe-rs).
<p align="right">
<a href="#about" > ↑ Back to top </a>
</p>
@@ -202,7 +202,7 @@ When a new update is published in the Lattice Estimator, we update parameters ac
### Security model
By default, the parameter sets used in the High-Level API have a failure probability $\le 2^{-128}$ to securely work in the IND-CPA^D model using the algorithmic techniques provided in our code base [1].
If you want to work within the IND-CPA security model, which is less strict than the IND-CPA-D model, the parameter sets can easily be changed and would have slightly better performance. More details can be found in the [TFHE-rs documentation](https://docs.zama.org/tfhe-rs).
If you want to work within the IND-CPA security model, which is less strict than the IND-CPA-D model, the parameter sets can easily be changed and would have slightly better performance. More details can be found in the [TFHE-rs documentation](https://docs.zama.ai/tfhe-rs).
[1] Bernard, Olivier, et al. "Drifting Towards Better Error Probabilities in Fully Homomorphic Encryption Schemes". https://eprint.iacr.org/2024/1718.pdf
@@ -231,7 +231,7 @@ To cite TFHE-rs in academic papers, please use the following entry:
There are two ways to contribute to TFHE-rs:
- [Open issues](https://github.com/zama-ai/tfhe-rs/issues/new/choose) to report bugs and typos, or to suggest new ideas
- Request to become an official contributor by emailing [hello@zama.org](mailto:hello@zama.org).
- Request to become an official contributor by emailing [hello@zama.ai](mailto:hello@zama.ai).
Becoming an approved contributor involves signing our Contributor License Agreement (CLA). Only approved contributors can send pull requests, so please make sure to get in touch before you do!
<br></br>
@@ -243,16 +243,16 @@ This software is distributed under the **BSD-3-Clause-Clear** license. Read [thi
**Is Zamas technology free to use?**
>Zamas libraries are free to use under the BSD 3-Clause Clear license only for development, research, prototyping, and experimentation purposes. However, for any commercial use of Zama's open source code, companies must purchase Zamas commercial patent license.
>
>Everything we do is open source and we are very transparent on what it means for our users, you can read more about how we monetize our open source products at Zama in [this blogpost](https://www.zama.org/post/open-source).
>Everything we do is open source and we are very transparent on what it means for our users, you can read more about how we monetize our open source products at Zama in [this blogpost](https://www.zama.ai/post/open-source).
**What do I need to do if I want to use Zamas technology for commercial purposes?**
>To commercially use Zamas technology you need to be granted Zamas patent license. Please contact us hello@zama.org for more information.
>To commercially use Zamas technology you need to be granted Zamas patent license. Please contact us hello@zama.ai for more information.
**Do you file IP on your technology?**
>Yes, all Zamas technologies are patented.
**Can you customize a solution for my specific use case?**
>We are open to collaborating and advancing the FHE space with our partners. If you have specific needs, please email us at hello@zama.org.
>We are open to collaborating and advancing the FHE space with our partners. If you have specific needs, please email us at hello@zama.ai.
<p align="right">
<a href="#about" > ↑ Back to top </a>
@@ -261,7 +261,7 @@ This software is distributed under the **BSD-3-Clause-Clear** license. Read [thi
## Support
<a target="_blank" href="https://community.zama.org">
<a target="_blank" href="https://community.zama.ai">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/zama-ai/tfhe-rs/assets/157474013/08656d0a-3f44-4126-b8b6-8c601dff5380">
<source media="(prefers-color-scheme: light)" srcset="https://github.com/zama-ai/tfhe-rs/assets/157474013/1c9c9308-50ac-4aab-a4b9-469bb8c536a4">

View File

@@ -5,16 +5,16 @@ edition = "2021"
authors = ["Zama team"]
license = "BSD-3-Clause-Clear"
description = "Cuda implementation of TFHE-rs primitives."
homepage = "https://www.zama.org/"
documentation = "https://docs.zama.org/tfhe-rs"
homepage = "https://www.zama.ai/"
documentation = "https://docs.zama.ai/tfhe-rs"
repository = "https://github.com/zama-ai/tfhe-rs"
readme = "README.md"
keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
[build-dependencies]
cmake.workspace = true
pkg-config.workspace = true
bindgen.workspace = true
cmake = { version = "0.1" }
pkg-config = { version = "0.3" }
bindgen = "0.71"
[features]
experimental-multi-arch = []

View File

@@ -51,4 +51,4 @@ If your machine does not have an available Nvidia GPU, the compilation will work
## License
This software is distributed under the BSD-3-Clause-Clear license. If you have any questions,
please contact us at `hello@zama.org`.
please contact us at `hello@zama.ai`.

View File

@@ -1,64 +0,0 @@
rules:
- id: release-missing-cuda-synchronize
message: >-
release() method does not call cuda_synchronize_stream or delegate to
another release(). All release methods must synchronize the CUDA stream
(directly or via delegation) to ensure async GPU operations complete
before returning.
severity: ERROR
languages: [cpp]
paths:
exclude:
- "**/helper_multi_gpu.h"
patterns:
- pattern: |
void release(...) {
...
}
- pattern-not: |
void release(...) {
...
cuda_synchronize_stream($S.stream(0), ...);
...
}
- pattern-not: |
void release(cudaStream_t stream, ...) {
...
cuda_synchronize_stream(stream, ...);
...
}
- pattern-not: |
void release(...) {
...
$MEM->release(...);
...
}
- id: cleanup-missing-release-or-synchronize
message: >-
cleanup_ function does not call release() or cuda_synchronize_stream().
All non-async cleanup_ functions must either call release() on a memory
structure or synchronize the CUDA stream.
severity: ERROR
languages: [cpp]
patterns:
- pattern: |
void $FUNC(...) {
...
}
- metavariable-regex:
metavariable: $FUNC
regex: ^cleanup_.*(?<!_async)$
- pattern-not: |
void $FUNC(...) {
...
$MEM->release(...);
...
}
- pattern-not: |
void $FUNC(...) {
...
cuda_synchronize_stream(...);
...
}

View File

@@ -3,7 +3,7 @@
#include "../integer/integer.h"
extern "C" {
uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
uint64_t scratch_cuda_integer_aes_encrypt_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
@@ -12,28 +12,18 @@ uint64_t scratch_cuda_integer_aes_ctr_encrypt_64_async(
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
uint32_t sbox_parallelism);
uint64_t scratch_cuda_integer_aes_ctr_256_encrypt_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_aes_inputs,
uint32_t sbox_parallelism);
void cuda_integer_aes_ctr_encrypt_64(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *iv,
CudaRadixCiphertextFFI const *round_keys,
const uint64_t *counter_bits_le_all_blocks,
uint32_t num_aes_inputs, int8_t *mem_ptr,
void *const *bsks, void *const *ksks);
void cuda_integer_aes_ctr_encrypt_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *iv, CudaRadixCiphertextFFI const *round_keys,
const uint64_t *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
void cleanup_cuda_integer_aes_encrypt_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
void cleanup_cuda_integer_aes_ctr_encrypt_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
void cleanup_cuda_integer_aes_ctr_256_encrypt_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_key_expansion_64_async(
uint64_t scratch_cuda_integer_key_expansion_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
@@ -41,22 +31,22 @@ uint64_t scratch_cuda_integer_key_expansion_64_async(
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_key_expansion_64_async(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *expanded_keys,
CudaRadixCiphertextFFI const *key,
int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void cuda_integer_key_expansion_64(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *expanded_keys,
CudaRadixCiphertextFFI const *key,
int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void cleanup_cuda_integer_key_expansion_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
void cuda_integer_aes_ctr_256_encrypt_64_async(
void cuda_integer_aes_ctr_256_encrypt_64(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *iv, CudaRadixCiphertextFFI const *round_keys,
const uint64_t *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
uint64_t scratch_cuda_integer_key_expansion_256_64_async(
uint64_t scratch_cuda_integer_key_expansion_256_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
@@ -64,10 +54,11 @@ uint64_t scratch_cuda_integer_key_expansion_256_64_async(
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_key_expansion_256_64_async(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *expanded_keys,
CudaRadixCiphertextFFI const *key, int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void cuda_integer_key_expansion_256_64(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *expanded_keys,
CudaRadixCiphertextFFI const *key,
int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void cleanup_cuda_integer_key_expansion_256_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);

View File

@@ -34,7 +34,7 @@ template <typename Torus> struct int_aes_lut_buffers {
SBOX_MAX_AND_GATES * num_aes_inputs * sbox_parallelism,
params.pbs_type);
this->and_lut->generate_and_broadcast_bivariate_lut(
active_streams_and_lut, {0}, {and_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams_and_lut, {0}, {and_lambda}, allocate_gpu_memory);
this->and_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
@@ -48,7 +48,7 @@ template <typename Torus> struct int_aes_lut_buffers {
auto active_streams_flush_lut = streams.active_gpu_subset(
AES_STATE_BITS * num_aes_inputs, params.pbs_type);
this->flush_lut->generate_and_broadcast_lut(
active_streams_flush_lut, {0}, {flush_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams_flush_lut, {0}, {flush_lambda}, allocate_gpu_memory);
this->flush_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
this->carry_lut = new int_radix_lut<Torus>(
@@ -60,7 +60,7 @@ template <typename Torus> struct int_aes_lut_buffers {
auto active_streams_carry_lut =
streams.active_gpu_subset(num_aes_inputs, params.pbs_type);
this->carry_lut->generate_and_broadcast_lut(
active_streams_carry_lut, {0}, {carry_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams_carry_lut, {0}, {carry_lambda}, allocate_gpu_memory);
this->carry_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
}
@@ -179,11 +179,11 @@ template <typename Torus> struct int_aes_counter_workspaces {
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
this->h_counter_bits_buffer =
(Torus *)malloc(safe_mul_sizeof<Torus>(num_aes_inputs));
size_tracker += safe_mul_sizeof<Torus>(num_aes_inputs);
(Torus *)malloc(num_aes_inputs * sizeof(Torus));
size_tracker += num_aes_inputs * sizeof(Torus);
this->d_counter_bits_buffer = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_aes_inputs), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
num_aes_inputs * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
}
void release(CudaStreams streams, bool allocate_gpu_memory) {

View File

@@ -1,35 +0,0 @@
#pragma once
#include <cstddef>
#include <cstdio>
#include "device.h"
// Variadic checked multiplication of size_t values.
// Folds left-to-right using __builtin_mul_overflow, returning true on overflow.
// On overflow the value written to *out is unspecified.
template <typename... Args>
inline bool checked_mul(size_t *out, size_t first, Args... rest) {
size_t result = first;
for (size_t value : {static_cast<size_t>(rest)...}) {
if (__builtin_mul_overflow(result, value, &result))
return true;
}
*out = result;
return false;
}
// Variadic safe multiplication: computes the product and panics on overflow.
template <typename... Args> inline size_t safe_mul(size_t first, Args... rest) {
size_t result;
bool overflow = checked_mul(&result, first, rest...);
PANIC_IF_FALSE(!overflow, "multiplication overflow wraps size_t");
return result;
}
// Variadic safe multiplication with an appended sizeof(T) factor.
// Computes (args... * sizeof(T)) with overflow checking.
template <typename T, typename... Args>
inline size_t safe_mul_sizeof(Args... args) {
return safe_mul(args..., sizeof(T));
}

View File

@@ -5,36 +5,39 @@
extern "C" {
void cuda_convert_lwe_ciphertext_vector_to_gpu_64_async(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t number_of_cts, uint32_t lwe_dimension);
void cuda_convert_lwe_ciphertext_vector_to_cpu_64_async(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t number_of_cts, uint32_t lwe_dimension);
void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *stream,
uint32_t gpu_index,
void *dest, void const *src,
uint32_t number_of_cts,
uint32_t lwe_dimension);
void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream,
uint32_t gpu_index,
void *dest, void const *src,
uint32_t number_of_cts,
uint32_t lwe_dimension);
void cuda_glwe_sample_extract_64_async(
void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
void *lwe_array_out, void const *glwe_array_in,
uint32_t const *nth_array, uint32_t num_nths,
uint32_t lwe_per_glwe, uint32_t glwe_dimension,
uint32_t polynomial_size);
void cuda_modulus_switch_inplace_64(void *stream, uint32_t gpu_index,
void *lwe_array_out, uint32_t size,
uint32_t log_modulus);
void cuda_modulus_switch_64(void *stream, uint32_t gpu_index, void *lwe_out,
const void *lwe_in, uint32_t size,
uint32_t log_modulus);
void cuda_centered_modulus_switch_64(void *stream, uint32_t gpu_index,
void *lwe_out, const void *lwe_in,
uint32_t lwe_dimension,
uint32_t log_modulus);
void cuda_glwe_sample_extract_128(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths,
uint32_t num_lwes_to_extract_per_glwe, uint32_t num_lwes_stored_per_glwe,
uint32_t glwe_dimension, uint32_t polynomial_size);
void cuda_modulus_switch_inplace_64_async(void *stream, uint32_t gpu_index,
void *lwe_array_out, uint32_t size,
uint32_t log_modulus);
void cuda_modulus_switch_64_async(void *stream, uint32_t gpu_index,
void *lwe_out, const void *lwe_in,
uint32_t size, uint32_t log_modulus);
void cuda_centered_modulus_switch_64_async(void *stream, uint32_t gpu_index,
void *lwe_out, const void *lwe_in,
uint32_t lwe_dimension,
uint32_t log_modulus);
void cuda_glwe_sample_extract_128_async(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths,
uint32_t num_lwes_to_extract_per_glwe, uint32_t num_lwes_stored_per_glwe,
uint32_t glwe_dimension, uint32_t polynomial_size);
uint32_t lwe_per_glwe, uint32_t glwe_dimension, uint32_t polynomial_size);
}
#endif

View File

@@ -132,8 +132,6 @@ void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
uint32_t cuda_get_max_shared_memory_per_block(uint32_t gpu_index);
bool cuda_check_support_cooperative_groups();
bool cuda_check_support_thread_block_clusters();

View File

@@ -150,9 +150,9 @@ public:
_gpu_count(src._gpu_count), _owns_streams(false) {}
CudaStreams &operator=(CudaStreams const &other) {
/* PANIC_IF_FALSE(this->_streams == nullptr ||
this->_streams == other._streams,
"Assigning an already initialized CudaStreams");*/
PANIC_IF_FALSE(this->_streams == nullptr ||
this->_streams == other._streams,
"Assigning an already initialized CudaStreams");
this->_streams = other._streams;
this->_gpu_indexes = other._gpu_indexes;
this->_gpu_count = other._gpu_count;

View File

@@ -47,7 +47,7 @@ template <typename Torus> struct boolean_bitop_buffer {
// only lut for degree = 1 is generated
lut->generate_and_broadcast_bivariate_lut(active_streams, {0},
{lut_bivariate_f},
LUT_0_FOR_ALL_BLOCKS, {}, 2);
gpu_memory_allocated, {}, 2);
}
break;
default:
@@ -63,7 +63,7 @@ template <typename Torus> struct boolean_bitop_buffer {
};
message_extract_lut->generate_and_broadcast_lut(
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
}
tmp_lwe_left = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
@@ -134,7 +134,7 @@ template <typename Torus> struct int_bitop_buffer {
};
lut->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {lut_bivariate_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {lut_bivariate_f}, gpu_memory_allocated);
}
break;
default:
@@ -166,7 +166,7 @@ template <typename Torus> struct int_bitop_buffer {
}
lut->generate_and_broadcast_lut(active_streams, lut_indices, lut_funcs,
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
}
}
@@ -203,7 +203,7 @@ template <typename Torus> struct boolean_bitnot_buffer {
streams.active_gpu_subset(lwe_ciphertext_count, params.pbs_type);
message_extract_lut->generate_and_broadcast_lut(
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
}
}

View File

@@ -37,7 +37,7 @@ template <typename Torus> struct int_extend_radix_with_sign_msb_buffer {
const auto sign_bit = (xm >> (bits_per_block - 1)) & 1;
return (Torus)((msg_modulus - 1) * sign_bit);
}},
LUT_0_FOR_ALL_BLOCKS);
allocate_gpu_memory);
this->last_block = new CudaRadixCiphertextFFI;

View File

@@ -85,28 +85,30 @@ template <typename Torus> struct int_cmux_buffer {
new int_radix_lut<Torus>(streams, params, 1, num_radix_blocks,
allocate_gpu_memory, size_tracker);
Torus *h_lut_indexes = predicate_lut->h_lut_indexes;
for (int index = 0; index < 2 * num_radix_blocks; index++) {
if (index < num_radix_blocks) {
h_lut_indexes[index] = 0;
} else {
h_lut_indexes[index] = 1;
}
}
cuda_memcpy_with_size_tracking_async_to_gpu(
predicate_lut->get_lut_indexes(0, 0), h_lut_indexes,
2 * num_radix_blocks * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
auto active_streams_pred =
streams.active_gpu_subset(2 * num_radix_blocks, params.pbs_type);
auto lut_index_generator = [num_radix_blocks](Torus *h_lut_indexes,
uint32_t num_indexes) {
for (int index = 0; index < 2 * num_radix_blocks; index++) {
if (index < num_radix_blocks) {
h_lut_indexes[index] = 0;
} else {
h_lut_indexes[index] = 1;
}
}
};
predicate_lut->generate_and_broadcast_bivariate_lut(
active_streams_pred, {0, 1}, {inverted_lut_f, lut_f},
lut_index_generator);
gpu_memory_allocated);
auto active_streams_msg =
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
message_extract_lut->generate_and_broadcast_lut(
active_streams_msg, {0}, {message_extract_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams_msg, {0}, {message_extract_lut_f}, gpu_memory_allocated);
}
void release(CudaStreams streams) {

View File

@@ -1,5 +1,4 @@
#pragma once
#include "checked_arithmetic.h"
#include "cmux.h"
#include "integer_utilities.h"
@@ -40,8 +39,8 @@ template <typename Torus> struct int_are_all_block_true_buffer {
max_chunks, params.big_lwe_dimension, size_tracker,
allocate_gpu_memory);
preallocated_h_lut = (Torus *)malloc(safe_mul_sizeof<Torus>(
params.glwe_dimension + 1, params.polynomial_size));
preallocated_h_lut = (Torus *)malloc(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus));
is_max_value = new int_radix_lut<Torus>(streams, params, 2, max_chunks,
allocate_gpu_memory, size_tracker);
@@ -54,7 +53,7 @@ template <typename Torus> struct int_are_all_block_true_buffer {
};
is_max_value->generate_and_broadcast_lut(
active_streams, {0}, {is_max_value_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {is_max_value_f}, gpu_memory_allocated);
}
void release(CudaStreams streams) {
@@ -106,7 +105,7 @@ template <typename Torus> struct int_comparison_eq_buffer {
auto active_streams =
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
is_non_zero_lut->generate_and_broadcast_lut(
active_streams, {0}, {is_non_zero_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {is_non_zero_lut_f}, gpu_memory_allocated);
// Scalar may have up to num_radix_blocks blocks
scalar_comparison_luts = new int_radix_lut<Torus>(
@@ -136,7 +135,7 @@ template <typename Torus> struct int_comparison_eq_buffer {
}
scalar_comparison_luts->generate_and_broadcast_lut(
active_streams, lut_indices, lut_funcs, LUT_0_FOR_ALL_BLOCKS);
active_streams, lut_indices, lut_funcs, gpu_memory_allocated);
if (op == COMPARISON_TYPE::EQ || op == COMPARISON_TYPE::NE) {
operator_lut =
@@ -144,7 +143,7 @@ template <typename Torus> struct int_comparison_eq_buffer {
allocate_gpu_memory, size_tracker);
operator_lut->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {operator_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {operator_f}, gpu_memory_allocated);
} else {
operator_lut = nullptr;
}
@@ -215,8 +214,8 @@ template <typename Torus> struct int_tree_sign_reduction_buffer {
tree_last_leaf_lut = new int_radix_lut<Torus>(
streams, params, 1, 1, allocate_gpu_memory, size_tracker);
preallocated_h_lut = (Torus *)malloc(safe_mul_sizeof<Torus>(
params.glwe_dimension + 1, params.polynomial_size));
preallocated_h_lut = (Torus *)malloc(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus));
tree_last_leaf_scalar_lut = new int_radix_lut<Torus>(
streams, params, 1, 1, allocate_gpu_memory, size_tracker);
@@ -228,7 +227,7 @@ template <typename Torus> struct int_tree_sign_reduction_buffer {
auto active_streams =
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
tree_inner_leaf_lut->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {block_selector_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {block_selector_f}, allocate_gpu_memory);
}
void release(CudaStreams streams) {
@@ -308,10 +307,10 @@ template <typename Torus> struct int_comparison_diff_buffer {
reduce_signs_lut =
new int_radix_lut<Torus>(streams, params, 1, num_radix_blocks,
allocate_gpu_memory, size_tracker);
preallocated_h_lut1 = (Torus *)malloc(safe_mul_sizeof<Torus>(
params.glwe_dimension + 1, params.polynomial_size));
preallocated_h_lut2 = (Torus *)malloc(safe_mul_sizeof<Torus>(
params.glwe_dimension + 1, params.polynomial_size));
preallocated_h_lut1 = (Torus *)malloc(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus));
preallocated_h_lut2 = (Torus *)malloc(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus));
}
void release(CudaStreams streams) {
@@ -413,7 +412,7 @@ template <typename Torus> struct int_comparison_buffer {
allocate_gpu_memory, size_tracker);
identity_lut->generate_and_broadcast_lut(
active_streams, {0}, {identity_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {identity_lut_f}, gpu_memory_allocated);
uint32_t total_modulus = params.message_modulus * params.carry_modulus;
auto is_zero_f = [total_modulus](Torus x) -> Torus {
@@ -424,7 +423,7 @@ template <typename Torus> struct int_comparison_buffer {
allocate_gpu_memory, size_tracker);
is_zero_lut->generate_and_broadcast_lut(active_streams, {0}, {is_zero_f},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
switch (op) {
case COMPARISON_TYPE::MAX:
@@ -501,10 +500,10 @@ template <typename Torus> struct int_comparison_buffer {
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
signed_lut->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {signed_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {signed_lut_f}, gpu_memory_allocated);
}
preallocated_h_lut = (Torus *)malloc(safe_mul_sizeof<Torus>(
params.glwe_dimension + 1, params.polynomial_size));
preallocated_h_lut = (Torus *)malloc(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus));
}
void release(CudaStreams streams) {

View File

@@ -5,15 +5,14 @@
#include "../integer.h"
extern "C" {
uint64_t scratch_cuda_integer_compress_radix_ciphertext_64_async(
uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t num_lwes_stored_per_glwe,
bool allocate_gpu_memory);
PBS_TYPE pbs_type, uint32_t lwe_per_glwe, bool allocate_gpu_memory);
uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64_async(
uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64(
CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
@@ -22,12 +21,12 @@ uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64_async(
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_compress_radix_ciphertext_64_async(
void cuda_integer_compress_radix_ciphertext_64(
CudaStreamsFFI streams, CudaPackedGlweCiphertextListFFI *glwe_array_out,
CudaLweCiphertextListFFI const *lwe_array_in, void *const *fp_ksk,
int8_t *mem_ptr);
void cuda_integer_decompress_radix_ciphertext_64_async(
void cuda_integer_decompress_radix_ciphertext_64(
CudaStreamsFFI streams, CudaLweCiphertextListFFI *lwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_in,
uint32_t const *indexes_array, void *const *bsks, int8_t *mem_ptr);
@@ -38,26 +37,25 @@ void cleanup_cuda_integer_compress_radix_ciphertext_64(CudaStreamsFFI streams,
void cleanup_cuda_integer_decompress_radix_ciphertext_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_compress_radix_ciphertext_128_async(
uint64_t scratch_cuda_integer_compress_radix_ciphertext_128(
CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, uint32_t num_lwes_stored_per_glwe,
bool allocate_gpu_memory);
PBS_TYPE pbs_type, uint32_t lwe_per_glwe, bool allocate_gpu_memory);
uint64_t scratch_cuda_integer_decompress_radix_ciphertext_128_async(
uint64_t scratch_cuda_integer_decompress_radix_ciphertext_128(
CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
uint32_t lwe_dimension, uint32_t num_radix_blocks, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory);
void cuda_integer_compress_radix_ciphertext_128_async(
void cuda_integer_compress_radix_ciphertext_128(
CudaStreamsFFI streams, CudaPackedGlweCiphertextListFFI *glwe_array_out,
CudaLweCiphertextListFFI const *lwe_array_in, void *const *fp_ksk,
int8_t *mem_ptr);
void cuda_integer_decompress_radix_ciphertext_128_async(
void cuda_integer_decompress_radix_ciphertext_128(
CudaStreamsFFI streams, CudaLweCiphertextListFFI *lwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_in,
uint32_t const *indexes_array, int8_t *mem_ptr);
@@ -68,12 +66,12 @@ void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
void cuda_integer_extract_glwe_128_async(
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
void cuda_integer_extract_glwe_64_async(
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);

View File

@@ -10,37 +10,35 @@ template <typename Torus> struct int_compression {
Torus *tmp_lwe;
Torus *tmp_glwe_array_out;
bool gpu_memory_allocated;
uint32_t num_lwes_stored_per_glwe;
uint32_t lwe_per_glwe;
uint32_t max_num_glwes;
// num_radix_blocks: total number of LWE ciphertexts (radix blocks) to
// compress num_lwes_stored_per_glwe: max LWEs packed per GLWE (<=
// polynomial_size), defined by the chosen parameter set
// compress lwe_per_glwe: max LWEs packed per GLWE (= polynomial_size),
// defined by the chosen parameter set
int_compression(CudaStreams streams, int_radix_params compression_params,
uint32_t num_radix_blocks, uint32_t num_lwes_stored_per_glwe,
uint32_t num_radix_blocks, uint32_t lwe_per_glwe,
bool allocate_gpu_memory, uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
this->compression_params = compression_params;
this->num_lwes_stored_per_glwe = num_lwes_stored_per_glwe;
this->lwe_per_glwe = lwe_per_glwe;
uint64_t glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
compression_params.polynomial_size;
// Calculate the actual number of GLWEs needed based on total radix blocks.
// This ensures we allocate enough memory when num_radix_blocks >
// num_lwes_stored_per_glwe.
max_num_glwes = CEIL_DIV(num_radix_blocks, num_lwes_stored_per_glwe);
// lwe_per_glwe.
max_num_glwes = CEIL_DIV(num_radix_blocks, lwe_per_glwe);
tmp_lwe = static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(
(size_t)num_radix_blocks,
(size_t)(compression_params.small_lwe_dimension + 1)),
num_radix_blocks * (compression_params.small_lwe_dimension + 1) *
sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory));
tmp_glwe_array_out =
static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>((size_t)max_num_glwes,
glwe_accumulator_size),
max_num_glwes * glwe_accumulator_size * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory));
@@ -53,21 +51,12 @@ template <typename Torus> struct int_compression {
void release(CudaStreams streams) {
cuda_drop_with_size_tracking_async(
tmp_lwe, streams.stream(0), streams.gpu_index(0), gpu_memory_allocated);
tmp_lwe = nullptr;
cuda_drop_with_size_tracking_async(tmp_glwe_array_out, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
tmp_glwe_array_out = nullptr;
if constexpr (sizeof(Torus) == 8)
cleanup_cuda_packing_keyswitch_lwe_list_to_glwe_64(
streams.stream(0), streams.gpu_index(0), &fp_ks_buffer,
gpu_memory_allocated);
else
cleanup_cuda_packing_keyswitch_lwe_list_to_glwe_128(
streams.stream(0), streams.gpu_index(0), &fp_ks_buffer,
gpu_memory_allocated);
cleanup_packing_keyswitch_lwe_list_to_glwe(
streams.stream(0), streams.gpu_index(0), &fp_ks_buffer,
gpu_memory_allocated);
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
}
};
@@ -100,17 +89,14 @@ template <typename Torus> struct int_decompression {
1);
tmp_extracted_glwe = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>((size_t)num_blocks_to_decompress,
glwe_accumulator_size),
num_blocks_to_decompress * glwe_accumulator_size * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
tmp_indexes_array = (uint32_t *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<uint32_t>((size_t)num_blocks_to_decompress),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
num_blocks_to_decompress * sizeof(uint32_t), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
tmp_extracted_lwe = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>((size_t)num_blocks_to_decompress,
lwe_accumulator_size),
num_blocks_to_decompress * lwe_accumulator_size * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
@@ -136,24 +122,20 @@ template <typename Torus> struct int_decompression {
active_streams, {0}, {decompression_rescale_f},
effective_compression_message_modulus,
effective_compression_carry_modulus,
encryption_params.message_modulus, encryption_params.carry_modulus);
encryption_params.message_modulus, encryption_params.carry_modulus,
gpu_memory_allocated);
}
}
void release(CudaStreams streams) {
cuda_drop_with_size_tracking_async(tmp_extracted_glwe, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
tmp_extracted_glwe = nullptr;
cuda_drop_with_size_tracking_async(tmp_extracted_lwe, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
tmp_extracted_lwe = nullptr;
cuda_drop_with_size_tracking_async(tmp_indexes_array, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
tmp_indexes_array = nullptr;
if constexpr (std::is_same_v<Torus, uint64_t>) {
decompression_rescale_lut->release(streams);
delete decompression_rescale_lut;

View File

@@ -285,7 +285,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
for (int j = 0; j < 2; j++) {
luts[j]->generate_and_broadcast_lut(streams.get_ith(lut_gpu_indexes[j]),
{0}, {zero_out_if_not_1_lut_f},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
}
luts[0] = zero_out_if_not_2_lut_1;
@@ -295,7 +295,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
for (int j = 0; j < 2; j++) {
luts[j]->generate_and_broadcast_lut(streams.get_ith(lut_gpu_indexes[j]),
{0}, {zero_out_if_not_2_lut_f},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
}
quotient_lut_1 =
@@ -316,11 +316,11 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
auto quotient_lut_3_f = [](Torus cond) -> Torus { return cond * 3; };
quotient_lut_1->generate_and_broadcast_lut(
streams.get_ith(2), {0}, {quotient_lut_1_f}, LUT_0_FOR_ALL_BLOCKS);
streams.get_ith(2), {0}, {quotient_lut_1_f}, gpu_memory_allocated);
quotient_lut_2->generate_and_broadcast_lut(
streams.get_ith(1), {0}, {quotient_lut_2_f}, LUT_0_FOR_ALL_BLOCKS);
streams.get_ith(1), {0}, {quotient_lut_2_f}, gpu_memory_allocated);
quotient_lut_3->generate_and_broadcast_lut(
streams.get_ith(0), {0}, {quotient_lut_3_f}, LUT_0_FOR_ALL_BLOCKS);
streams.get_ith(0), {0}, {quotient_lut_3_f}, gpu_memory_allocated);
message_extract_lut_1 = new int_radix_lut<Torus>(
streams, params, 1, num_blocks, allocate_gpu_memory, size_tracker);
@@ -340,7 +340,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
for (int j = 0; j < 2; j++) {
luts[j]->generate_and_broadcast_lut(
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
}
}
@@ -433,31 +433,30 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
Torus ***second_indexes_ptr, Torus ***scalars_ptr, uint32_t num_blocks,
bool allocate_gpu_memory, uint64_t &size_tracker) {
auto first_indexes = (Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
auto second_indexes =
(Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
auto scalars = (Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
auto first_indexes = (Torus **)malloc(num_blocks * sizeof(Torus *));
auto second_indexes = (Torus **)malloc(num_blocks * sizeof(Torus *));
auto scalars = (Torus **)malloc(num_blocks * sizeof(Torus *));
for (int nb = 1; nb <= num_blocks; nb++) {
first_indexes[nb - 1] = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), stream, gpu_index, size_tracker,
nb * sizeof(Torus), stream, gpu_index, size_tracker,
allocate_gpu_memory);
second_indexes[nb - 1] = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), stream, gpu_index, size_tracker,
nb * sizeof(Torus), stream, gpu_index, size_tracker,
allocate_gpu_memory);
scalars[nb - 1] = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), stream, gpu_index, size_tracker,
nb * sizeof(Torus), stream, gpu_index, size_tracker,
allocate_gpu_memory);
cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
first_indexes[nb - 1], first_indexes_for_overflow_sub_gpu_0[nb - 1],
safe_mul_sizeof<Torus>(nb), stream, gpu_index, allocate_gpu_memory);
nb * sizeof(Torus), stream, gpu_index, allocate_gpu_memory);
cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
second_indexes[nb - 1], second_indexes_for_overflow_sub_gpu_0[nb - 1],
safe_mul_sizeof<Torus>(nb), stream, gpu_index, allocate_gpu_memory);
nb * sizeof(Torus), stream, gpu_index, allocate_gpu_memory);
cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
scalars[nb - 1], scalars_for_overflow_sub_gpu_0[nb - 1],
safe_mul_sizeof<Torus>(nb), stream, gpu_index, allocate_gpu_memory);
nb * sizeof(Torus), stream, gpu_index, allocate_gpu_memory);
*first_indexes_ptr = first_indexes;
*second_indexes_ptr = second_indexes;
*scalars_ptr = scalars;
@@ -471,91 +470,72 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
max_indexes_to_erase = num_blocks;
first_indexes_for_overflow_sub_gpu_0 =
(Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
(Torus **)malloc(num_blocks * sizeof(Torus *));
second_indexes_for_overflow_sub_gpu_0 =
(Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
(Torus **)malloc(num_blocks * sizeof(Torus *));
scalars_for_overflow_sub_gpu_0 =
(Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
(Torus **)malloc(num_blocks * sizeof(Torus *));
Torus *h_lut_indexes = (Torus *)malloc(safe_mul_sizeof<Torus>(num_blocks));
Torus *h_scalar = (Torus *)malloc(safe_mul_sizeof<Torus>(num_blocks));
Torus *h_lut_indexes = (Torus *)malloc(num_blocks * sizeof(Torus));
Torus *h_scalar = (Torus *)malloc(num_blocks * sizeof(Torus));
// Extra indexes for the luts in first step
for (int nb = 1; nb <= num_blocks; nb++) {
first_indexes_for_overflow_sub_gpu_0[nb - 1] =
(Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
auto index_generator = [nb, group_size](Torus *h_lut_indexes, uint32_t) {
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
bool is_last_index = (index == (nb - 1));
if (is_last_index) {
if (nb == 1) {
h_lut_indexes[index] = 2 * group_size;
} else {
h_lut_indexes[index] = 2;
}
} else if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
bool is_last_index = (index == (nb - 1));
if (is_last_index) {
if (nb == 1) {
h_lut_indexes[index] = 2 * group_size;
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
h_lut_indexes[index] = 2;
}
} else if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
}
};
generate_lut_indexes<Torus>(streams, index_generator,
first_indexes_for_overflow_sub_gpu_0[nb - 1],
nb, 2 * group_size + 1, h_lut_indexes,
allocate_gpu_memory);
}
cuda_memcpy_with_size_tracking_async_to_gpu(
first_indexes_for_overflow_sub_gpu_0[nb - 1], h_lut_indexes,
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
allocate_gpu_memory);
}
// Extra indexes for the luts in second step
uint32_t num_extra_luts = use_seq ? (group_size - 1) : 1;
uint32_t num_luts_second_step = 2 * group_size + num_extra_luts;
for (int nb = 1; nb <= num_blocks; nb++) {
second_indexes_for_overflow_sub_gpu_0[nb - 1] =
(Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
scalars_for_overflow_sub_gpu_0[nb - 1] =
(Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
auto index_generator = [nb, group_size, use_seq](Torus *h_lut_indexes,
uint32_t) {
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
} else if (index_in_grouping == (group_size - 1)) {
if (use_seq) {
int inner_index = (grouping_index - 1) % (group_size - 1);
h_lut_indexes[index] = inner_index + 2 * group_size;
} else {
h_lut_indexes[index] = 2 * group_size;
}
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
}
}
};
generate_lut_indexes<Torus>(streams, index_generator,
second_indexes_for_overflow_sub_gpu_0[nb - 1],
nb, num_luts_second_step, h_lut_indexes,
allocate_gpu_memory);
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
} else if (index_in_grouping == (group_size - 1)) {
if (use_seq) {
int inner_index = (grouping_index - 1) % (group_size - 1);
h_lut_indexes[index] = inner_index + 2 * group_size;
} else {
h_lut_indexes[index] = 2 * group_size;
}
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
}
bool may_have_its_padding_bit_set =
!is_in_first_grouping && (index_in_grouping == group_size - 1);
@@ -570,9 +550,12 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
}
}
cuda_memcpy_with_size_tracking_async_to_gpu(
scalars_for_overflow_sub_gpu_0[nb - 1], h_scalar,
safe_mul_sizeof<Torus>(nb), streams.stream(0), streams.gpu_index(0),
second_indexes_for_overflow_sub_gpu_0[nb - 1], h_lut_indexes,
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
allocate_gpu_memory);
cuda_memcpy_with_size_tracking_async_to_gpu(
scalars_for_overflow_sub_gpu_0[nb - 1], h_scalar, nb * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
}
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
free(h_lut_indexes);
@@ -1008,12 +991,12 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
auto active_streams_1 = streams.active_gpu_subset(1, params.pbs_type);
masking_luts_1[i]->generate_and_broadcast_lut(
active_streams_1, {0}, {lut_f_masking}, LUT_0_FOR_ALL_BLOCKS);
active_streams_1, {0}, {lut_f_masking}, gpu_memory_allocated);
auto active_streams_2 =
streams.active_gpu_subset(num_blocks, params.pbs_type);
masking_luts_2[i]->generate_and_broadcast_lut(
active_streams_2, {0}, {lut_f_masking}, LUT_0_FOR_ALL_BLOCKS);
active_streams_2, {0}, {lut_f_masking}, gpu_memory_allocated);
}
// create and generate message_extract_lut_1 and message_extract_lut_2
@@ -1036,7 +1019,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
streams.active_gpu_subset(num_blocks, params.pbs_type);
for (int j = 0; j < 2; j++) {
luts[j]->generate_and_broadcast_lut(
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
}
// Give name to closures to improve readability
@@ -1064,11 +1047,11 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
zero_out_if_overflow_did_not_happen[0]
->generate_and_broadcast_bivariate_lut(active_streams, {0}, {cur_lut_f},
LUT_0_FOR_ALL_BLOCKS, {},
gpu_memory_allocated, {},
params.message_modulus - 2);
zero_out_if_overflow_did_not_happen[1]
->generate_and_broadcast_bivariate_lut(active_streams, {0}, {cur_lut_f},
LUT_0_FOR_ALL_BLOCKS, {},
gpu_memory_allocated, {},
params.message_modulus - 1);
// create and generate zero_out_if_overflow_happened
@@ -1087,10 +1070,10 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
};
zero_out_if_overflow_happened[0]->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {overflow_happened_f}, LUT_0_FOR_ALL_BLOCKS, {},
active_streams, {0}, {overflow_happened_f}, gpu_memory_allocated, {},
params.message_modulus - 2);
zero_out_if_overflow_happened[1]->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {overflow_happened_f}, LUT_0_FOR_ALL_BLOCKS, {},
active_streams, {0}, {overflow_happened_f}, gpu_memory_allocated, {},
params.message_modulus - 1);
// merge_overflow_flags_luts
@@ -1106,7 +1089,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
streams, params, 1, 1, allocate_gpu_memory, size_tracker);
merge_overflow_flags_luts[i]->generate_and_broadcast_bivariate_lut(
active_gpu_count_for_bits, {0}, {lut_f_bit}, LUT_0_FOR_ALL_BLOCKS);
active_gpu_count_for_bits, {0}, {lut_f_bit}, gpu_memory_allocated);
}
}
@@ -1164,89 +1147,71 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
max_indexes_to_erase = num_blocks;
first_indexes_for_overflow_sub =
(Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
(Torus **)malloc(num_blocks * sizeof(Torus *));
second_indexes_for_overflow_sub =
(Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
scalars_for_overflow_sub =
(Torus **)malloc(safe_mul_sizeof<Torus *>(num_blocks));
(Torus **)malloc(num_blocks * sizeof(Torus *));
scalars_for_overflow_sub = (Torus **)malloc(num_blocks * sizeof(Torus *));
Torus *h_lut_indexes = (Torus *)malloc(safe_mul_sizeof<Torus>(num_blocks));
Torus *h_scalar = (Torus *)malloc(safe_mul_sizeof<Torus>(num_blocks));
Torus *h_lut_indexes = (Torus *)malloc(num_blocks * sizeof(Torus));
Torus *h_scalar = (Torus *)malloc(num_blocks * sizeof(Torus));
// Extra indexes for the luts in first step
for (int nb = 1; nb <= num_blocks; nb++) {
first_indexes_for_overflow_sub[nb - 1] =
(Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
auto index_generator = [nb, group_size](Torus *h_lut_indexes, uint32_t) {
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
bool is_last_index = (index == (nb - 1));
if (is_last_index) {
if (nb == 1) {
h_lut_indexes[index] = 2 * group_size;
} else {
h_lut_indexes[index] = 2;
}
} else if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
bool is_last_index = (index == (nb - 1));
if (is_last_index) {
if (nb == 1) {
h_lut_indexes[index] = 2 * group_size;
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
h_lut_indexes[index] = 2;
}
} else if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
}
};
generate_lut_indexes<Torus>(
streams, index_generator, first_indexes_for_overflow_sub[nb - 1], nb,
2 * group_size + 1, h_lut_indexes, allocate_gpu_memory);
}
cuda_memcpy_with_size_tracking_async_to_gpu(
first_indexes_for_overflow_sub[nb - 1], h_lut_indexes,
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
allocate_gpu_memory);
}
// Extra indexes for the luts in second step
uint32_t num_extra_luts = use_seq ? (group_size - 1) : 1;
uint32_t num_luts_second_step = 2 * group_size + num_extra_luts;
for (int nb = 1; nb <= num_blocks; nb++) {
second_indexes_for_overflow_sub[nb - 1] =
(Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
scalars_for_overflow_sub[nb - 1] =
(Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(nb), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
auto index_generator = [nb, group_size, use_seq](Torus *h_lut_indexes,
uint32_t) {
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
} else if (index_in_grouping == (group_size - 1)) {
if (use_seq) {
int inner_index = (grouping_index - 1) % (group_size - 1);
h_lut_indexes[index] = inner_index + 2 * group_size;
} else {
h_lut_indexes[index] = 2 * group_size;
}
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
}
}
};
generate_lut_indexes<Torus>(
streams, index_generator, second_indexes_for_overflow_sub[nb - 1], nb,
num_luts_second_step, h_lut_indexes, allocate_gpu_memory);
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
for (int index = 0; index < nb; index++) {
uint32_t grouping_index = index / group_size;
bool is_in_first_grouping = (grouping_index == 0);
uint32_t index_in_grouping = index % group_size;
if (is_in_first_grouping) {
h_lut_indexes[index] = index_in_grouping;
} else if (index_in_grouping == (group_size - 1)) {
if (use_seq) {
int inner_index = (grouping_index - 1) % (group_size - 1);
h_lut_indexes[index] = inner_index + 2 * group_size;
} else {
h_lut_indexes[index] = 2 * group_size;
}
} else {
h_lut_indexes[index] = index_in_grouping + group_size;
}
bool may_have_its_padding_bit_set =
!is_in_first_grouping && (index_in_grouping == group_size - 1);
@@ -1261,9 +1226,12 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
}
}
cuda_memcpy_with_size_tracking_async_to_gpu(
scalars_for_overflow_sub[nb - 1], h_scalar,
safe_mul_sizeof<Torus>(nb), streams.stream(0), streams.gpu_index(0),
second_indexes_for_overflow_sub[nb - 1], h_lut_indexes,
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
allocate_gpu_memory);
cuda_memcpy_with_size_tracking_async_to_gpu(
scalars_for_overflow_sub[nb - 1], h_scalar, nb * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
}
free(h_lut_indexes);
free(h_scalar);
@@ -1535,7 +1503,7 @@ template <typename Torus> struct int_div_rem_memory {
compare_signed_bits_lut->generate_and_broadcast_bivariate_lut(
active_gpu_count_cmp, {0}, {f_compare_extracted_signed_bits},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
}
}

View File

@@ -54,7 +54,7 @@ template <typename Torus> struct int_prepare_count_of_consecutive_bits_buffer {
};
univ_lut_mem->generate_and_broadcast_lut(
active_streams, {0}, {generate_uni_lut_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {generate_uni_lut_lambda}, allocate_gpu_memory);
auto generate_bi_lut_lambda =
[num_bits](Torus block_num_bit_count,
@@ -66,7 +66,7 @@ template <typename Torus> struct int_prepare_count_of_consecutive_bits_buffer {
};
biv_lut_mem->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {generate_bi_lut_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {generate_bi_lut_lambda}, allocate_gpu_memory);
this->tmp_ct = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
@@ -234,7 +234,7 @@ template <typename Torus> struct int_ilog2_buffer {
auto active_streams =
streams.active_gpu_subset(counter_num_blocks, params.pbs_type);
lut_message_not->generate_and_broadcast_lut(
active_streams, {0}, {lut_message_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {lut_message_lambda}, allocate_gpu_memory);
this->lut_carry_not =
new int_radix_lut<Torus>(streams, params, 1, counter_num_blocks,
@@ -245,7 +245,7 @@ template <typename Torus> struct int_ilog2_buffer {
return (~carry) % this->params.message_modulus;
};
lut_carry_not->generate_and_broadcast_lut(
active_streams, {0}, {lut_carry_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {lut_carry_lambda}, allocate_gpu_memory);
this->message_blocks_not = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(

File diff suppressed because it is too large Load Diff

View File

@@ -42,7 +42,7 @@ template <typename Torus> struct int_mul_memory {
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
zero_out_predicate_lut->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {zero_out_predicate_lut_f},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
zero_out_mem = new int_zero_out_if_buffer<Torus>(
streams, params, num_radix_blocks, allocate_gpu_memory, size_tracker);
@@ -62,10 +62,6 @@ template <typename Torus> struct int_mul_memory {
int total_block_count = num_radix_blocks * num_radix_blocks;
GPU_ASSERT(lsb_vector_block_count + msb_vector_block_count ==
total_block_count,
"MSB and LSB vector block counts don't match");
// allocate memory for intermediate buffers
vector_result_sb = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
@@ -100,16 +96,16 @@ template <typename Torus> struct int_mul_memory {
// first lsb_vector_block_count value should reference to lsb_acc
// last msb_vector_block_count values should reference to msb_acc
// for message and carry default lut_indexes_vec is fine
if (allocate_gpu_memory)
cuda_set_value_async<Torus>(
streams.stream(0), streams.gpu_index(0),
luts_array->get_lut_indexes(0, lsb_vector_block_count), 1,
msb_vector_block_count);
auto active_streams =
streams.active_gpu_subset(total_block_count, params.pbs_type);
auto lut_index_generator = [lsb_vector_block_count](Torus *h_lut_indexes,
uint32_t num_indexes) {
for (uint32_t i = 0; i < num_indexes; i++) {
h_lut_indexes[i] = (i < lsb_vector_block_count) ? 0 : 1;
}
};
luts_array->generate_and_broadcast_bivariate_lut(
active_streams, {0, 1}, {lut_f_lsb, lut_f_msb}, lut_index_generator);
active_streams, {0, 1}, {lut_f_lsb, lut_f_msb}, gpu_memory_allocated);
// create memory object for sum ciphertexts
sum_ciphertexts_mem = new int_sum_ciphertexts_vec_memory<Torus>(

View File

@@ -86,10 +86,9 @@ template <typename Torus> struct int_grouped_oprf_memory {
// (handling both bounded and unbounded cases), which pre-computed LUT to
// use, and the final plaintext correction to add.
//
Torus *h_corrections = (Torus *)calloc(
1, safe_mul_sizeof<Torus>(num_blocks_to_process, lwe_size));
this->h_lut_indexes =
(Torus *)calloc(1, safe_mul_sizeof<Torus>(num_blocks_to_process));
Torus *h_corrections =
(Torus *)calloc(num_blocks_to_process * lwe_size, sizeof(Torus));
this->h_lut_indexes = (Torus *)calloc(num_blocks_to_process, sizeof(Torus));
uint64_t bits_processed = 0;
for (uint32_t i = 0; i < num_blocks_to_process; ++i) {
@@ -105,6 +104,10 @@ template <typename Torus> struct int_grouped_oprf_memory {
Torus plaintext_to_add = (p - 1) * delta / 2;
h_corrections[i * lwe_size + params.big_lwe_dimension] = plaintext_to_add;
if (bits_for_this_block < 1) {
PANIC("bits_for_this_block should be greater than 1");
}
this->h_lut_indexes[i] = bits_for_this_block - 1;
bits_processed += bits_for_this_block;
}
@@ -116,35 +119,22 @@ template <typename Torus> struct int_grouped_oprf_memory {
// Copy the prepared plaintext corrections to the GPU.
cuda_memcpy_with_size_tracking_async_to_gpu(
this->plaintext_corrections->ptr, h_corrections,
safe_mul_sizeof<Torus>(num_blocks_to_process, lwe_size),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
num_blocks_to_process * lwe_size * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
// Copy the prepared LUT indexes to the GPU 0, before broadcast to all other
// GPUs.
cuda_memcpy_with_size_tracking_async_to_gpu(
luts->get_lut_indexes(0, 0), this->h_lut_indexes,
num_blocks_to_process * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
auto active_streams =
streams.active_gpu_subset(num_blocks_to_process, params.pbs_type);
// No encoding for these LUTS. Generate LUT also sets LUT degrees to default
// values
auto luts_index_generator = [total_random_bits, message_bits_per_block](
Torus *h_lut_indexes, uint32_t num_blocks) {
uint64_t bits_processed = 0;
for (uint32_t i = 0; i < num_blocks; ++i) {
if (total_random_bits <= bits_processed) {
PANIC("total_random_bits should be greater than bits_processed");
}
uint64_t bits_remaining = total_random_bits - bits_processed;
uint32_t bits_for_this_block =
std::min((uint64_t)message_bits_per_block, bits_remaining);
if (bits_for_this_block < 1) {
PANIC("bits_for_this_block should be greater than 1");
}
h_lut_indexes[i] = bits_for_this_block - 1;
bits_processed += bits_for_this_block;
}
};
luts->generate_and_broadcast_lut(active_streams, lut_indices, lut_funcs,
luts_index_generator, false, {},
this->h_lut_indexes);
allocate_gpu_memory, false);
// OPRF requires custom LUT degrees
for (uint32_t i = 0; i < lut_degrees.size(); ++i) {

View File

@@ -3,16 +3,17 @@
#include "integer.h"
extern "C" {
uint64_t scratch_cuda_rerand_64_async(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory);
uint64_t
scratch_cuda_rerand_64(CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension,
uint32_t ks_level, uint32_t ks_base_log,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory);
void cuda_rerand_64_async(
void cuda_rerand_64(
CudaStreamsFFI streams, void *lwe_array,
const void *lwe_flattened_encryptions_of_zero_compact_array_in,
int8_t *mem_ptr, void *const *ksk);
void cleanup_cuda_rerand_64(CudaStreamsFFI streams, int8_t **mem_ptr_void);
void cleanup_cuda_rerand(CudaStreamsFFI streams, int8_t **mem_ptr_void);
}

View File

@@ -1,6 +1,5 @@
#pragma once
#include "checked_arithmetic.h"
#include "integer_utilities.h"
#include "keyswitch/ks_enums.h"
#include "zk/expand.cuh"
@@ -30,34 +29,34 @@ template <typename Torus> struct int_rerand_mem {
gpu_memory_allocated(allocate_gpu_memory) {
tmp_zero_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_lwes, params.big_lwe_dimension + 1),
num_lwes * (params.big_lwe_dimension + 1) * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
tmp_ksed_zero_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_lwes, params.small_lwe_dimension + 1),
num_lwes * (params.small_lwe_dimension + 1) * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
d_expand_jobs =
static_cast<expand_job<Torus> *>(cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<expand_job<Torus>>(num_lwes), streams.stream(0),
num_lwes * sizeof(expand_job<Torus>), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory));
h_expand_jobs = static_cast<expand_job<Torus> *>(
malloc(safe_mul_sizeof<expand_job<Torus>>(num_lwes)));
malloc(num_lwes * sizeof(expand_job<Torus>)));
auto h_lwe_trivial_indexes =
static_cast<Torus *>(malloc(safe_mul_sizeof<Torus>(num_lwes)));
static_cast<Torus *>(malloc(num_lwes * sizeof(Torus)));
for (auto i = 0; i < num_lwes; ++i) {
h_lwe_trivial_indexes[i] = i;
}
lwe_trivial_indexes = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_lwes), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
num_lwes * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
cuda_memcpy_async_to_gpu(lwe_trivial_indexes, h_lwe_trivial_indexes,
safe_mul_sizeof<Torus>(num_lwes),
streams.stream(0), streams.gpu_index(0));
num_lwes * sizeof(Torus), streams.stream(0),
streams.gpu_index(0));
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
@@ -68,19 +67,15 @@ template <typename Torus> struct int_rerand_mem {
cuda_drop_with_size_tracking_async(tmp_zero_lwes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
tmp_zero_lwes = nullptr;
cuda_drop_with_size_tracking_async(tmp_ksed_zero_lwes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
tmp_ksed_zero_lwes = nullptr;
cuda_drop_with_size_tracking_async(lwe_trivial_indexes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
lwe_trivial_indexes = nullptr;
cuda_drop_with_size_tracking_async(d_expand_jobs, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
d_expand_jobs = nullptr;
for (auto i = 0; i < ks_tmp_buf_vec.size(); i++) {
cleanup_cuda_keyswitch(streams.stream(i), streams.gpu_index(i),
@@ -90,6 +85,5 @@ template <typename Torus> struct int_rerand_mem {
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
free(h_expand_jobs);
h_expand_jobs = nullptr;
}
};

View File

@@ -89,7 +89,7 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
auto active_streams =
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
cur_lut_bivariate->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {shift_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {shift_lut_f}, gpu_memory_allocated);
lut_buffers_bivariate.push_back(cur_lut_bivariate);
}
@@ -171,7 +171,7 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
auto active_streams =
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
cur_lut_bivariate->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {shift_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {shift_lut_f}, gpu_memory_allocated);
lut_buffers_bivariate.push_back(cur_lut_bivariate);
}
}
@@ -265,7 +265,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
streams.active_gpu_subset(1, params.pbs_type);
shift_last_block_lut_univariate->generate_and_broadcast_lut(
active_streams_shift_last, {0}, {last_block_lut_f},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
lut_buffers_univariate.push_back(shift_last_block_lut_univariate);
}
@@ -284,7 +284,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
};
padding_block_lut_univariate->generate_and_broadcast_lut(
active_streams, {0}, {padding_block_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {padding_block_lut_f}, gpu_memory_allocated);
lut_buffers_univariate.push_back(padding_block_lut_univariate);
@@ -321,7 +321,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
shift_blocks_lut_bivariate->generate_and_broadcast_bivariate_lut(
active_streams_shift_blocks, {0}, {blocks_lut_f},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
lut_buffers_bivariate.push_back(shift_blocks_lut_bivariate);
}

View File

@@ -117,7 +117,7 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
bits_per_block * num_radix_blocks, params.pbs_type);
mux_lut->generate_and_broadcast_lut(active_gpu_count_mux, {0}, {mux_lut_f},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
auto cleaning_lut_f = [params](Torus x) -> Torus {
return x % params.message_modulus;
@@ -126,7 +126,7 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
auto active_gpu_count_cleaning =
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
cleaning_lut->generate_and_broadcast_lut(
active_gpu_count_cleaning, {0}, {cleaning_lut_f}, LUT_0_FOR_ALL_BLOCKS);
active_gpu_count_cleaning, {0}, {cleaning_lut_f}, gpu_memory_allocated);
}
void release(CudaStreams streams) {

View File

@@ -22,22 +22,21 @@ template <typename Torus> struct int_overflowing_sub_memory {
auto message_modulus = params.message_modulus;
auto carry_modulus = params.carry_modulus;
auto big_lwe_size = (polynomial_size * glwe_dimension + 1);
auto total_size_bytes =
safe_mul_sizeof<Torus>((size_t)num_radix_blocks, big_lwe_size);
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
// allocate memory for intermediate calculations
generates_or_propagates = (Torus *)cuda_malloc_with_size_tracking_async(
total_size_bytes, streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
num_radix_blocks * big_lwe_size_bytes, streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
step_output = (Torus *)cuda_malloc_with_size_tracking_async(
total_size_bytes, streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
num_radix_blocks * big_lwe_size_bytes, streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
cuda_memset_with_size_tracking_async(
generates_or_propagates, 0, total_size_bytes, streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
generates_or_propagates, 0, num_radix_blocks * big_lwe_size_bytes,
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
cuda_memset_with_size_tracking_async(
step_output, 0, total_size_bytes, streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
step_output, 0, num_radix_blocks * big_lwe_size_bytes,
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
// declare functions for lut generation
auto f_lut_does_block_generate_carry = [message_modulus](Torus x) -> Torus {
@@ -75,27 +74,26 @@ template <typename Torus> struct int_overflowing_sub_memory {
luts_array, size_tracker,
allocate_gpu_memory, size_tracker);
if (allocate_gpu_memory)
cuda_set_value_async<Torus>(streams.stream(0), streams.gpu_index(0),
luts_array->get_lut_indexes(0, 1), 1,
num_radix_blocks - 1);
auto active_streams =
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
luts_borrow_propagation_sum->generate_and_broadcast_bivariate_lut(
active_streams, {0}, {f_luts_borrow_propagation_sum},
LUT_0_FOR_ALL_BLOCKS);
gpu_memory_allocated);
auto luts_array_index_generator = [](Torus *h_lut_indexes,
uint32_t num_indexes) {
for (uint32_t i = 0; i < num_indexes; i++) {
h_lut_indexes[i] = (i == 0) ? 0 : 1;
}
};
luts_array->generate_and_broadcast_lut(
active_streams, {0, 1},
{f_lut_does_block_generate_carry,
f_lut_does_block_generate_or_propagate},
luts_array_index_generator);
gpu_memory_allocated);
// generate luts (aka accumulators)
message_acc->generate_and_broadcast_lut(
active_streams, {0}, {f_message_acc}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {f_message_acc}, gpu_memory_allocated);
}
void release(CudaStreams streams) {

View File

@@ -62,7 +62,7 @@ template <typename Torus> struct int_equality_selectors_buffer {
}
this->comparison_luts->generate_and_broadcast_many_lut(
active_streams, {0}, {fns}, LUT_0_FOR_ALL_BLOCKS);
active_streams, {0}, {fns}, allocate_gpu_memory);
fns.clear();
this->tmp_many_luts_output = new CudaRadixCiphertextFFI;
@@ -196,7 +196,7 @@ template <typename Torus> struct int_possible_results_buffer {
current_lut->generate_and_broadcast_many_lut(
streams.active_gpu_subset(1, params.pbs_type), {0}, {fns},
LUT_0_FOR_ALL_BLOCKS);
allocate_gpu_memory);
stream_luts[lut_count++] = current_lut;
lut_value_start += luts_in_this_call;
@@ -287,7 +287,7 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {
lut->generate_and_broadcast_lut(
streams.active_gpu_subset(num_blocks, params.pbs_type), {0}, {id_fn},
LUT_0_FOR_ALL_BLOCKS);
allocate_gpu_memory);
this->stream_identity_luts[i] = lut;
}
@@ -304,14 +304,14 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {
this->message_extract_lut->generate_and_broadcast_lut(
streams.active_gpu_subset(num_blocks, params.pbs_type), {0}, {msg_fn},
LUT_0_FOR_ALL_BLOCKS);
allocate_gpu_memory);
this->carry_extract_lut = new int_radix_lut<Torus>(
streams, params, 1, num_blocks, allocate_gpu_memory, size_tracker);
this->carry_extract_lut->generate_and_broadcast_lut(
streams.active_gpu_subset(num_blocks, params.pbs_type), {0}, {carry_fn},
LUT_0_FOR_ALL_BLOCKS);
allocate_gpu_memory);
this->partial_aggregated_vectors =
new CudaRadixCiphertextFFI *[num_streams];
@@ -534,7 +534,7 @@ template <typename Torus> struct int_unchecked_match_value_or_buffer {
this->tmp_or_value = new CudaRadixCiphertextFFI;
this->d_or_value = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_final_blocks), streams.stream(0),
num_final_blocks * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
create_zero_radix_ciphertext_async<Torus>(
@@ -712,8 +712,8 @@ template <typename Torus> struct int_unchecked_contains_clear_buffer {
allocate_gpu_memory);
this->d_clear_val = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_blocks), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
num_blocks * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
}
void release(CudaStreams streams) {
@@ -866,7 +866,7 @@ template <typename Torus> struct int_final_index_from_selectors_buffer {
uint32_t num_bits_in_message = log2_int(params.message_modulus);
uint32_t bits_per_packed_block = 2 * num_bits_in_message;
h_indices = new uint64_t[safe_mul((size_t)num_inputs, (size_t)packed_len)];
h_indices = new uint64_t[num_inputs * packed_len];
for (uint32_t i = 0; i < num_inputs; i++) {
uint64_t val = i;
for (uint32_t b = 0; b < packed_len; b++) {
@@ -1128,16 +1128,15 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
allocate_gpu_memory);
this->d_clear_val = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_blocks), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
num_blocks * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
h_indices = nullptr;
if (allocate_gpu_memory) {
uint32_t num_bits_in_message = log2_int(params.message_modulus);
uint32_t bits_per_packed_block = 2 * num_bits_in_message;
h_indices =
new uint64_t[safe_mul((size_t)num_inputs, (size_t)packed_len)];
h_indices = new uint64_t[num_inputs * packed_len];
for (uint32_t i = 0; i < num_inputs; i++) {
uint64_t val = i;
for (uint32_t b = 0; b < packed_len; b++) {
@@ -1157,11 +1156,11 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
return current;
};
this->prefix_sum_lut = new int_radix_lut<Torus>(
streams, params, 1, num_inputs, allocate_gpu_memory, size_tracker);
streams, params, 2, num_inputs, allocate_gpu_memory, size_tracker);
this->prefix_sum_lut->generate_and_broadcast_bivariate_lut(
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
{prefix_sum_fn}, LUT_0_FOR_ALL_BLOCKS);
{prefix_sum_fn}, allocate_gpu_memory);
auto cleanup_fn = [ALREADY_SEEN, params](Torus x) -> Torus {
Torus val = x % params.message_modulus;
@@ -1173,7 +1172,7 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
streams, params, 1, num_inputs, allocate_gpu_memory, size_tracker);
this->cleanup_lut->generate_and_broadcast_lut(
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
{cleanup_fn}, LUT_0_FOR_ALL_BLOCKS);
{cleanup_fn}, allocate_gpu_memory);
}
void release(CudaStreams streams) {
@@ -1317,8 +1316,7 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
uint32_t num_bits_in_message = log2_int(params.message_modulus);
uint32_t bits_per_packed_block = 2 * num_bits_in_message;
h_indices =
new uint64_t[safe_mul((size_t)num_inputs, (size_t)packed_len)];
h_indices = new uint64_t[num_inputs * packed_len];
for (uint32_t i = 0; i < num_inputs; i++) {
uint64_t val = i;
for (uint32_t b = 0; b < packed_len; b++) {
@@ -1338,11 +1336,11 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
return current;
};
this->prefix_sum_lut = new int_radix_lut<Torus>(
streams, params, 1, num_inputs, allocate_gpu_memory, size_tracker);
streams, params, 2, num_inputs, allocate_gpu_memory, size_tracker);
this->prefix_sum_lut->generate_and_broadcast_bivariate_lut(
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
{prefix_sum_fn}, LUT_0_FOR_ALL_BLOCKS);
{prefix_sum_fn}, allocate_gpu_memory);
auto cleanup_fn = [ALREADY_SEEN, params](Torus x) -> Torus {
Torus val = x % params.message_modulus;
@@ -1354,7 +1352,7 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
streams, params, 1, num_inputs, allocate_gpu_memory, size_tracker);
this->cleanup_lut->generate_and_broadcast_lut(
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
{cleanup_fn}, LUT_0_FOR_ALL_BLOCKS);
{cleanup_fn}, allocate_gpu_memory);
}
void release(CudaStreams streams) {

View File

@@ -5,83 +5,73 @@
extern "C" {
void cuda_keyswitch_lwe_ciphertext_vector_64_64_async(
void cuda_keyswitch_lwe_ciphertext_vector_64_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *ksk, uint32_t lwe_dimension_in,
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
uint32_t num_samples);
void cuda_keyswitch_lwe_ciphertext_vector_64_32_async(
void cuda_keyswitch_lwe_ciphertext_vector_64_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *ksk, uint32_t lwe_dimension_in,
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
uint32_t num_samples);
uint64_t scratch_cuda_packing_keyswitch_lwe_list_to_glwe_64_async(
uint64_t scratch_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory);
void cuda_keyswitch_gemm_64_64_async(
void cuda_keyswitch_gemm_lwe_ciphertext_vector_64_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *ksk, uint32_t lwe_dimension_in,
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
uint32_t num_samples, const void *ks_tmp_buffer, bool uses_trivial_indexes);
void cuda_keyswitch_gemm_64_32_async(
void cuda_keyswitch_gemm_lwe_ciphertext_vector_64_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *ksk, uint32_t lwe_dimension_in,
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
uint32_t num_samples, const void *ks_tmp_buffer, bool uses_trivial_indexes);
uint64_t scratch_cuda_keyswitch_gemm_64_64_async(
void *stream, uint32_t gpu_index, void **ks_tmp_memory,
uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t num_lwes,
bool allocate_gpu_memory);
uint64_t scratch_cuda_keyswitch_gemm_64(void *stream, uint32_t gpu_index,
void **ks_tmp_memory,
uint32_t lwe_dimension_in,
uint32_t lwe_dimension_out,
uint32_t num_lwes,
bool allocate_gpu_memory);
uint64_t scratch_cuda_keyswitch_gemm_64_32_async(
void *stream, uint32_t gpu_index, void **ks_tmp_memory,
uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t num_lwes,
bool allocate_gpu_memory);
void cleanup_cuda_keyswitch_gemm_64(void *stream, uint32_t gpu_index,
void **ks_tmp_memory,
bool allocate_gpu_memory);
void cleanup_cuda_keyswitch_gemm_64_64(void *stream, uint32_t gpu_index,
void **ks_tmp_memory,
bool allocate_gpu_memory);
void cleanup_cuda_keyswitch_gemm_64_32(void *stream, uint32_t gpu_index,
void **ks_tmp_memory,
bool allocate_gpu_memory);
void cuda_packing_keyswitch_lwe_list_to_glwe_64_async(
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, void *glwe_array_out,
void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
uint32_t input_lwe_dimension, uint32_t output_glwe_dimension,
uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_lwes);
void scratch_cuda_packing_keyswitch_lwe_list_to_glwe_128_async(
void scratch_packing_keyswitch_lwe_list_to_glwe_128(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory);
void cuda_packing_keyswitch_lwe_list_to_glwe_128_async(
void cuda_packing_keyswitch_lwe_list_to_glwe_128(
void *stream, uint32_t gpu_index, void *glwe_array_out,
void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
uint32_t input_lwe_dimension, uint32_t output_glwe_dimension,
uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_lwes);
void cleanup_cuda_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
bool gpu_memory_allocated);
void cleanup_cuda_packing_keyswitch_lwe_list_to_glwe_128(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
bool gpu_memory_allocated);
void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,
uint32_t gpu_index,
int8_t **fp_ks_buffer,
bool gpu_memory_allocated);
void cuda_closest_representable_64_async(void *stream, uint32_t gpu_index,
void const *input, void *output,

View File

@@ -4,7 +4,7 @@
#include "../integer/integer.h"
extern "C" {
uint64_t scratch_cuda_kreyvium_generate_keystream_64_async(
uint64_t scratch_cuda_kreyvium_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
@@ -12,14 +12,13 @@ uint64_t scratch_cuda_kreyvium_generate_keystream_64_async(
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs);
void cuda_kreyvium_generate_keystream_64_async(
void cuda_kreyvium_generate_keystream_64(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *keystream_output,
const CudaRadixCiphertextFFI *key, const CudaRadixCiphertextFFI *iv,
uint32_t num_inputs, uint32_t num_steps, int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void cleanup_cuda_kreyvium_generate_keystream_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
void cleanup_cuda_kreyvium_64(CudaStreamsFFI streams, int8_t **mem_ptr_void);
}
#endif

View File

@@ -43,19 +43,20 @@ void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void scratch_cuda_wrapping_polynomial_mul_one_to_many_64_async(
void *stream, uint32_t gpu_index, uint32_t polynomial_size,
int8_t **circulant_buf);
void scratch_wrapping_polynomial_mul_one_to_many_64(void *stream,
uint32_t gpu_index,
uint32_t polynomial_size,
int8_t **circulant_buf);
void cleanup_cuda_wrapping_polynomial_mul_one_to_many_64(void *stream,
uint32_t gpu_index,
int8_t *circulant_buf);
void cleanup_wrapping_polynomial_mul_one_to_many_64(void *stream,
uint32_t gpu_index,
int8_t *circulant_buf);
void cuda_wrapping_polynomial_mul_one_to_many_64_async(
void cuda_wrapping_polynomial_mul_one_to_many_64(
void *stream, uint32_t gpu_index, void *result, void const *poly_lhs,
int8_t *circulant, void const *poly_rhs, uint32_t polynomial_size,
uint32_t n_rhs);
void cuda_glwe_wrapping_polynomial_mul_one_to_many_64_async(
void cuda_glwe_wrapping_polynomial_mul_one_to_many_64(
void *stream, uint32_t gpu_index, void *result, void const *poly_lhs,
int8_t *circulant, void const *poly_rhs, uint32_t polynomial_size,
uint32_t glwe_dimension, uint32_t n_rhs);

View File

@@ -1,7 +1,6 @@
#ifndef CUDA_MULTI_BIT_UTILITIES_H
#define CUDA_MULTI_BIT_UTILITIES_H
#include "checked_arithmetic.h"
#include "pbs_utilities.h"
template <typename Torus>
@@ -106,8 +105,7 @@ uint64_t get_lwe_chunk_size_128(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size,
uint32_t glwe_dimension, uint32_t level_count,
uint64_t full_sm_keybundle);
template <typename Torus>
struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> : public pbs_buffer_base {
template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
int8_t *d_mem_keybundle = NULL;
int8_t *d_mem_acc_step_one = NULL;
int8_t *d_mem_acc_step_two = NULL;
@@ -154,18 +152,15 @@ struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> : public pbs_buffer_base {
get_buffer_size_partial_sm_cg_multibit_programmable_bootstrap<Torus>(
polynomial_size);
size_t num_blocks_keybundle = safe_mul(
(size_t)input_lwe_ciphertext_count, (size_t)lwe_chunk_size,
safe_mul((size_t)(glwe_dimension + 1), (size_t)(glwe_dimension + 1)),
(size_t)level_count);
size_t num_blocks_acc_step_one =
safe_mul((size_t)level_count, (size_t)(glwe_dimension + 1),
(size_t)input_lwe_ciphertext_count);
size_t num_blocks_acc_step_two = safe_mul(
(size_t)input_lwe_ciphertext_count, (size_t)(glwe_dimension + 1));
size_t num_blocks_acc_cg =
safe_mul((size_t)level_count, (size_t)(glwe_dimension + 1),
(size_t)input_lwe_ciphertext_count);
auto num_blocks_keybundle = input_lwe_ciphertext_count * lwe_chunk_size *
(glwe_dimension + 1) * (glwe_dimension + 1) *
level_count;
auto num_blocks_acc_step_one =
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
auto num_blocks_acc_step_two =
input_lwe_ciphertext_count * (glwe_dimension + 1);
auto num_blocks_acc_cg =
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
#if CUDA_ARCH >= 900
uint64_t full_sm_tbc_accumulate =
@@ -177,13 +172,13 @@ struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> : public pbs_buffer_base {
uint64_t minimum_sm_tbc =
get_buffer_size_sm_dsm_plus_tbc_multibit_programmable_bootstrap<Torus>(
polynomial_size);
size_t num_blocks_acc_tbc = num_blocks_acc_cg;
auto num_blocks_acc_tbc = num_blocks_acc_cg;
#endif
// Keybundle
if (max_shared_memory < full_sm_keybundle)
d_mem_keybundle = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_keybundle, full_sm_keybundle), stream, gpu_index,
num_blocks_keybundle * full_sm_keybundle, stream, gpu_index,
size_tracker, allocate_gpu_memory);
switch (pbs_variant) {
@@ -191,29 +186,29 @@ struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> : public pbs_buffer_base {
// Accumulator CG
if (max_shared_memory < partial_sm_cg_accumulate)
d_mem_acc_cg = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_cg, full_sm_cg_accumulate), stream,
gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_cg * full_sm_cg_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
else if (max_shared_memory < full_sm_cg_accumulate)
d_mem_acc_cg = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_cg, partial_sm_cg_accumulate), stream,
gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_cg * partial_sm_cg_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
break;
case PBS_VARIANT::DEFAULT:
// Accumulator step one
if (max_shared_memory < partial_sm_accumulate_step_one)
d_mem_acc_step_one = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_step_one, full_sm_accumulate_step_one),
stream, gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_step_one * full_sm_accumulate_step_one, stream,
gpu_index, size_tracker, allocate_gpu_memory);
else if (max_shared_memory < full_sm_accumulate_step_one)
d_mem_acc_step_one = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_step_one, partial_sm_accumulate_step_one),
stream, gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_step_one * partial_sm_accumulate_step_one, stream,
gpu_index, size_tracker, allocate_gpu_memory);
// Accumulator step two
if (max_shared_memory < full_sm_accumulate_step_two)
d_mem_acc_step_two = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_step_two, full_sm_accumulate_step_two),
stream, gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_step_two * full_sm_accumulate_step_two, stream,
gpu_index, size_tracker, allocate_gpu_memory);
break;
#if CUDA_ARCH >= 900
case TBC:
@@ -230,12 +225,12 @@ struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> : public pbs_buffer_base {
// Accumulator TBC
if (max_shared_memory < partial_sm_tbc_accumulate + minimum_sm_tbc)
d_mem_acc_tbc = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_tbc, full_sm_tbc_accumulate), stream,
gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_tbc * full_sm_tbc_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
else if (max_shared_memory < full_sm_tbc_accumulate + minimum_sm_tbc)
d_mem_acc_tbc = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_tbc, partial_sm_tbc_accumulate), stream,
gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_tbc * partial_sm_tbc_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
break;
#endif
default:
@@ -243,22 +238,19 @@ struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> : public pbs_buffer_base {
}
keybundle_fft = (double2 *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<double2>(num_blocks_keybundle,
(size_t)(polynomial_size / 2)),
stream, gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_keybundle * (polynomial_size / 2) * sizeof(double2), stream,
gpu_index, size_tracker, allocate_gpu_memory);
global_accumulator = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>((size_t)input_lwe_ciphertext_count,
(size_t)(glwe_dimension + 1),
(size_t)polynomial_size),
input_lwe_ciphertext_count * (glwe_dimension + 1) * polynomial_size *
sizeof(Torus),
stream, gpu_index, size_tracker, allocate_gpu_memory);
global_join_buffer = (double2 *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<double2>(
safe_mul((size_t)level_count, (size_t)(glwe_dimension + 1)),
(size_t)input_lwe_ciphertext_count, (size_t)(polynomial_size / 2)),
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count *
(polynomial_size / 2) * sizeof(double2),
stream, gpu_index, size_tracker, allocate_gpu_memory);
}
void release(cudaStream_t stream, uint32_t gpu_index) override {
void release(cudaStream_t stream, uint32_t gpu_index) {
if (d_mem_keybundle)
cuda_drop_with_size_tracking_async(d_mem_keybundle, stream, gpu_index,
@@ -294,14 +286,11 @@ struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> : public pbs_buffer_base {
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(global_join_buffer, stream, gpu_index,
gpu_memory_allocated);
cuda_synchronize_stream(stream, gpu_index);
}
};
template <typename InputTorus>
struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT>
: public pbs_buffer_base {
struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT> {
int8_t *d_mem_keybundle = NULL;
int8_t *d_mem_acc_step_one = NULL;
int8_t *d_mem_acc_step_two = NULL;
@@ -348,23 +337,20 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT>
get_buffer_size_partial_sm_cg_multibit_programmable_bootstrap<
__uint128_t>(polynomial_size);
size_t num_blocks_keybundle = safe_mul(
(size_t)input_lwe_ciphertext_count, (size_t)lwe_chunk_size,
safe_mul((size_t)(glwe_dimension + 1), (size_t)(glwe_dimension + 1)),
(size_t)level_count);
size_t num_blocks_acc_step_one =
safe_mul((size_t)level_count, (size_t)(glwe_dimension + 1),
(size_t)input_lwe_ciphertext_count);
size_t num_blocks_acc_step_two = safe_mul(
(size_t)input_lwe_ciphertext_count, (size_t)(glwe_dimension + 1));
size_t num_blocks_acc_cg =
safe_mul((size_t)level_count, (size_t)(glwe_dimension + 1),
(size_t)input_lwe_ciphertext_count);
auto num_blocks_keybundle = input_lwe_ciphertext_count * lwe_chunk_size *
(glwe_dimension + 1) * (glwe_dimension + 1) *
level_count;
auto num_blocks_acc_step_one =
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
auto num_blocks_acc_step_two =
input_lwe_ciphertext_count * (glwe_dimension + 1);
auto num_blocks_acc_cg =
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
// Keybundle
if (max_shared_memory < full_sm_keybundle)
d_mem_keybundle = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_keybundle, full_sm_keybundle), stream, gpu_index,
num_blocks_keybundle * full_sm_keybundle, stream, gpu_index,
size_tracker, allocate_gpu_memory);
switch (pbs_variant) {
@@ -372,52 +358,48 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT>
// Accumulator CG
if (max_shared_memory < partial_sm_cg_accumulate)
d_mem_acc_cg = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_cg, full_sm_cg_accumulate), stream,
gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_cg * full_sm_cg_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
else if (max_shared_memory < full_sm_cg_accumulate)
d_mem_acc_cg = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_cg, partial_sm_cg_accumulate), stream,
gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_cg * partial_sm_cg_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
break;
case PBS_VARIANT::DEFAULT:
// Accumulator step one
if (max_shared_memory < partial_sm_accumulate_step_one)
d_mem_acc_step_one = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_step_one, full_sm_accumulate_step_one),
stream, gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_step_one * full_sm_accumulate_step_one, stream,
gpu_index, size_tracker, allocate_gpu_memory);
else if (max_shared_memory < full_sm_accumulate_step_one)
d_mem_acc_step_one = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_step_one, partial_sm_accumulate_step_one),
stream, gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_step_one * partial_sm_accumulate_step_one, stream,
gpu_index, size_tracker, allocate_gpu_memory);
// Accumulator step two
if (max_shared_memory < full_sm_accumulate_step_two)
d_mem_acc_step_two = (int8_t *)cuda_malloc_with_size_tracking_async(
safe_mul(num_blocks_acc_step_two, full_sm_accumulate_step_two),
stream, gpu_index, size_tracker, allocate_gpu_memory);
num_blocks_acc_step_two * full_sm_accumulate_step_two, stream,
gpu_index, size_tracker, allocate_gpu_memory);
break;
default:
PANIC("Cuda error (PBS): unsupported implementation variant.")
}
keybundle_fft = (double *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<double>((size_t)num_blocks_keybundle,
(size_t)(polynomial_size / 2), (size_t)4),
num_blocks_keybundle * (polynomial_size / 2) * 4 * sizeof(double),
stream, gpu_index, size_tracker, allocate_gpu_memory);
global_accumulator = (__uint128_t *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<__uint128_t>((size_t)input_lwe_ciphertext_count,
(size_t)(glwe_dimension + 1),
(size_t)polynomial_size),
input_lwe_ciphertext_count * (glwe_dimension + 1) * polynomial_size *
sizeof(__uint128_t),
stream, gpu_index, size_tracker, allocate_gpu_memory);
global_join_buffer = (double *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<double>(
safe_mul((size_t)level_count, (size_t)(glwe_dimension + 1)),
(size_t)input_lwe_ciphertext_count,
safe_mul((size_t)(polynomial_size / 2), (size_t)4)),
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count *
(polynomial_size / 2) * 4 * sizeof(double),
stream, gpu_index, size_tracker, allocate_gpu_memory);
}
void release(cudaStream_t stream, uint32_t gpu_index) override {
void release(cudaStream_t stream, uint32_t gpu_index) {
if (d_mem_keybundle)
cuda_drop_with_size_tracking_async(d_mem_keybundle, stream, gpu_index,
@@ -446,7 +428,6 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT>
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(global_join_buffer, stream, gpu_index,
gpu_memory_allocated);
cuda_synchronize_stream(stream, gpu_index);
}
};

View File

@@ -1,7 +1,6 @@
#ifndef CUDA_BOOTSTRAP_UTILITIES_H
#define CUDA_BOOTSTRAP_UTILITIES_H
#include "checked_arithmetic.h"
#include "device.h"
#include "pbs_enums.h"
#include "vector_types.h"
@@ -11,46 +10,45 @@ template <typename Torus>
uint64_t get_buffer_size_full_sm_programmable_bootstrap_step_one(
uint32_t polynomial_size) {
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
return safe_mul_sizeof<Torus>(polynomial_size) + // accumulator_rotated
safe_mul_sizeof<double>(double_count,
(size_t)polynomial_size); // accumulator fft
return sizeof(Torus) * polynomial_size + // accumulator_rotated
sizeof(double) * 2 * double_count * polynomial_size /
2; // accumulator fft
}
template <typename Torus>
uint64_t get_buffer_size_full_sm_programmable_bootstrap_step_two(
uint32_t polynomial_size) {
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
return safe_mul_sizeof<Torus>(polynomial_size) + // accumulator
safe_mul_sizeof<double>(double_count,
(size_t)polynomial_size); // accumulator fft
return sizeof(Torus) * polynomial_size + // accumulator
sizeof(double) * 2 * double_count * polynomial_size /
2; // accumulator fft
}
template <typename Torus>
uint64_t
get_buffer_size_partial_sm_programmable_bootstrap(uint32_t polynomial_size) {
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
return safe_mul_sizeof<double>(double_count,
(size_t)polynomial_size); // accumulator fft
return sizeof(double) * 2 * double_count * polynomial_size /
2; // accumulator fft
}
template <typename Torus>
uint64_t
get_buffer_size_full_sm_programmable_bootstrap_tbc(uint32_t polynomial_size) {
return safe_mul_sizeof<Torus>(polynomial_size) + // accumulator_rotated
safe_mul_sizeof<Torus>(polynomial_size) + // accumulator
safe_mul_sizeof<double2>(polynomial_size / 2); // accumulator fft
return sizeof(Torus) * polynomial_size + // accumulator_rotated
sizeof(Torus) * polynomial_size + // accumulator
sizeof(double2) * polynomial_size / 2; // accumulator fft
}
template <typename Torus>
uint64_t get_buffer_size_partial_sm_programmable_bootstrap_tbc(
uint32_t polynomial_size) {
return safe_mul_sizeof<double2>(polynomial_size /
2); // accumulator fft mask & body
return sizeof(double2) * polynomial_size / 2; // accumulator fft mask & body
}
template <typename Torus>
uint64_t get_buffer_size_sm_dsm_plus_tbc_classic_programmable_bootstrap(
uint32_t polynomial_size) {
return safe_mul_sizeof<double2>(polynomial_size / 2); // tbc
return sizeof(double2) * polynomial_size / 2; // tbc
}
template <typename Torus>
@@ -58,40 +56,34 @@ uint64_t get_buffer_size_full_sm_programmable_bootstrap_tbc_2_2_params(
uint32_t polynomial_size) {
// In the first implementation with 2-2 params, we need up to 5 polynomials in
// shared memory we can optimize this later
return safe_mul_sizeof<Torus>((size_t)polynomial_size, (size_t)5);
return sizeof(Torus) * polynomial_size * 5;
}
template <typename Torus>
uint64_t
get_buffer_size_full_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
return safe_mul_sizeof<Torus>(polynomial_size) + // accumulator_rotated
safe_mul_sizeof<Torus>(polynomial_size) + // accumulator
safe_mul_sizeof<double>((size_t)polynomial_size,
double_count); // accumulator fft
return sizeof(Torus) * polynomial_size + // accumulator_rotated
sizeof(Torus) * polynomial_size + // accumulator
sizeof(double) * polynomial_size / 2 * 2 *
double_count; // accumulator fft
}
template <typename Torus>
uint64_t
get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
return safe_mul_sizeof<double>((size_t)polynomial_size,
double_count); // accumulator fft mask & body
return sizeof(double) * polynomial_size / 2 * 2 *
double_count; // accumulator fft mask & body
}
template <typename Torus>
bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
uint32_t polynomial_size, uint32_t max_shared_memory);
struct pbs_buffer_base {
virtual void release(cudaStream_t stream, uint32_t gpu_index) = 0;
virtual ~pbs_buffer_base() = default;
};
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;
template <typename Torus>
struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
int8_t *d_mem;
Torus *global_accumulator;
@@ -130,34 +122,27 @@ struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
uint64_t device_mem = 0;
if (max_shared_memory < partial_sm) {
device_mem =
safe_mul(full_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
} else if (max_shared_memory < full_sm_step_two) {
device_mem = safe_mul(
partial_dm_step_two +
safe_mul(partial_dm_step_one, (size_t)level_count),
(size_t)input_lwe_ciphertext_count, (size_t)(glwe_dimension + 1));
device_mem = (partial_dm_step_two + partial_dm_step_one * level_count) *
input_lwe_ciphertext_count * (glwe_dimension + 1);
} else if (max_shared_memory < full_sm_step_one) {
device_mem =
safe_mul(partial_dm_step_one, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = partial_dm_step_one * input_lwe_ciphertext_count *
level_count * (glwe_dimension + 1);
}
// Otherwise, both kernels run all in shared memory
d_mem = (int8_t *)cuda_malloc_with_size_tracking_async(
device_mem, stream, gpu_index, size_tracker, allocate_gpu_memory);
global_join_buffer = (double2 *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<double2>(
safe_mul((size_t)(glwe_dimension + 1), (size_t)level_count),
(size_t)input_lwe_ciphertext_count,
(size_t)(polynomial_size / 2)),
(glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
(polynomial_size / 2) * sizeof(double2),
stream, gpu_index, size_tracker, allocate_gpu_memory);
global_accumulator = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>((size_t)(glwe_dimension + 1),
(size_t)input_lwe_ciphertext_count,
(size_t)polynomial_size),
(glwe_dimension + 1) * input_lwe_ciphertext_count * polynomial_size *
sizeof(Torus),
stream, gpu_index, size_tracker, allocate_gpu_memory);
} break;
case PBS_VARIANT::CG: {
@@ -173,13 +158,11 @@ struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
uint64_t device_mem = 0;
if (max_shared_memory < partial_sm) {
device_mem =
safe_mul(full_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
} else if (max_shared_memory < full_sm) {
device_mem =
safe_mul(partial_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
}
// Otherwise, both kernels run all in shared memory
@@ -187,10 +170,8 @@ struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
device_mem, stream, gpu_index, size_tracker, allocate_gpu_memory);
global_join_buffer = (double2 *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<double2>(
safe_mul((size_t)(glwe_dimension + 1), (size_t)level_count),
(size_t)input_lwe_ciphertext_count,
(size_t)(polynomial_size / 2)),
(glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
polynomial_size / 2 * sizeof(double2),
stream, gpu_index, size_tracker, allocate_gpu_memory);
} break;
#if CUDA_ARCH >= 900
@@ -225,13 +206,11 @@ struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
//
// NOSM mode actually requires minimum_sm_tbc shared memory bytes.
if (max_shared_memory < partial_sm + minimum_sm_tbc) {
device_mem =
safe_mul(full_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
} else if (max_shared_memory < full_sm + minimum_sm_tbc) {
device_mem =
safe_mul(partial_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
}
// Otherwise, both kernels run all in shared memory
@@ -239,10 +218,8 @@ struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
device_mem, stream, gpu_index, size_tracker, allocate_gpu_memory);
global_join_buffer = (double2 *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<double2>(
safe_mul((size_t)(glwe_dimension + 1), (size_t)level_count),
(size_t)input_lwe_ciphertext_count,
(size_t)(polynomial_size / 2)),
(glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
polynomial_size / 2 * sizeof(double2),
stream, gpu_index, size_tracker, allocate_gpu_memory);
} break;
#endif
@@ -251,7 +228,7 @@ struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
}
}
void release(cudaStream_t stream, uint32_t gpu_index) override {
void release(cudaStream_t stream, uint32_t gpu_index) {
cuda_drop_with_size_tracking_async(d_mem, stream, gpu_index,
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(global_join_buffer, stream, gpu_index,
@@ -260,15 +237,13 @@ struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> : public pbs_buffer_base {
if (pbs_variant == DEFAULT)
cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
gpu_memory_allocated);
cuda_synchronize_stream(stream, gpu_index);
}
};
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer_128;
template <typename InputTorus>
struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
: public pbs_buffer_base {
struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> {
int8_t *d_mem;
__uint128_t *global_accumulator;
@@ -291,10 +266,9 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
this->pbs_variant = pbs_variant;
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
size_t global_join_buffer_size = safe_mul_sizeof<double>(
safe_mul((size_t)(glwe_dimension + 1), (size_t)level_count),
(size_t)input_lwe_ciphertext_count,
safe_mul((size_t)(polynomial_size / 2), (size_t)4));
size_t global_join_buffer_size = (glwe_dimension + 1) * level_count *
input_lwe_ciphertext_count *
polynomial_size / 2 * sizeof(double) * 4;
switch (pbs_variant) {
case PBS_VARIANT::DEFAULT: {
@@ -314,18 +288,14 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
uint64_t device_mem = 0;
if (max_shared_memory < partial_sm) {
device_mem =
safe_mul(full_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
} else if (max_shared_memory < full_sm_step_two) {
device_mem = safe_mul(
partial_dm_step_two +
safe_mul(partial_dm_step_one, (size_t)level_count),
(size_t)input_lwe_ciphertext_count, (size_t)(glwe_dimension + 1));
device_mem = (partial_dm_step_two + partial_dm_step_one * level_count) *
input_lwe_ciphertext_count * (glwe_dimension + 1);
} else if (max_shared_memory < full_sm_step_one) {
device_mem =
safe_mul(partial_dm_step_one, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = partial_dm_step_one * input_lwe_ciphertext_count *
level_count * (glwe_dimension + 1);
}
// Otherwise, both kernels run all in shared memory
d_mem = (int8_t *)cuda_malloc_with_size_tracking_async(
@@ -336,9 +306,8 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
allocate_gpu_memory);
global_accumulator = (__uint128_t *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<__uint128_t>((size_t)(glwe_dimension + 1),
(size_t)input_lwe_ciphertext_count,
(size_t)polynomial_size),
(glwe_dimension + 1) * input_lwe_ciphertext_count * polynomial_size *
sizeof(__uint128_t),
stream, gpu_index, size_tracker, allocate_gpu_memory);
} break;
case PBS_VARIANT::CG: {
@@ -354,13 +323,11 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
uint64_t device_mem = 0;
if (max_shared_memory < partial_sm) {
device_mem =
safe_mul(full_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
} else if (max_shared_memory < full_sm) {
device_mem =
safe_mul(partial_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
}
// Otherwise, both kernels run all in shared memory
@@ -403,13 +370,11 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
//
// NOSM mode actually requires minimum_sm_tbc shared memory bytes.
if (max_shared_memory < partial_sm + minimum_sm_tbc) {
device_mem =
safe_mul(full_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
} else if (max_shared_memory < full_sm + minimum_sm_tbc) {
device_mem =
safe_mul(partial_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
}
// Otherwise, both kernels run all in shared memory
@@ -426,7 +391,7 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
}
}
void release(cudaStream_t stream, uint32_t gpu_index) override {
void release(cudaStream_t stream, uint32_t gpu_index) {
cuda_drop_with_size_tracking_async(d_mem, stream, gpu_index,
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(global_join_buffer, stream, gpu_index,
@@ -435,7 +400,6 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL>
if (pbs_variant == DEFAULT)
cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
gpu_memory_allocated);
cuda_synchronize_stream(stream, gpu_index);
}
};
@@ -452,17 +416,15 @@ uint64_t get_buffer_size_programmable_bootstrap_cg(
uint64_t full_dm = full_sm;
uint64_t device_mem = 0;
if (max_shared_memory < partial_sm) {
device_mem = safe_mul(full_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
} else if (max_shared_memory < full_sm) {
device_mem = safe_mul(partial_dm, (size_t)input_lwe_ciphertext_count,
(size_t)level_count, (size_t)(glwe_dimension + 1));
device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
(glwe_dimension + 1);
}
uint64_t buffer_size =
device_mem +
safe_mul_sizeof<double2>(
safe_mul((size_t)(glwe_dimension + 1), (size_t)level_count),
(size_t)input_lwe_ciphertext_count, (size_t)(polynomial_size / 2));
uint64_t buffer_size = device_mem + (glwe_dimension + 1) * level_count *
input_lwe_ciphertext_count *
polynomial_size / 2 * sizeof(double2);
return buffer_size + buffer_size % sizeof(double2);
}

View File

@@ -5,51 +5,77 @@
#include <stdint.h>
extern "C" {
void cuda_fourier_polynomial_mul_async(void *stream, uint32_t gpu_index,
void const *input1, void const *input2,
void *output, uint32_t polynomial_size,
uint32_t total_polynomials);
void cuda_fourier_polynomial_mul(void *stream, uint32_t gpu_index,
void const *input1, void const *input2,
void *output, uint32_t polynomial_size,
uint32_t total_polynomials);
void cuda_convert_lwe_programmable_bootstrap_key_32_async(
void cuda_convert_lwe_programmable_bootstrap_key_32(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
uint32_t polynomial_size);
void cuda_convert_lwe_programmable_bootstrap_key_64_async(
void cuda_convert_lwe_programmable_bootstrap_key_64(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
uint32_t polynomial_size);
void cuda_convert_lwe_programmable_bootstrap_key_128_async(
void cuda_convert_lwe_programmable_bootstrap_key_128(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
uint32_t polynomial_size);
uint64_t scratch_cuda_programmable_bootstrap_64_async(
uint64_t scratch_cuda_programmable_bootstrap_amortized_32(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
uint64_t scratch_cuda_programmable_bootstrap_amortized_64(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
void cuda_programmable_bootstrap_amortized_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *bootstrapping_key,
int8_t *pbs_buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_samples);
void cuda_programmable_bootstrap_amortized_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *bootstrapping_key,
int8_t *pbs_buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_samples);
void cleanup_cuda_programmable_bootstrap_amortized(void *stream,
uint32_t gpu_index,
int8_t **pbs_buffer);
uint64_t scratch_cuda_programmable_bootstrap_32(
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_programmable_bootstrap_tbc_generic_64_async(
uint64_t scratch_cuda_programmable_bootstrap_64(
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_programmable_bootstrap_tbc_2_2_64_async(
uint64_t scratch_cuda_programmable_bootstrap_128(
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
uint64_t scratch_cuda_programmable_bootstrap_128_async(
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_programmable_bootstrap_lwe_ciphertext_vector_32_async(
void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
@@ -58,7 +84,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32_async(
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
void cuda_programmable_bootstrap_64_async(
void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
@@ -67,33 +93,15 @@ void cuda_programmable_bootstrap_64_async(
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
void cuda_programmable_bootstrap_tbc_64_generic_async(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *bootstrapping_key,
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
void cuda_programmable_bootstrap_tbc_64_2_2_async(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *bootstrapping_key,
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
void cuda_programmable_bootstrap_128_async(
void cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lut_vector, void const *lwe_array_in,
void const *bootstrapping_key, int8_t *buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
uint32_t level_count, uint32_t num_samples);
void cleanup_cuda_programmable_bootstrap_64(void *stream, uint32_t gpu_index,
int8_t **pbs_buffer);
void cleanup_cuda_programmable_bootstrap(void *stream, uint32_t gpu_index,
int8_t **pbs_buffer);
void cleanup_cuda_programmable_bootstrap_128(void *stream, uint32_t gpu_index,
int8_t **pbs_buffer);

View File

@@ -10,32 +10,22 @@ bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t num_samples, uint32_t max_shared_memory);
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64_async(
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
uint32_t polynomial_size, uint32_t grouping_factor);
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_128_async(
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_128(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
uint32_t polynomial_size, uint32_t grouping_factor);
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_64_async(
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_64(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_tbc_generic_64_async(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_tbc_2_2_64_async(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
void cuda_multi_bit_programmable_bootstrap_64_async(
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
@@ -45,36 +35,16 @@ void cuda_multi_bit_programmable_bootstrap_64_async(
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
uint32_t lut_stride);
void cuda_multi_bit_programmable_bootstrap_tbc_64_generic_async(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *bootstrapping_key,
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
uint32_t lut_stride);
void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
uint32_t gpu_index,
int8_t **pbs_buffer);
void cuda_multi_bit_programmable_bootstrap_tbc_64_2_2_async(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *bootstrapping_key,
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
uint32_t lut_stride);
void cleanup_cuda_multi_bit_programmable_bootstrap_64(void *stream,
uint32_t gpu_index,
int8_t **pbs_buffer);
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_128_async(
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_128_vector_64(
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
void cuda_multi_bit_programmable_bootstrap_128_async(
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_128(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lwe_array_in, void const *lwe_input_indexes,

View File

@@ -4,7 +4,7 @@
#include "../integer/integer.h"
extern "C" {
uint64_t scratch_cuda_trivium_generate_keystream_64_async(
uint64_t scratch_cuda_trivium_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
@@ -12,14 +12,13 @@ uint64_t scratch_cuda_trivium_generate_keystream_64_async(
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs);
void cuda_trivium_generate_keystream_64_async(
void cuda_trivium_generate_keystream_64(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *keystream_output,
const CudaRadixCiphertextFFI *key, const CudaRadixCiphertextFFI *iv,
uint32_t num_inputs, uint32_t num_steps, int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void cleanup_cuda_trivium_generate_keystream_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
void cleanup_cuda_trivium_64(CudaStreamsFFI streams, int8_t **mem_ptr_void);
}
#endif

View File

@@ -33,7 +33,7 @@ template <typename Torus> struct int_trivium_lut_buffers {
auto active_streams_and =
streams.active_gpu_subset(total_lut_ops, params.pbs_type);
this->and_lut->generate_and_broadcast_bivariate_lut(
active_streams_and, {0}, {and_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams_and, {0}, {and_lambda}, allocate_gpu_memory);
this->and_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
uint32_t total_flush_ops = num_trivium_inputs * BATCH_SIZE * 4;
@@ -48,7 +48,7 @@ template <typename Torus> struct int_trivium_lut_buffers {
auto active_streams_flush =
streams.active_gpu_subset(total_flush_ops, params.pbs_type);
this->flush_lut->generate_and_broadcast_lut(
active_streams_flush, {0}, {flush_lambda}, LUT_0_FOR_ALL_BLOCKS);
active_streams_flush, {0}, {flush_lambda}, allocate_gpu_memory);
this->flush_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
}
@@ -254,8 +254,6 @@ template <typename Torus> struct int_trivium_state_workspaces {
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
this->packed_flush_out, allocate_gpu_memory);
delete this->packed_flush_out;
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
}
};

View File

@@ -3,11 +3,10 @@
#include "../keyswitch/ks_enums.h"
#include "../pbs/pbs_enums.h"
#include "zk_enums.h"
#include <stdint.h>
extern "C" {
uint64_t scratch_cuda_expand_without_verification_64_async(
uint64_t scratch_cuda_expand_without_verification_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t big_lwe_dimension,
uint32_t small_lwe_dimension, uint32_t computing_ks_level,
@@ -18,15 +17,14 @@ uint64_t scratch_cuda_expand_without_verification_64_async(
const bool *is_boolean_array, const uint32_t is_boolean_array_len,
uint32_t num_compact_lists, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, KS_TYPE casting_key_type,
bool allocate_gpu_memory, EXPAND_KIND expand_kind,
PBS_MS_REDUCTION_T noise_reduction_type);
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_expand_without_verification_64_async(
void cuda_expand_without_verification_64(
CudaStreamsFFI streams, void *lwe_array_out,
const void *lwe_flattened_compact_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *computing_ksks, void *const *casting_keys);
void cleanup_cuda_expand_without_verification_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
void cleanup_expand_without_verification_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
}
#endif // ZK_H

View File

@@ -1,7 +0,0 @@
#ifndef CUDA_ZK_ENUMS_H
#define CUDA_ZK_ENUMS_H
#include <stdint.h>
// Additional to the two kinds of expand (no_casting and casting), we have a
// third that is used only in the noise tests
enum EXPAND_KIND { NO_CASTING = 0, CASTING = 1, SANITY_CHECK = 2 };
#endif // CUDA_ZK_ENUMS_H

View File

@@ -1,7 +1,7 @@
#ifndef ZK_UTILITIES_H
#define ZK_UTILITIES_H
#include "../integer/integer_utilities.h"
#include "checked_arithmetic.h"
#include "integer/integer.cuh"
#include <cstdint>
@@ -59,8 +59,8 @@ template <typename Torus> struct flattened_compact_lwe_lists {
uint32_t lwe_dimension)
: d_ptr(d_ptr), h_num_lwes_per_compact_list(h_num_lwes_per_compact_list),
num_compact_lists(num_compact_lists), lwe_dimension(lwe_dimension) {
ptr_array_to_d_compact_list = static_cast<Torus **>(
malloc(safe_mul_sizeof<Torus *>(num_compact_lists)));
ptr_array_to_d_compact_list =
static_cast<Torus **>(malloc(num_compact_lists * sizeof(Torus *)));
total_num_lwes = 0;
auto curr_list = d_ptr;
for (auto i = 0; i < num_compact_lists; ++i) {
@@ -80,7 +80,6 @@ template <typename Torus> struct flattened_compact_lwe_lists {
h_num_lwes_per_compact_list[compact_list_index]);
}
// nosemgrep: release-missing-cuda-synchronize
void release() { free(ptr_array_to_d_compact_list); }
};
@@ -106,7 +105,6 @@ template <typename Torus> struct zk_expand_mem {
uint32_t num_compact_lists;
int_radix_lut<Torus> *message_and_carry_extract_luts;
int_radix_lut<Torus> *identity_lut;
Torus *tmp_expanded_lwes;
Torus *tmp_ksed_small_to_big_expanded_lwes;
@@ -117,49 +115,33 @@ template <typename Torus> struct zk_expand_mem {
expand_job<Torus> *d_expand_jobs;
expand_job<Torus> *h_expand_jobs;
EXPAND_KIND expand_kind;
zk_expand_mem(CudaStreams streams, int_radix_params computing_params,
int_radix_params casting_params, KS_TYPE casting_key_type,
const uint32_t *num_lwes_per_compact_list,
const bool *is_boolean_array,
const uint32_t is_boolean_array_len, uint32_t num_compact_lists,
bool allocate_gpu_memory, uint64_t &size_tracker,
EXPAND_KIND expand_kind_in)
bool allocate_gpu_memory, uint64_t &size_tracker)
: computing_params(computing_params), casting_params(casting_params),
num_compact_lists(num_compact_lists),
casting_key_type(casting_key_type), expand_kind(expand_kind_in) {
casting_key_type(casting_key_type) {
gpu_memory_allocated = allocate_gpu_memory;
// We copy num_lwes_per_compact_list so we get protection against
// num_lwes_per_compact_list being freed while this buffer is still in use
this->num_lwes_per_compact_list =
(uint32_t *)malloc(safe_mul_sizeof<uint32_t>(num_compact_lists));
(uint32_t *)malloc(num_compact_lists * sizeof(uint32_t));
memcpy(this->num_lwes_per_compact_list, num_lwes_per_compact_list,
safe_mul_sizeof<uint32_t>(num_compact_lists));
num_compact_lists * sizeof(uint32_t));
num_lwes = 0;
for (int i = 0; i < num_compact_lists; i++) {
num_lwes += this->num_lwes_per_compact_list[i];
}
if (computing_params.carry_modulus != computing_params.message_modulus &&
expand_kind == EXPAND_KIND::CASTING) {
if (computing_params.carry_modulus != computing_params.message_modulus) {
PANIC("GPU backend requires carry_modulus equal to message_modulus")
}
// We create the identity LUT only if we are doing a SANITY_CHECK
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
identity_lut =
new int_radix_lut<Torus>(streams, computing_params, 1, 2 * num_lwes,
allocate_gpu_memory, size_tracker);
auto identity_lut_f = [](Torus x) -> Torus { return x; };
identity_lut->generate_and_broadcast_lut(streams, {0}, {identity_lut_f},
LUT_0_FOR_ALL_BLOCKS);
}
auto message_extract_lut_f = [casting_params](Torus x) -> Torus {
return x % casting_params.message_modulus;
};
@@ -202,19 +184,19 @@ template <typename Torus> struct zk_expand_mem {
// Adjust indexes to permute the output and access the correct LUT
auto h_indexes_in = static_cast<Torus *>(
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
auto h_indexes_out = static_cast<Torus *>(
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
auto h_lut_indexes = static_cast<Torus *>(
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
d_expand_jobs =
static_cast<expand_job<Torus> *>(cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<expand_job<Torus>>(num_lwes), streams.stream(0),
num_lwes * sizeof(expand_job<Torus>), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory));
h_expand_jobs = static_cast<expand_job<Torus> *>(
malloc(safe_mul_sizeof<expand_job<Torus>>(num_lwes)));
malloc(num_lwes * sizeof(expand_job<Torus>)));
/*
* Each LWE contains encrypted data in both carry and message spaces
@@ -279,54 +261,45 @@ template <typename Torus> struct zk_expand_mem {
"Cuda error: index %d for is_boolean_array is out of "
"bounds (len is %d)",
h_indexes_out[lwe_index], is_boolean_array_len);
auto boolean_offset =
is_boolean_array[h_indexes_out[lwe_index]] ? num_packed_msgs : 0;
h_lut_indexes[lwe_index] = i / num_lwes_in_kth + boolean_offset;
PANIC_IF_FALSE(
h_lut_indexes[lwe_index] < 4,
"Cuda error: lut index is greater than the max possible value (3)");
}
offset += num_lwes_in_kth;
}
message_and_carry_extract_luts->set_lwe_indexes(
streams.stream(0), streams.gpu_index(0), h_indexes_in, h_indexes_out);
auto lut_indexes = message_and_carry_extract_luts->get_lut_indexes(0, 0);
cuda_memcpy_with_size_tracking_async_to_gpu(
lut_indexes, h_lut_indexes, num_packed_msgs * num_lwes * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
auto active_streams =
streams.active_gpu_subset(2 * num_lwes, params.pbs_type);
// Index generator for message/carry extraction LUTs
auto index_gen = [num_compact_lists,
num_lwes_per_compact_list =
this->num_lwes_per_compact_list,
num_packed_msgs, is_boolean_array,
h_indexes_out](Torus *h_lut_indexes, uint32_t) {
auto offset = 0;
for (int k = 0; k < num_compact_lists; k++) {
auto num_lwes_in_kth = num_lwes_per_compact_list[k];
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
auto lwe_index = i + num_packed_msgs * offset;
auto boolean_offset =
is_boolean_array[h_indexes_out[lwe_index]] ? num_packed_msgs : 0;
h_lut_indexes[lwe_index] = i / num_lwes_in_kth + boolean_offset;
}
offset += num_lwes_in_kth;
}
};
message_and_carry_extract_luts->generate_and_broadcast_lut(
active_streams, {0, 1, 2, 3},
{message_extract_lut_f, carry_extract_lut_f,
message_extract_and_sanitize_bool_lut_f,
carry_extract_and_sanitize_bool_lut_f},
index_gen, true, {}, h_lut_indexes);
gpu_memory_allocated);
message_and_carry_extract_luts->allocate_lwe_vector_for_non_trivial_indexes(
active_streams, 2 * num_lwes, size_tracker, allocate_gpu_memory);
// The expanded LWEs will always be on the casting key format
tmp_expanded_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_lwes, casting_params.big_lwe_dimension + 1),
num_lwes * (casting_params.big_lwe_dimension + 1) * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
tmp_ksed_small_to_big_expanded_lwes =
(Torus *)cuda_malloc_with_size_tracking_async(
safe_mul_sizeof<Torus>(num_lwes,
casting_params.big_lwe_dimension + 1),
num_lwes * (casting_params.big_lwe_dimension + 1) * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
@@ -340,11 +313,6 @@ template <typename Torus> struct zk_expand_mem {
message_and_carry_extract_luts->release(streams);
delete message_and_carry_extract_luts;
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
identity_lut->release(streams);
delete identity_lut;
}
cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);

Some files were not shown because too many files have changed in this diff Show More