mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 15:48:20 -05:00
Compare commits
9 Commits
al/vectori
...
go/test/te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7e1b77c564 | ||
|
|
f025c165d8 | ||
|
|
1fe0bb1727 | ||
|
|
4a73b7bb4b | ||
|
|
022cb3b18a | ||
|
|
c4feabbfa3 | ||
|
|
3c6ed37a18 | ||
|
|
fe6e81ff78 | ||
|
|
87c0d646a4 |
@@ -21,20 +21,20 @@ env:
|
||||
permissions: { }
|
||||
|
||||
jobs:
|
||||
verify-actor:
|
||||
verify-triggering-actor:
|
||||
name: benchmark_perf_regression/verify-actor
|
||||
if: (github.event_name == 'pull_request' &&
|
||||
(contains(github.event.label.name, 'bench-perfs-cpu') ||
|
||||
contains(github.event.label.name, 'bench-perfs-gpu'))) ||
|
||||
(github.event.issue.pull_request && startsWith(github.event.comment.body, '/bench'))
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
uses: ./.github/workflows/verify_triggering_actor.yml
|
||||
secrets:
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
prepare-benchmarks:
|
||||
name: benchmark_perf_regression/prepare-benchmarks
|
||||
needs: verify-actor
|
||||
needs: verify-triggering-actor
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
commands: ${{ steps.set_commands.outputs.commands }}
|
||||
|
||||
77
.github/workflows/coprocessor-benchmark-gpu.yml
vendored
77
.github/workflows/coprocessor-benchmark-gpu.yml
vendored
@@ -3,6 +3,22 @@ name: coprocessor-benchmark-gpu
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
profile:
|
||||
description: "Instance type"
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "4-l40 (n3-L40x4)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
- "multi-h100-sxm5_fallback (n3-H100x8-SXM5)"
|
||||
|
||||
schedule:
|
||||
# Weekly tests @ 1AM
|
||||
- cron: "0 1 * * 6"
|
||||
@@ -17,7 +33,9 @@ env:
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
PROFILE_MANUAL_RUN: ${{ inputs.profile }}
|
||||
IS_MANUAL_RUN: ${{ github.event_name == 'workflow_dispatch' }}
|
||||
BENCHMARK_TYPE: "ALL"
|
||||
OPTIMIZATION_TARGET: "throughput"
|
||||
BATCH_SIZE: "5000"
|
||||
@@ -40,15 +58,25 @@ jobs:
|
||||
- name: Parse profile
|
||||
id: parse_profile
|
||||
run: |
|
||||
if [[ ${IS_MANUAL_RUN} == true ]]; then
|
||||
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
|
||||
else
|
||||
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
|
||||
fi
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
|
||||
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
|
||||
echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Parse hardware name
|
||||
id: parse_hardware_name
|
||||
run: |
|
||||
if [[ ${IS_MANUAL_RUN} == true ]]; then
|
||||
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
|
||||
else
|
||||
PROFILE_RAW="${PROFILE}"
|
||||
fi
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
@@ -130,6 +158,13 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
working-directory: tfhe-rs/
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./tfhe-rs/.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Check fhEVM and TFHE-rs repos
|
||||
run: |
|
||||
pwd
|
||||
@@ -140,13 +175,6 @@ jobs:
|
||||
run: git lfs checkout
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./fhevm/.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
@@ -154,7 +182,7 @@ jobs:
|
||||
|
||||
- name: Install cargo dependencies
|
||||
run: |
|
||||
sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
|
||||
sudo apt-get install -y protobuf-compiler pkg-config libssl-dev \
|
||||
libclang-dev docker-compose-v2 docker.io acl
|
||||
sudo usermod -aG docker "$USER"
|
||||
newgrp docker
|
||||
@@ -181,9 +209,16 @@ jobs:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Chainguard Registry
|
||||
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
|
||||
with:
|
||||
registry: cgr.dev
|
||||
username: ${{ secrets.CGR_USERNAME }}
|
||||
password: ${{ secrets.CGR_PASSWORD }}
|
||||
|
||||
- name: Init database
|
||||
run: make init_db
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Use Node.js
|
||||
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
|
||||
@@ -203,8 +238,12 @@ jobs:
|
||||
|
||||
- name: Profile erc20 no-cmux benchmark on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
|
||||
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
|
||||
BENCHMARK_TYPE="THROUGHPUT_200" \
|
||||
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
|
||||
make -e "profile_erc20_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Get nsys profile name
|
||||
id: nsys_profile_name
|
||||
@@ -215,7 +254,7 @@ jobs:
|
||||
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
|
||||
run: |
|
||||
mv report1.nsys-rep ${{ env.REPORT_NAME }}
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Upload profile artifact
|
||||
env:
|
||||
@@ -223,17 +262,17 @@ jobs:
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ env.REPORT_NAME }}
|
||||
path: fhevm/coprocessor/fhevm-engine/coprocessor/${{ env.REPORT_NAME }}
|
||||
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
|
||||
|
||||
- name: Run latency benchmark on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Run throughput benchmarks on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -246,7 +285,7 @@ jobs:
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--walk-subdirs \
|
||||
--crate "coprocessor/fhevm-engine/coprocessor" \
|
||||
--crate "coprocessor/fhevm-engine/tfhe-worker" \
|
||||
--name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
|
||||
working-directory: fhevm/
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ jobs:
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-gpu-test
|
||||
profile: 4-l40
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-gpu-test
|
||||
profile: 4-l40
|
||||
|
||||
cuda-tests:
|
||||
name: gpu_integer_long_run_tests/cuda-tests
|
||||
|
||||
@@ -1,33 +1,31 @@
|
||||
# Publish new release of tfhe-rs on various platform.
|
||||
name: make_release
|
||||
# Common workflow to make crate release
|
||||
name: make_release_common
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
workflow_call:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
package-name:
|
||||
type: string
|
||||
required: true
|
||||
dry-run:
|
||||
type: boolean
|
||||
default: true
|
||||
push_to_crates:
|
||||
description: "Push to crate"
|
||||
type: boolean
|
||||
default: true
|
||||
push_web_package:
|
||||
description: "Push web js package"
|
||||
type: boolean
|
||||
default: true
|
||||
push_node_package:
|
||||
description: "Push node js package"
|
||||
type: boolean
|
||||
default: true
|
||||
npm_latest_tag:
|
||||
description: "Set NPM tag as latest"
|
||||
type: boolean
|
||||
default: false
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN:
|
||||
required: true
|
||||
SLACK_CHANNEL:
|
||||
required: true
|
||||
BOT_USERNAME:
|
||||
required: true
|
||||
SLACK_WEBHOOK:
|
||||
required: true
|
||||
ALLOWED_TEAM:
|
||||
required: true
|
||||
READ_ORG_TOKEN:
|
||||
required: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
NPM_TAG: ""
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
@@ -36,18 +34,18 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release/verify-tag
|
||||
verify-triggering-actor:
|
||||
name: make_release_common/verify-triggering-actor
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
uses: ./.github/workflows/verify_triggering_actor.yml
|
||||
secrets:
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.ALLOWED_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release/package
|
||||
name: make_release_common/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify-tag
|
||||
needs: verify-triggering-actor
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
@@ -58,20 +56,23 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
env:
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
cargo package -p "${PACKAGE}"
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
|
||||
provenance:
|
||||
name: make_release/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
name: make_release_common/provenance
|
||||
if: ${{ !inputs.dry-run }}
|
||||
needs: package
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
@@ -84,14 +85,14 @@ jobs:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
|
||||
publish_release:
|
||||
name: make_release/publish_release
|
||||
needs: [package] # for comparing hashes
|
||||
name: make_release_common/publish-release
|
||||
needs: package
|
||||
runs-on: ubuntu-latest
|
||||
# For provenance of npmjs publish
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write # also needed for OIDC token exchange on crates.io
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
@@ -99,28 +100,27 @@ jobs:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
|
||||
with:
|
||||
name: crate
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
if: ${{ inputs.push_to_crates }}
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
DRY_RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe ${DRY_RUN}
|
||||
cargo publish -p "${PACKAGE}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
@@ -132,45 +132,12 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Build web package
|
||||
if: ${{ inputs.push_web_package }}
|
||||
run: |
|
||||
make build_web_js_api_parallel
|
||||
|
||||
- name: Publish web package
|
||||
if: ${{ inputs.push_web_package }}
|
||||
uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
|
||||
with:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
|
||||
- name: Build Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
run: |
|
||||
rm -rf tfhe/pkg
|
||||
|
||||
make build_node_js_api
|
||||
sed -i 's/"tfhe"/"node-tfhe"/g' tfhe/pkg/package.json
|
||||
|
||||
- name: Publish Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
|
||||
with:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
SLACK_MESSAGE: "SLSA ${{ inputs.package-name }} - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "${{ inputs.package-name }} release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
8
.github/workflows/make_release_cuda.yml
vendored
8
.github/workflows/make_release_cuda.yml
vendored
@@ -18,17 +18,17 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_cuda/verify-tag
|
||||
verify-triggering-actor:
|
||||
name: make_release_cuda/verify-triggering-actor
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
uses: ./.github/workflows/verify_triggering_actor.yml
|
||||
secrets:
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: make_release_cuda/setup-instance
|
||||
needs: verify-tag
|
||||
needs: verify-triggering-actor
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
|
||||
103
.github/workflows/make_release_hpu.yml
vendored
103
.github/workflows/make_release_hpu.yml
vendored
@@ -18,99 +18,16 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_hpu/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
make-release:
|
||||
name: make_release_hpu/make-release
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe-hpu-backend"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_hpu/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-hpu-backend
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_hpu/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: make_release_hpu/publish-release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify-tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-hpu-backend ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-hpu-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-hpu-backend release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
113
.github/workflows/make_release_tfhe.yml
vendored
Normal file
113
.github/workflows/make_release_tfhe.yml
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
# Publish new release of tfhe-rs on various platform.
|
||||
name: make_release_tfhe
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
push_to_crates:
|
||||
description: "Push to crate"
|
||||
type: boolean
|
||||
default: true
|
||||
push_web_package:
|
||||
description: "Push web js package"
|
||||
type: boolean
|
||||
default: true
|
||||
push_node_package:
|
||||
description: "Push node js package"
|
||||
type: boolean
|
||||
default: true
|
||||
npm_latest_tag:
|
||||
description: "Set NPM tag as latest"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
NPM_TAG: ""
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
make-release:
|
||||
name: make_release_tfhe/make-release
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
make-release-js:
|
||||
name: make_release_tfhe/make-release-js
|
||||
needs: make-release
|
||||
runs-on: ubuntu-latest
|
||||
# For provenance of npmjs publish
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write # also needed for OIDC token exchange on crates.io
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Build web package
|
||||
if: ${{ inputs.push_web_package }}
|
||||
run: |
|
||||
make build_web_js_api_parallel
|
||||
|
||||
- name: Publish web package
|
||||
if: ${{ inputs.push_web_package }}
|
||||
uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
|
||||
with:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
|
||||
- name: Build Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
run: |
|
||||
rm -rf tfhe/pkg
|
||||
|
||||
make build_node_js_api
|
||||
sed -i 's/"tfhe"/"node-tfhe"/g' tfhe/pkg/package.json
|
||||
|
||||
- name: Publish Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
|
||||
with:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
111
.github/workflows/make_release_tfhe_csprng.yml
vendored
111
.github/workflows/make_release_tfhe_csprng.yml
vendored
@@ -8,110 +8,19 @@ on:
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_tfhe_csprng/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
make-release:
|
||||
name: make_release_tfhe_csprng/make-release
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe-csprng"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_tfhe_csprng/package
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-csprng
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-tfhe-csprng
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_csprng/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
|
||||
publish_release:
|
||||
name: make_release_tfhe_csprng/publish-release
|
||||
needs: [verify-tag, package]
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
|
||||
with:
|
||||
name: crate-tfhe-csprng
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-csprng ${DRY_RUN}
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-csprng - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
103
.github/workflows/make_release_tfhe_fft.yml
vendored
103
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -19,99 +19,16 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_tfhe_fft/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
make-release:
|
||||
name: make_release_tfhe_fft/make-release
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe-fft"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_tfhe_fft/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-fft
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_fft/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: make_release_tfhe_fft/publish-release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify-tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-fft ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-fft crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-fft release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
103
.github/workflows/make_release_tfhe_ntt.yml
vendored
103
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -19,99 +19,16 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_tfhe_ntt/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
make-release:
|
||||
name: make_release_tfhe_ntt/make-release
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe-ntt"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_tfhe_ntt/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-ntt
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_ntt/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: make_release_tfhe_ntt/publish-release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify-tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-ntt ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-ntt crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-ntt release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
194
.github/workflows/make_release_tfhe_versionable.yml
vendored
194
.github/workflows/make_release_tfhe_versionable.yml
vendored
@@ -2,6 +2,11 @@ name: make_release_tfhe_versionable
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
@@ -13,174 +18,31 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_tfhe_versionable/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
make-release-derive:
|
||||
name: make_release_tfhe_versionable/make-release-derive
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe-versionable-derive"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package-derive:
|
||||
name: make_release_tfhe_versionable/package-derive
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable-derive
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-tfhe-versionable-derive
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance-derive:
|
||||
name: make_release_tfhe_versionable/provenance-derive
|
||||
needs: [package-derive]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
make-release:
|
||||
name: make_release_tfhe_versionable/make-release
|
||||
needs: make-release-derive
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package-derive.outputs.hash }}
|
||||
|
||||
publish_release-derive:
|
||||
name: make_release_tfhe_versionable/publish_release_derive
|
||||
needs: [ verify-tag, package-derive ] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
|
||||
with:
|
||||
name: crate-tfhe-versionable-derive
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
run: |
|
||||
cargo publish -p tfhe-versionable-derive
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package-derive.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-versionable-derive - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
package:
|
||||
name: make_release_tfhe_versionable/package
|
||||
needs: publish_release-derive
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-tfhe-versionable
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_versionable/provenance
|
||||
needs: package
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: make_release_tfhe_versionable/publish-release
|
||||
needs: package # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
|
||||
with:
|
||||
name: crate-tfhe-versionable
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
run: |
|
||||
cargo publish -p tfhe-versionable
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-versionable - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-versionable release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
package-name: "tfhe-versionable"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
103
.github/workflows/make_release_zk_pok.yml
vendored
103
.github/workflows/make_release_zk_pok.yml
vendored
@@ -18,99 +18,16 @@ env:
|
||||
permissions: { }
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_zk_pok/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
make-release:
|
||||
name: make_release_zk_pok/make-release
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe-zk-pok"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_zk_pok/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-zk-pok
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_zk_pok/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [ package ]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: make_release_zk_pok/publish-release
|
||||
needs: [ verify-tag, package ] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-zk-pok ${DRY_RUN}
|
||||
- name: Verify hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-zk-pok crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-zk-pok release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Verify a commit actor
|
||||
name: verify_commit_actor
|
||||
# Verify a triggering actor
|
||||
name: verify_triggering_actor
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -13,7 +13,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
check-actor:
|
||||
name: verify_commit_actor/check-actor
|
||||
name: verify_triggering_actor/check-actor
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# Check triggering actor membership
|
||||
5
Makefile
5
Makefile
@@ -1004,6 +1004,11 @@ test_list_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
--features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
-E "test(/.*gpu.*/)"
|
||||
|
||||
.PHONY: build_one_hl_api_test_gpu
|
||||
build_one_hl_api_test_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
--features=integer,gpu-debug -vv -p tfhe -- "$${TEST}" --test-threads=1 --nocapture
|
||||
|
||||
test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
|
||||
ifeq ($(HPU_CONFIG), v80)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
|
||||
@@ -86,6 +86,7 @@ if(CMAKE_BUILD_TYPE_LOWERCASE STREQUAL "debug")
|
||||
message("Compiling in Debug mode")
|
||||
add_definitions(-DDEBUG)
|
||||
set(OPTIMIZATION_FLAGS "${OPTIMIZATION_FLAGS} -O0 -G -g")
|
||||
set(USE_NVTOOLS 1)
|
||||
else()
|
||||
# Release mode
|
||||
message("Compiling in Release mode")
|
||||
|
||||
@@ -56,7 +56,6 @@ typedef struct {
|
||||
uint32_t num_radix_blocks;
|
||||
uint32_t max_num_radix_blocks;
|
||||
uint32_t lwe_dimension;
|
||||
uint32_t num_radix_ciphertexts;
|
||||
} CudaRadixCiphertextFFI;
|
||||
|
||||
typedef struct {
|
||||
@@ -334,8 +333,8 @@ uint64_t scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, uint32_t uses_carry,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
uint64_t scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
@@ -343,8 +342,8 @@ uint64_t scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, uint32_t uses_carry,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array,
|
||||
|
||||
@@ -244,8 +244,6 @@ struct int_radix_params {
|
||||
uint32_t carry_modulus;
|
||||
PBS_MS_REDUCTION_T noise_reduction_type;
|
||||
|
||||
int_radix_params(){};
|
||||
|
||||
int_radix_params(PBS_TYPE pbs_type, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
@@ -262,6 +260,8 @@ struct int_radix_params {
|
||||
message_modulus(message_modulus), carry_modulus(carry_modulus),
|
||||
noise_reduction_type(noise_reduction_type){};
|
||||
|
||||
int_radix_params() = default;
|
||||
|
||||
void print() {
|
||||
printf("pbs_type: %u, glwe_dimension: %u, "
|
||||
"polynomial_size: %u, "
|
||||
@@ -756,18 +756,20 @@ template <typename Torus> struct int_radix_lut {
|
||||
CudaStreams streams, uint64_t max_num_radix_blocks,
|
||||
uint64_t &size_tracker, bool allocate_gpu_memory) {
|
||||
// We need to create the auxiliary array only in GPU 0
|
||||
lwe_aligned_vec.resize(active_streams.count());
|
||||
for (uint i = 0; i < active_streams.count(); i++) {
|
||||
uint64_t size_tracker_on_array_i = 0;
|
||||
auto inputs_on_gpu = std::max(
|
||||
THRESHOLD_MULTI_GPU, get_num_inputs_on_gpu(max_num_radix_blocks, i,
|
||||
active_streams.count()));
|
||||
Torus *d_array = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
inputs_on_gpu * (params.big_lwe_dimension + 1) * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker_on_array_i,
|
||||
allocate_gpu_memory);
|
||||
lwe_aligned_vec[i] = d_array;
|
||||
size_tracker += size_tracker_on_array_i;
|
||||
if (active_streams.count() > 1) {
|
||||
lwe_aligned_vec.resize(active_streams.count());
|
||||
for (uint i = 0; i < active_streams.count(); i++) {
|
||||
uint64_t size_tracker_on_array_i = 0;
|
||||
auto inputs_on_gpu = std::max(
|
||||
THRESHOLD_MULTI_GPU, get_num_inputs_on_gpu(max_num_radix_blocks, i,
|
||||
active_streams.count()));
|
||||
Torus *d_array = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
inputs_on_gpu * (params.big_lwe_dimension + 1) * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker_on_array_i,
|
||||
allocate_gpu_memory);
|
||||
lwe_aligned_vec[i] = d_array;
|
||||
size_tracker += size_tracker_on_array_i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1632,8 +1634,19 @@ template <typename Torus> struct int_sum_ciphertexts_vec_memory {
|
||||
luts_message_carry = new int_radix_lut<Torus>(
|
||||
streams, params, 2, pbs_count, true, size_tracker);
|
||||
allocated_luts_message_carry = true;
|
||||
uint64_t message_modulus_bits =
|
||||
(uint64_t)std::log2(params.message_modulus);
|
||||
uint64_t carry_modulus_bits = (uint64_t)std::log2(params.carry_modulus);
|
||||
uint64_t total_bits_per_block =
|
||||
message_modulus_bits + carry_modulus_bits;
|
||||
uint64_t denominator =
|
||||
(uint64_t)std::ceil((pow(2, total_bits_per_block) - 1) /
|
||||
(pow(2, message_modulus_bits) - 1));
|
||||
|
||||
uint64_t upper_bound_num_blocks =
|
||||
max_total_blocks_in_vec * 2 / denominator;
|
||||
luts_message_carry->allocate_lwe_vector_for_non_trivial_indexes(
|
||||
streams, this->max_total_blocks_in_vec, size_tracker, true);
|
||||
streams, upper_bound_num_blocks, size_tracker, true);
|
||||
}
|
||||
}
|
||||
if (allocated_luts_message_carry) {
|
||||
@@ -1731,9 +1744,17 @@ template <typename Torus> struct int_sum_ciphertexts_vec_memory {
|
||||
this->current_blocks = current_blocks;
|
||||
this->small_lwe_vector = small_lwe_vector;
|
||||
this->luts_message_carry = reused_lut;
|
||||
|
||||
uint64_t message_modulus_bits = (uint64_t)std::log2(params.message_modulus);
|
||||
uint64_t carry_modulus_bits = (uint64_t)std::log2(params.carry_modulus);
|
||||
uint64_t total_bits_per_block = message_modulus_bits + carry_modulus_bits;
|
||||
uint64_t denominator =
|
||||
(uint64_t)std::ceil((pow(2, total_bits_per_block) - 1) /
|
||||
(pow(2, message_modulus_bits) - 1));
|
||||
|
||||
uint64_t upper_bound_num_blocks = max_total_blocks_in_vec * 2 / denominator;
|
||||
this->luts_message_carry->allocate_lwe_vector_for_non_trivial_indexes(
|
||||
streams, this->max_total_blocks_in_vec, size_tracker,
|
||||
allocate_gpu_memory);
|
||||
streams, upper_bound_num_blocks, size_tracker, allocate_gpu_memory);
|
||||
setup_index_buffers(streams, size_tracker);
|
||||
}
|
||||
|
||||
@@ -2404,8 +2425,7 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
|
||||
int_sc_prop_memory(CudaStreams streams, int_radix_params params,
|
||||
uint32_t num_radix_blocks, uint32_t requested_flag_in,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory,
|
||||
uint64_t &size_tracker) {
|
||||
bool allocate_gpu_memory, uint64_t &size_tracker) {
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->params = params;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
@@ -3127,11 +3147,10 @@ template <typename Torus> struct int_mul_memory {
|
||||
streams, params, num_radix_blocks, 2 * num_radix_blocks,
|
||||
vector_result_sb, small_lwe_vector, luts_array, true,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
uint32_t uses_carry = 0;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
sc_prop_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_radix_blocks, requested_flag, uses_carry,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
streams, params, num_radix_blocks, requested_flag, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
@@ -3731,36 +3750,13 @@ template <typename Torus> struct int_comparison_eq_buffer {
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->params = params;
|
||||
this->op = op;
|
||||
Torus total_modulus = params.message_modulus * params.carry_modulus;
|
||||
|
||||
are_all_block_true_buffer = new int_are_all_block_true_buffer<Torus>(
|
||||
streams, op, params, num_radix_blocks, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
|
||||
// Operator LUT
|
||||
auto operator_f = [op](Torus lhs, Torus rhs) -> Torus {
|
||||
if (op == COMPARISON_TYPE::EQ) {
|
||||
// EQ
|
||||
return (lhs == rhs);
|
||||
} else {
|
||||
// NE
|
||||
return (lhs != rhs);
|
||||
}
|
||||
};
|
||||
operator_lut =
|
||||
new int_radix_lut<Torus>(streams, params, 1, num_radix_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), operator_lut->get_lut(0, 0),
|
||||
operator_lut->get_degree(0), operator_lut->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, operator_f, gpu_memory_allocated);
|
||||
|
||||
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
|
||||
operator_lut->broadcast_lut(active_streams);
|
||||
|
||||
// f(x) -> x == 0
|
||||
Torus total_modulus = params.message_modulus * params.carry_modulus;
|
||||
auto is_non_zero_lut_f = [total_modulus](Torus x) -> Torus {
|
||||
return (x % total_modulus) != 0;
|
||||
};
|
||||
@@ -3775,38 +3771,74 @@ template <typename Torus> struct int_comparison_eq_buffer {
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, is_non_zero_lut_f, gpu_memory_allocated);
|
||||
|
||||
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
|
||||
is_non_zero_lut->broadcast_lut(active_streams);
|
||||
|
||||
// Scalar may have up to num_radix_blocks blocks
|
||||
scalar_comparison_luts = new int_radix_lut<Torus>(
|
||||
streams, params, total_modulus, num_radix_blocks, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
|
||||
for (int i = 0; i < total_modulus; i++) {
|
||||
auto lut_f = [i, operator_f](Torus x) -> Torus {
|
||||
return operator_f(i, x);
|
||||
if (op == COMPARISON_TYPE::EQ || COMPARISON_TYPE::NE) {
|
||||
// Operator LUT
|
||||
auto operator_f = [op](Torus lhs, Torus rhs) -> Torus {
|
||||
if (op == COMPARISON_TYPE::EQ) {
|
||||
return (lhs == rhs);
|
||||
} else if (op == COMPARISON_TYPE::NE) {
|
||||
return (lhs != rhs);
|
||||
PANIC("Cuda error (eq/ne): invalid comparison type")
|
||||
}
|
||||
};
|
||||
// Scalar may have up to num_radix_blocks blocks
|
||||
scalar_comparison_luts = new int_radix_lut<Torus>(
|
||||
streams, params, total_modulus, num_radix_blocks, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
scalar_comparison_luts->get_lut(0, i),
|
||||
scalar_comparison_luts->get_degree(i),
|
||||
scalar_comparison_luts->get_max_degree(i), params.glwe_dimension,
|
||||
params.polynomial_size, params.message_modulus, params.carry_modulus,
|
||||
lut_f, gpu_memory_allocated);
|
||||
for (int i = 0; i < total_modulus; i++) {
|
||||
auto lut_f = [i, operator_f](Torus x) -> Torus {
|
||||
return operator_f(i, x);
|
||||
};
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
scalar_comparison_luts->get_lut(0, i),
|
||||
scalar_comparison_luts->get_degree(i),
|
||||
scalar_comparison_luts->get_max_degree(i), params.glwe_dimension,
|
||||
params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, lut_f, gpu_memory_allocated);
|
||||
}
|
||||
scalar_comparison_luts->broadcast_lut(active_streams);
|
||||
operator_lut =
|
||||
new int_radix_lut<Torus>(streams, params, 1, num_radix_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), operator_lut->get_lut(0, 0),
|
||||
operator_lut->get_degree(0), operator_lut->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, operator_f, gpu_memory_allocated);
|
||||
|
||||
operator_lut->broadcast_lut(active_streams);
|
||||
} else {
|
||||
scalar_comparison_luts = nullptr;
|
||||
operator_lut = nullptr;
|
||||
}
|
||||
scalar_comparison_luts->broadcast_lut(active_streams);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
operator_lut->release(streams);
|
||||
delete operator_lut;
|
||||
if (op == COMPARISON_TYPE::EQ || COMPARISON_TYPE::NE) {
|
||||
PANIC_IF_FALSE(operator_lut != nullptr,
|
||||
"Cuda error: no operator lut was created");
|
||||
operator_lut->release(streams);
|
||||
delete operator_lut;
|
||||
operator_lut = nullptr;
|
||||
PANIC_IF_FALSE(scalar_comparison_luts != nullptr,
|
||||
"Cuda error: no scalar comparison luts were created");
|
||||
scalar_comparison_luts->release(streams);
|
||||
delete scalar_comparison_luts;
|
||||
scalar_comparison_luts = nullptr;
|
||||
}
|
||||
is_non_zero_lut->release(streams);
|
||||
delete is_non_zero_lut;
|
||||
scalar_comparison_luts->release(streams);
|
||||
delete scalar_comparison_luts;
|
||||
is_non_zero_lut = nullptr;
|
||||
are_all_block_true_buffer->release(streams);
|
||||
delete are_all_block_true_buffer;
|
||||
are_all_block_true_buffer = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -3926,8 +3958,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
|
||||
case LE:
|
||||
return (x == IS_INFERIOR) || (x == IS_EQUAL);
|
||||
default:
|
||||
// We don't need a default case but we need to return something
|
||||
return 42;
|
||||
PANIC("Cuda error (comparisons): unknown comparison type")
|
||||
}
|
||||
};
|
||||
|
||||
@@ -4922,11 +4953,10 @@ template <typename Torus> struct int_scalar_mul_buffer {
|
||||
streams, params, num_radix_blocks, num_ciphertext_bits, true,
|
||||
allocate_gpu_memory, last_step_mem);
|
||||
}
|
||||
uint32_t uses_carry = 0;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
sc_prop_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_radix_blocks, requested_flag, uses_carry,
|
||||
allocate_gpu_memory, last_step_mem);
|
||||
streams, params, num_radix_blocks, requested_flag, allocate_gpu_memory,
|
||||
last_step_mem);
|
||||
if (anticipated_buffer_drop) {
|
||||
size_tracker += std::max(anticipated_drop_mem, last_step_mem);
|
||||
} else {
|
||||
@@ -4982,10 +5012,9 @@ template <typename Torus> struct int_abs_buffer {
|
||||
streams, SHIFT_OR_ROTATE_TYPE::RIGHT_SHIFT, params, num_radix_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
scp_mem = new int_sc_prop_memory<Torus>(streams, params, num_radix_blocks,
|
||||
requested_flag, uses_carry,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
requested_flag, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
bitxor_mem = new int_bitop_buffer<Torus>(streams, BITOP_TYPE::BITXOR,
|
||||
params, num_radix_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
@@ -5061,13 +5090,12 @@ template <typename Torus> struct int_div_rem_memory {
|
||||
abs_mem_2 = new int_abs_buffer<Torus>(streams, params, num_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
scp_mem_1 = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_blocks, requested_flag, uses_carry,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
streams, params, num_blocks, requested_flag, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
scp_mem_2 = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_blocks, requested_flag, uses_carry,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
streams, params, num_blocks, requested_flag, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
|
||||
std::function<uint64_t(uint64_t)> quotient_predicate_lut_f =
|
||||
[](uint64_t x) -> uint64_t { return x == 1; };
|
||||
@@ -5251,7 +5279,7 @@ template <typename Torus> struct int_sub_and_propagate {
|
||||
this->allocate_gpu_memory = allocate_gpu_memory;
|
||||
|
||||
this->sc_prop_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_radix_blocks, requested_flag_in, (uint32_t)0,
|
||||
streams, params, num_radix_blocks, requested_flag_in,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
this->neg_rhs_array = new CudaRadixCiphertextFFI;
|
||||
@@ -5391,8 +5419,8 @@ template <typename Torus> struct int_unsigned_scalar_div_mem {
|
||||
streams, params, num_radix_blocks, scalar_divisor_ffi->active_bits,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
scp_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_radix_blocks, FLAG_NONE, (uint32_t)0,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
streams, params, num_radix_blocks, FLAG_NONE, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
sub_and_propagate_mem = new int_sub_and_propagate<Torus>(
|
||||
streams, params, num_radix_blocks, FLAG_NONE, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
@@ -5545,8 +5573,8 @@ template <typename Torus> struct int_signed_scalar_div_mem {
|
||||
streams, RIGHT_SHIFT, params, num_radix_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
scp_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_radix_blocks, FLAG_NONE, (uint32_t)0,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
streams, params, num_radix_blocks, FLAG_NONE, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
|
||||
} else {
|
||||
|
||||
@@ -5567,7 +5595,7 @@ template <typename Torus> struct int_signed_scalar_div_mem {
|
||||
|
||||
if (scalar_divisor_ffi->is_chosen_multiplier_geq_two_pow_numerator) {
|
||||
scp_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_radix_blocks, FLAG_NONE, (uint32_t)0,
|
||||
streams, params, num_radix_blocks, FLAG_NONE,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
}
|
||||
}
|
||||
@@ -5711,8 +5739,8 @@ template <typename Torus> struct int_signed_scalar_div_rem_buffer {
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
this->scp_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, num_radix_blocks, FLAG_NONE, (uint32_t)0,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
streams, params, num_radix_blocks, FLAG_NONE, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
|
||||
bool is_divisor_one = scalar_divisor_ffi->is_abs_divisor_one &&
|
||||
!scalar_divisor_ffi->is_divisor_negative;
|
||||
@@ -5906,9 +5934,9 @@ template <typename Torus> struct int_count_of_consecutive_bits_buffer {
|
||||
streams, params, counter_num_blocks, num_radix_blocks, true,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
this->propagate_mem =
|
||||
new int_sc_prop_memory<Torus>(streams, params, counter_num_blocks, 0, 0,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
this->propagate_mem = new int_sc_prop_memory<Torus>(
|
||||
streams, params, counter_num_blocks, FLAG_NONE, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
#include <mutex>
|
||||
#ifdef USE_NVTOOLS
|
||||
#include <cuda_profiler_api.h>
|
||||
#endif
|
||||
|
||||
uint32_t cuda_get_device() {
|
||||
int device;
|
||||
@@ -83,6 +86,9 @@ void cuda_set_device(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
// Mempools are initialized only once in all the GPUS available
|
||||
cuda_setup_mempool(gpu_index);
|
||||
#ifdef USE_NVTOOLS
|
||||
check_cuda_error(cudaProfilerStart());
|
||||
#endif
|
||||
}
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
|
||||
@@ -24,43 +24,33 @@ __host__ void host_integer_radix_bitop_kb(
|
||||
lwe_array_out->num_radix_blocks == lwe_array_2->num_radix_blocks,
|
||||
"Cuda error: input and output num radix blocks must be equal");
|
||||
|
||||
PANIC_IF_FALSE(
|
||||
lwe_array_out->num_radix_ciphertexts ==
|
||||
lwe_array_1->num_radix_ciphertexts &&
|
||||
lwe_array_out->num_radix_ciphertexts ==
|
||||
lwe_array_2->num_radix_ciphertexts,
|
||||
"Cuda error: input and output num radix ciphertexts must be equal");
|
||||
|
||||
PANIC_IF_FALSE(lwe_array_out->lwe_dimension == lwe_array_1->lwe_dimension &&
|
||||
lwe_array_out->lwe_dimension == lwe_array_2->lwe_dimension,
|
||||
"Cuda error: input and output lwe dimension must be equal");
|
||||
|
||||
auto lut = mem_ptr->lut;
|
||||
uint64_t degrees[lwe_array_1->num_radix_blocks *
|
||||
lwe_array_1->num_radix_ciphertexts];
|
||||
uint64_t degrees[lwe_array_1->num_radix_blocks];
|
||||
if (mem_ptr->op == BITOP_TYPE::BITAND) {
|
||||
update_degrees_after_bitand(
|
||||
degrees, lwe_array_1->degrees, lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks * lwe_array_1->num_radix_ciphertexts);
|
||||
update_degrees_after_bitand(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITOP_TYPE::BITOR) {
|
||||
update_degrees_after_bitor(
|
||||
degrees, lwe_array_1->degrees, lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks * lwe_array_1->num_radix_ciphertexts);
|
||||
update_degrees_after_bitor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITOP_TYPE::BITXOR) {
|
||||
update_degrees_after_bitxor(
|
||||
degrees, lwe_array_1->degrees, lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks * lwe_array_1->num_radix_ciphertexts);
|
||||
update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
}
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, lwe_array_out, lwe_array_1, lwe_array_2, bsks, ksks,
|
||||
ms_noise_reduction_key, lut,
|
||||
lwe_array_out->num_radix_blocks * lwe_array_out->num_radix_ciphertexts,
|
||||
ms_noise_reduction_key, lut, lwe_array_out->num_radix_blocks,
|
||||
lut->params.message_modulus);
|
||||
|
||||
memcpy(lwe_array_out->degrees, degrees,
|
||||
lwe_array_out->num_radix_blocks *
|
||||
lwe_array_out->num_radix_ciphertexts * sizeof(uint64_t));
|
||||
lwe_array_out->num_radix_blocks * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -430,7 +430,6 @@ __host__ void tree_sign_reduction(
|
||||
"than the number of blocks to operate on")
|
||||
|
||||
auto params = tree_buffer->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
auto message_modulus = params.message_modulus;
|
||||
|
||||
@@ -51,8 +51,8 @@ uint64_t scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, uint32_t uses_carry,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
@@ -60,7 +60,7 @@ uint64_t scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
|
||||
return scratch_cuda_propagate_single_carry_kb_inplace<uint64_t>(
|
||||
CudaStreams(streams), (int_sc_prop_memory<uint64_t> **)mem_ptr,
|
||||
num_blocks, params, requested_flag, uses_carry, allocate_gpu_memory);
|
||||
num_blocks, params, requested_flag, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
@@ -69,8 +69,8 @@ uint64_t scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, uint32_t uses_carry,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PBS_TYPE pbs_type, uint32_t requested_flag, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
@@ -78,7 +78,7 @@ uint64_t scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
|
||||
return scratch_cuda_propagate_single_carry_kb_inplace<uint64_t>(
|
||||
CudaStreams(streams), (int_sc_prop_memory<uint64_t> **)mem_ptr,
|
||||
num_blocks, params, requested_flag, uses_carry, allocate_gpu_memory);
|
||||
num_blocks, params, requested_flag, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
|
||||
@@ -242,8 +242,8 @@ __host__ void host_radix_cumulative_sum_in_groups(cudaStream_t stream,
|
||||
auto lwe_size = dest->lwe_dimension + 1;
|
||||
cuda_set_device(gpu_index);
|
||||
// Each CUDA block is responsible for a single group
|
||||
int num_blocks = (num_radix_blocks + group_size - 1) / group_size,
|
||||
num_threads = 512;
|
||||
int num_blocks = CEIL_DIV(num_radix_blocks, group_size);
|
||||
int num_threads = 512;
|
||||
device_radix_cumulative_sum_in_groups<Torus>
|
||||
<<<num_blocks, num_threads, 0, stream>>>(
|
||||
(Torus *)dest->ptr, (Torus *)src->ptr, num_radix_blocks, lwe_size,
|
||||
@@ -417,12 +417,9 @@ __host__ void host_pack_bivariate_blocks(
|
||||
lwe_array_out->lwe_dimension != lwe_array_2->lwe_dimension)
|
||||
PANIC("Cuda error: input and output radix ciphertexts should have the same "
|
||||
"lwe dimension")
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks *
|
||||
lwe_array_out->num_radix_ciphertexts ||
|
||||
num_radix_blocks >
|
||||
lwe_array_1->num_radix_blocks * lwe_array_1->num_radix_ciphertexts ||
|
||||
num_radix_blocks >
|
||||
lwe_array_2->num_radix_blocks * lwe_array_2->num_radix_ciphertexts)
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks ||
|
||||
num_radix_blocks > lwe_array_1->num_radix_blocks ||
|
||||
num_radix_blocks > lwe_array_2->num_radix_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which packing is applied should be "
|
||||
"smaller or equal to the number of input & output radix blocks")
|
||||
|
||||
@@ -533,8 +530,7 @@ __host__ void integer_radix_apply_univariate_lookup_table_kb(
|
||||
if (num_radix_blocks > lut->num_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of lut radix blocks")
|
||||
if (num_radix_blocks >
|
||||
lwe_array_out->num_radix_blocks * lwe_array_out->num_radix_ciphertexts)
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of input & output radix blocks")
|
||||
|
||||
@@ -760,14 +756,11 @@ __host__ void integer_radix_apply_bivariate_lookup_table_kb(
|
||||
if (num_radix_blocks > lut->num_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of lut radix blocks")
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks *
|
||||
lwe_array_out->num_radix_ciphertexts ||
|
||||
num_radix_blocks >
|
||||
lwe_array_1->num_radix_blocks * lwe_array_1->num_radix_ciphertexts ||
|
||||
num_radix_blocks >
|
||||
lwe_array_2->num_radix_blocks * lwe_array_2->num_radix_ciphertexts)
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks ||
|
||||
num_radix_blocks > lwe_array_1->num_radix_blocks ||
|
||||
num_radix_blocks > lwe_array_2->num_radix_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of total input & output radix blocks")
|
||||
"smaller or equal to the number of input & output radix blocks")
|
||||
|
||||
auto params = lut->params;
|
||||
auto pbs_type = params.pbs_type;
|
||||
@@ -1573,9 +1566,6 @@ void host_full_propagate_inplace(
|
||||
void *const *bsks, uint32_t num_blocks) {
|
||||
auto params = mem_ptr->lut->params;
|
||||
|
||||
int big_lwe_size = (params.glwe_dimension * params.polynomial_size + 1);
|
||||
int small_lwe_size = (params.small_lwe_dimension + 1);
|
||||
|
||||
// In the case of extracting a single LWE this parameters are dummy
|
||||
uint32_t num_many_lut = 1;
|
||||
uint32_t lut_stride = 0;
|
||||
@@ -1976,12 +1966,12 @@ template <typename Torus>
|
||||
uint64_t scratch_cuda_propagate_single_carry_kb_inplace(
|
||||
CudaStreams streams, int_sc_prop_memory<Torus> **mem_ptr,
|
||||
uint32_t num_radix_blocks, int_radix_params params, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory) {
|
||||
PUSH_RANGE("scratch add & propagate sc")
|
||||
uint64_t size_tracker = 0;
|
||||
*mem_ptr = new int_sc_prop_memory<Torus>(streams, params, num_radix_blocks,
|
||||
requested_flag, uses_carry,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
requested_flag, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
POP_RANGE()
|
||||
return size_tracker;
|
||||
}
|
||||
@@ -2123,9 +2113,6 @@ void host_add_and_propagate_single_carry(
|
||||
|
||||
auto num_radix_blocks = lhs_array->num_radix_blocks;
|
||||
auto params = mem->params;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
auto lut_stride = mem->lut_stride;
|
||||
auto num_many_lut = mem->num_many_lut;
|
||||
CudaRadixCiphertextFFI output_flag;
|
||||
@@ -2397,7 +2384,6 @@ __host__ void integer_radix_apply_noise_squashing_kb(
|
||||
|
||||
PUSH_RANGE("apply noise squashing")
|
||||
auto params = lut->params;
|
||||
auto pbs_type = params.pbs_type;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
auto small_lwe_dimension = params.small_lwe_dimension;
|
||||
auto ks_level = params.ks_level;
|
||||
|
||||
@@ -25,7 +25,6 @@ void into_radix_ciphertext(CudaRadixCiphertextFFI *radix, void *lwe_array,
|
||||
radix->num_radix_blocks = num_radix_blocks;
|
||||
radix->max_num_radix_blocks = num_radix_blocks;
|
||||
radix->ptr = lwe_array;
|
||||
radix->num_radix_ciphertexts = 1;
|
||||
|
||||
radix->degrees = (uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
|
||||
radix->noise_levels =
|
||||
|
||||
@@ -19,7 +19,6 @@ void create_zero_radix_ciphertext_async(cudaStream_t const stream,
|
||||
radix->lwe_dimension = lwe_dimension;
|
||||
radix->num_radix_blocks = num_radix_blocks;
|
||||
radix->max_num_radix_blocks = num_radix_blocks;
|
||||
radix->num_radix_ciphertexts = 1;
|
||||
uint64_t size = (lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
|
||||
radix->ptr = (void *)cuda_malloc_with_size_tracking_async(
|
||||
size, stream, gpu_index, size_tracker, allocate_gpu_memory);
|
||||
@@ -64,7 +63,6 @@ void as_radix_ciphertext_slice(CudaRadixCiphertextFFI *output_radix,
|
||||
|
||||
auto lwe_size = input_radix->lwe_dimension + 1;
|
||||
output_radix->num_radix_blocks = end_input_lwe_index - start_input_lwe_index;
|
||||
output_radix->num_radix_ciphertexts = input_radix->num_radix_ciphertexts;
|
||||
output_radix->max_num_radix_blocks = input_radix->max_num_radix_blocks;
|
||||
output_radix->lwe_dimension = input_radix->lwe_dimension;
|
||||
Torus *in_ptr = (Torus *)input_radix->ptr;
|
||||
|
||||
@@ -183,7 +183,6 @@ pub struct CudaRadixCiphertextFFI {
|
||||
pub num_radix_blocks: u32,
|
||||
pub max_num_radix_blocks: u32,
|
||||
pub lwe_dimension: u32,
|
||||
pub num_radix_ciphertexts: u32,
|
||||
}
|
||||
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
|
||||
const _: () = {
|
||||
@@ -202,8 +201,6 @@ const _: () = {
|
||||
[::std::mem::offset_of!(CudaRadixCiphertextFFI, max_num_radix_blocks) - 28usize];
|
||||
["Offset of field: CudaRadixCiphertextFFI::lwe_dimension"]
|
||||
[::std::mem::offset_of!(CudaRadixCiphertextFFI, lwe_dimension) - 32usize];
|
||||
["Offset of field: CudaRadixCiphertextFFI::num_radix_ciphertexts"]
|
||||
[::std::mem::offset_of!(CudaRadixCiphertextFFI, num_radix_ciphertexts) - 36usize];
|
||||
};
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
@@ -811,7 +808,6 @@ unsafe extern "C" {
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
requested_flag: u32,
|
||||
uses_carry: u32,
|
||||
allocate_gpu_memory: bool,
|
||||
noise_reduction_type: PBS_MS_REDUCTION_T,
|
||||
) -> u64;
|
||||
@@ -834,7 +830,6 @@ unsafe extern "C" {
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
requested_flag: u32,
|
||||
uses_carry: u32,
|
||||
allocate_gpu_memory: bool,
|
||||
noise_reduction_type: PBS_MS_REDUCTION_T,
|
||||
) -> u64;
|
||||
|
||||
18
ci/slab.toml
18
ci/slab.toml
@@ -83,18 +83,6 @@ image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
flavor_name = "n3-A100x8-NVLink"
|
||||
user = "ubuntu"
|
||||
|
||||
[backend.hyperstack.multi-gpu-test]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
flavor_name = "n3-L40x4"
|
||||
user = "ubuntu"
|
||||
|
||||
[backend.hyperstack.multi-gpu-test_fallback]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
flavor_name = "n3-RTX-A6000x2"
|
||||
user = "ubuntu"
|
||||
|
||||
[backend.hyperstack.l40]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
@@ -106,3 +94,9 @@ environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
flavor_name = "n3-RTX-A6000x1"
|
||||
user = "ubuntu"
|
||||
|
||||
[backend.hyperstack.4-l40]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R570 CUDA 12.8"
|
||||
flavor_name = "n3-L40x4"
|
||||
user = "ubuntu"
|
||||
|
||||
@@ -90,12 +90,6 @@ path = "benches/high_level_api/noise_squash.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "hlapi-arrays"
|
||||
path = "benches/high_level_api/arrays.rs"
|
||||
harness = false
|
||||
required-features = ["integer", "internal-keycache"]
|
||||
|
||||
[[bench]]
|
||||
name = "glwe_packing_compression-integer-bench"
|
||||
path = "benches/integer/glwe_packing_compression.rs"
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
use benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
|
||||
use benchmark::utilities::{write_to_json, OperatorType};
|
||||
use criterion::Criterion;
|
||||
use rand::prelude::*;
|
||||
use tfhe::array::GpuFheUint64Array;
|
||||
use tfhe::keycache::NamedParam;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::{ClientKey, CompressedServerKey};
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
fn main() {
|
||||
let cks = {
|
||||
use tfhe::{set_server_key, ConfigBuilder};
|
||||
let config = ConfigBuilder::with_custom_parameters(
|
||||
BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
|
||||
)
|
||||
.build();
|
||||
let cks = ClientKey::generate(config);
|
||||
let compressed_sks = CompressedServerKey::new(&cks);
|
||||
|
||||
set_server_key(compressed_sks.decompress_to_gpu());
|
||||
cks
|
||||
};
|
||||
|
||||
let array_dim = 32;
|
||||
let num_elems = array_dim * array_dim;
|
||||
let mut rng = thread_rng();
|
||||
let clear_xs = (0..num_elems as u64)
|
||||
.map(|_| rng.gen::<u64>())
|
||||
.collect::<Vec<_>>();
|
||||
let clear_ys = (0..num_elems as u64)
|
||||
.map(|_| rng.gen::<u64>())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let xs =
|
||||
GpuFheUint64Array::try_encrypt((clear_xs.as_slice(), vec![array_dim, array_dim]), &cks)
|
||||
.unwrap();
|
||||
let ys =
|
||||
GpuFheUint64Array::try_encrypt((clear_ys.as_slice(), vec![array_dim, array_dim]), &cks)
|
||||
.unwrap();
|
||||
|
||||
let mut c = Criterion::default().configure_from_args();
|
||||
let bench_id = format!("bench::hlapi::array::cuda::bitand::");
|
||||
c.bench_function(&bench_id, |b| {
|
||||
b.iter(|| {
|
||||
let _ = &xs & &ys;
|
||||
})
|
||||
});
|
||||
|
||||
let params = cks.computation_parameters();
|
||||
|
||||
write_to_json::<u64, _>(
|
||||
&bench_id,
|
||||
params,
|
||||
params.name(),
|
||||
"erc20-transfer",
|
||||
&OperatorType::Atomic,
|
||||
64,
|
||||
vec![],
|
||||
);
|
||||
|
||||
c.final_summary();
|
||||
}
|
||||
@@ -421,23 +421,32 @@ pub fn throughput_num_threads(num_block: usize, op_pbs_count: u64) -> u64 {
|
||||
let block_multiplicator = (ref_block_count as f64 / num_block as f64).ceil().min(1.0);
|
||||
// Some operations with a high serial workload (e.g. division) would yield an operation
|
||||
// loading value so low that the number of elements in the end wouldn't be meaningful.
|
||||
let minimum_loading = if num_block < 64 { 0.2 } else { 0.01 };
|
||||
let minimum_loading = if num_block < 64 { 1.0 } else { 0.015 };
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
{
|
||||
let num_sms_per_gpu = get_number_of_sms();
|
||||
let total_num_sm = num_sms_per_gpu * get_number_of_gpus();
|
||||
|
||||
let total_blocks_per_sm = 4u32; // Assume each SM can handle 4 blocks concurrently
|
||||
let total_num_sm = total_blocks_per_sm * total_num_sm;
|
||||
let total_blocks_per_sm = 4u64; // Assume each SM can handle 4 blocks concurrently
|
||||
let min_num_waves = 4u64; //Enforce at least 4 waves in the GPU
|
||||
let elements_per_wave = total_num_sm as u64 / (num_block as u64);
|
||||
|
||||
let block_factor = ((2.0f64 * num_block as f64) / 4.0f64).ceil() as u64;
|
||||
let elements_per_wave = total_blocks_per_sm * total_num_sm as u64 / block_factor;
|
||||
// We need to enable the new load for pbs benches and for sizes larger than 16 blocks in
|
||||
// demanding operations for the rest of operations we maintain a minimum of 200
|
||||
// elements
|
||||
let min_elements = if op_pbs_count == 1
|
||||
|| (op_pbs_count > (num_block * num_block) as u64 && num_block >= 16)
|
||||
{
|
||||
elements_per_wave * min_num_waves
|
||||
} else {
|
||||
200u64
|
||||
};
|
||||
let operation_loading = ((total_num_sm as u64 / op_pbs_count) as f64).max(minimum_loading);
|
||||
let elements = (total_num_sm as f64 * block_multiplicator * operation_loading) as u64;
|
||||
elements.min(elements_per_wave * min_num_waves) // This threshold is useful for operation
|
||||
// with both a small number of
|
||||
// block and low PBs count.
|
||||
elements.min(min_elements) // This threshold is useful for operation
|
||||
// with both a small number of
|
||||
// block and low PBs count.
|
||||
}
|
||||
#[cfg(feature = "hpu")]
|
||||
{
|
||||
|
||||
@@ -104,19 +104,12 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
.map(|list| list.0.lwe_ciphertext_count.0)
|
||||
.sum(),
|
||||
);
|
||||
|
||||
assert_ne!(
|
||||
lwe_ciphertext_count.0, 0,
|
||||
"Empty iterator of CudaLweCiphertextList"
|
||||
);
|
||||
|
||||
let stream_count = lwe_ciphertext_count.0.min(6);
|
||||
let mut new_streams: Vec<CudaStreams> = Vec::with_capacity(stream_count);
|
||||
|
||||
for _ in 0..stream_count {
|
||||
let stream = CudaStreams::new_single_gpu(streams.gpu_indexes[0]);
|
||||
new_streams.push(stream);
|
||||
}
|
||||
|
||||
let first_item = cuda_ciphertexts_list_vec.next().unwrap();
|
||||
let lwe_dimension = first_item.lwe_dimension();
|
||||
let mut d_vec = CudaVec::new(
|
||||
@@ -130,20 +123,25 @@ impl<T: UnsignedInteger> CudaLweCiphertextList<T> {
|
||||
* std::mem::size_of::<T>();
|
||||
// Concatenate gpu_index memory
|
||||
unsafe {
|
||||
for (i, list) in cuda_ciphertexts_list_vec.enumerate() {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
ptr,
|
||||
first_item.0.d_vec.as_c_ptr(0),
|
||||
size as u64,
|
||||
streams.ptr[0],
|
||||
streams.gpu_indexes[0].get(),
|
||||
);
|
||||
ptr = ptr.wrapping_byte_add(size);
|
||||
for list in cuda_ciphertexts_list_vec {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
ptr,
|
||||
list.0.d_vec.as_c_ptr(0),
|
||||
size as u64,
|
||||
new_streams[i % stream_count].ptr[0],
|
||||
new_streams[i % stream_count].gpu_indexes[0].get(),
|
||||
streams.ptr[0],
|
||||
streams.gpu_indexes[0].get(),
|
||||
);
|
||||
ptr = ptr.wrapping_byte_add(size);
|
||||
}
|
||||
}
|
||||
for s in new_streams.iter() {
|
||||
s.synchronize();
|
||||
}
|
||||
|
||||
let cuda_lwe_list = CudaLweList {
|
||||
d_vec,
|
||||
|
||||
@@ -840,7 +840,6 @@ pub unsafe fn add_lwe_ciphertext_vector_async<T: UnsignedInteger>(
|
||||
num_radix_blocks: num_samples,
|
||||
max_num_radix_blocks: num_samples,
|
||||
lwe_dimension: lwe_dimension.0 as u32,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
};
|
||||
let lwe_array_in_1_data = CudaRadixCiphertextFFI {
|
||||
ptr: lwe_array_in_1.get_mut_c_ptr(0),
|
||||
@@ -849,7 +848,6 @@ pub unsafe fn add_lwe_ciphertext_vector_async<T: UnsignedInteger>(
|
||||
num_radix_blocks: num_samples,
|
||||
max_num_radix_blocks: num_samples,
|
||||
lwe_dimension: lwe_dimension.0 as u32,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
};
|
||||
let lwe_array_in_2_data = CudaRadixCiphertextFFI {
|
||||
ptr: lwe_array_in_2.get_mut_c_ptr(0),
|
||||
@@ -858,7 +856,6 @@ pub unsafe fn add_lwe_ciphertext_vector_async<T: UnsignedInteger>(
|
||||
num_radix_blocks: num_samples,
|
||||
max_num_radix_blocks: num_samples,
|
||||
lwe_dimension: lwe_dimension.0 as u32,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
};
|
||||
cuda_add_lwe_ciphertext_vector_64(
|
||||
streams.ptr[0],
|
||||
@@ -893,7 +890,6 @@ pub unsafe fn add_lwe_ciphertext_vector_assign_async<T: UnsignedInteger>(
|
||||
num_radix_blocks: num_samples,
|
||||
max_num_radix_blocks: num_samples,
|
||||
lwe_dimension: lwe_dimension.0 as u32,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
};
|
||||
let lwe_array_in_data = CudaRadixCiphertextFFI {
|
||||
ptr: lwe_array_in.get_mut_c_ptr(0),
|
||||
@@ -902,7 +898,6 @@ pub unsafe fn add_lwe_ciphertext_vector_assign_async<T: UnsignedInteger>(
|
||||
num_radix_blocks: num_samples,
|
||||
max_num_radix_blocks: num_samples,
|
||||
lwe_dimension: lwe_dimension.0 as u32,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
};
|
||||
cuda_add_lwe_ciphertext_vector_64(
|
||||
streams.ptr[0],
|
||||
|
||||
@@ -19,8 +19,7 @@ use crate::integer::block_decomposition::{
|
||||
DecomposableInto, RecomposableFrom, RecomposableSignedInteger,
|
||||
};
|
||||
use crate::integer::gpu::ciphertext::{
|
||||
CudaIntegerRadixCiphertext, CudaRadixCiphertext, CudaSignedRadixCiphertext,
|
||||
CudaUnsignedRadixCiphertext,
|
||||
CudaIntegerRadixCiphertext, CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext,
|
||||
};
|
||||
use crate::integer::server_key::radix_parallel::scalar_div_mod::SignedReciprocable;
|
||||
use crate::integer::server_key::{Reciprocable, ScalarMultiplier};
|
||||
@@ -84,12 +83,6 @@ impl<'a, T> TensorSlice<'a, GpuSlice<'a, T>> {
|
||||
pub fn par_iter(self) -> ParStridedIter<'a, T> {
|
||||
ParStridedIter::new(self.slice.0, self.dims.clone())
|
||||
}
|
||||
pub fn len(&self) -> usize {
|
||||
self.dims.flattened_len()
|
||||
}
|
||||
pub fn as_slice(&self) -> &'a [T] {
|
||||
self.slice.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> TensorSlice<'a, GpuSliceMut<'a, T>> {
|
||||
@@ -323,25 +316,7 @@ where
|
||||
lhs: TensorSlice<'_, Self::Slice<'a>>,
|
||||
rhs: TensorSlice<'_, Self::Slice<'a>>,
|
||||
) -> Self::Owned {
|
||||
GpuOwned(global_state::with_cuda_internal_keys(|cuda_key| {
|
||||
let streams = &cuda_key.streams;
|
||||
let num_ciphertexts = lhs.len() as u32;
|
||||
let lhs_slice: &[T] = lhs.as_slice();
|
||||
let rhs_slice: &[T] = rhs.as_slice();
|
||||
let mut lhs_aligned = T::from(CudaRadixCiphertext::from_radix_ciphertext_vec(
|
||||
lhs_slice, streams,
|
||||
));
|
||||
let rhs_aligned = T::from(CudaRadixCiphertext::from_radix_ciphertext_vec(
|
||||
rhs_slice, streams,
|
||||
));
|
||||
crate::integer::gpu::CudaServerKey::bitand_vec(
|
||||
cuda_key.pbs_key(),
|
||||
&mut lhs_aligned,
|
||||
&rhs_aligned,
|
||||
num_ciphertexts,
|
||||
streams,
|
||||
)
|
||||
}))
|
||||
par_map_sks_op_on_pair_of_elements(lhs, rhs, crate::integer::gpu::CudaServerKey::bitand)
|
||||
}
|
||||
|
||||
fn bitor<'a>(
|
||||
|
||||
@@ -28,12 +28,6 @@ impl<'a, T> TensorSlice<'a, &'a [T]> {
|
||||
pub fn par_iter(self) -> ParStridedIter<'a, T> {
|
||||
ParStridedIter::new(self.slice, self.dims.clone())
|
||||
}
|
||||
pub fn len(&self) -> usize {
|
||||
self.dims.flattened_len()
|
||||
}
|
||||
pub fn as_slice(&self) -> &'a [T] {
|
||||
self.slice
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> TensorSlice<'a, &'a mut [T]> {
|
||||
|
||||
@@ -7,7 +7,7 @@ pub mod squashed_noise;
|
||||
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::gpu::{CudaLweList, CudaStreams};
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::{LweCiphertextList, LweCiphertextOwned};
|
||||
use crate::integer::gpu::ciphertext::info::{CudaBlockInfo, CudaRadixCiphertextInfo};
|
||||
use crate::integer::parameters::LweDimension;
|
||||
@@ -15,7 +15,6 @@ use crate::integer::{IntegerCiphertext, RadixCiphertext, SignedRadixCiphertext};
|
||||
use crate::shortint::{Ciphertext, EncryptionKeyChoice};
|
||||
use crate::GpuIndex;
|
||||
|
||||
use crate::shortint::parameters::LweCiphertextCount;
|
||||
pub use compressed_noise_squashed_ciphertext_list::*;
|
||||
|
||||
pub trait CudaIntegerRadixCiphertext: Sized {
|
||||
@@ -71,68 +70,8 @@ pub trait CudaIntegerRadixCiphertext: Sized {
|
||||
fn gpu_indexes(&self) -> &[GpuIndex] {
|
||||
&self.as_ref().d_blocks.0.d_vec.gpu_indexes
|
||||
}
|
||||
|
||||
// Converts a CudaIntegerRadixCiphertext with num_blocks * num_ciphertexts LWEs into a
|
||||
// Vec<CudaIntegerRadixCiphertext> of length num_radix_ciphertexts, where each ciphertext has
|
||||
// num_blocks LWEs
|
||||
fn to_integer_radix_ciphertext_vec(
|
||||
&self,
|
||||
num_radix_ciphertexts: u32,
|
||||
streams: &CudaStreams,
|
||||
) -> Vec<Self> {
|
||||
let total_blocks = self.as_ref().d_blocks.0.lwe_ciphertext_count.0;
|
||||
assert_eq!(total_blocks % num_radix_ciphertexts as usize, 0, "Total number of blocks ({total_blocks}) is not divisible by number of radix ciphertexts ({num_radix_ciphertexts})");
|
||||
|
||||
let num_blocks = total_blocks / num_radix_ciphertexts as usize;
|
||||
|
||||
let mut result = Vec::with_capacity(num_radix_ciphertexts as usize);
|
||||
let lwe_dimension = self.as_ref().d_blocks.lwe_dimension();
|
||||
|
||||
for i in 0..num_radix_ciphertexts as usize {
|
||||
let block_start = i * num_blocks;
|
||||
let block_end = block_start + num_blocks;
|
||||
|
||||
let d_vec = unsafe {
|
||||
let mut d_vec =
|
||||
CudaVec::new_async(lwe_dimension.to_lwe_size().0 * num_blocks, streams, 0);
|
||||
|
||||
let copy_start = block_start * lwe_dimension.to_lwe_size().0;
|
||||
let copy_end = block_end * lwe_dimension.to_lwe_size().0;
|
||||
d_vec.copy_src_range_gpu_to_gpu_async(
|
||||
copy_start..copy_end,
|
||||
&self.as_ref().d_blocks.0.d_vec,
|
||||
streams,
|
||||
0,
|
||||
);
|
||||
|
||||
streams.synchronize();
|
||||
d_vec
|
||||
};
|
||||
let lwe_list = CudaLweList::<u64> {
|
||||
d_vec,
|
||||
lwe_ciphertext_count: LweCiphertextCount(num_blocks),
|
||||
lwe_dimension,
|
||||
ciphertext_modulus: self.as_ref().d_blocks.ciphertext_modulus(),
|
||||
};
|
||||
|
||||
// Copy the associated block metadata
|
||||
let block_info = self.as_ref().info.blocks[block_start..block_end].to_vec();
|
||||
|
||||
let info = CudaRadixCiphertextInfo { blocks: block_info };
|
||||
|
||||
result.push(Self::from(CudaRadixCiphertext::new(
|
||||
CudaLweCiphertextList(lwe_list),
|
||||
info,
|
||||
)));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// This struct corresponds to the pointers on GPU and
|
||||
/// metadata representing an array of LWEs corresponding
|
||||
/// to one or more RadixCiphertexts
|
||||
pub struct CudaRadixCiphertext {
|
||||
pub d_blocks: CudaLweCiphertextList<u64>,
|
||||
pub info: CudaRadixCiphertextInfo,
|
||||
|
||||
@@ -183,25 +183,6 @@ fn prepare_cuda_radix_ffi(
|
||||
num_radix_blocks: input.d_blocks.0.lwe_ciphertext_count.0 as u32,
|
||||
max_num_radix_blocks: input.d_blocks.0.lwe_ciphertext_count.0 as u32,
|
||||
lwe_dimension: input.d_blocks.0.lwe_dimension.0 as u32,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
}
|
||||
}
|
||||
|
||||
fn prepare_cuda_radix_vec_ffi(
|
||||
input: &CudaRadixCiphertext,
|
||||
degrees_vec: &mut Vec<u64>,
|
||||
noise_levels_vec: &mut Vec<u64>,
|
||||
num_radix_ciphertexts: u32,
|
||||
) -> CudaRadixCiphertextFFI {
|
||||
CudaRadixCiphertextFFI {
|
||||
ptr: input.d_blocks.0.d_vec.get_mut_c_ptr(0),
|
||||
degrees: degrees_vec.as_mut_ptr(),
|
||||
noise_levels: noise_levels_vec.as_mut_ptr(),
|
||||
num_radix_blocks: input.d_blocks.0.lwe_ciphertext_count.0 as u32 / num_radix_ciphertexts,
|
||||
max_num_radix_blocks: input.d_blocks.0.lwe_ciphertext_count.0 as u32
|
||||
/ num_radix_ciphertexts,
|
||||
lwe_dimension: input.d_blocks.0.lwe_dimension.0 as u32,
|
||||
num_radix_ciphertexts,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,7 +200,6 @@ fn prepare_cuda_radix_ffi_from_slice<T: UnsignedInteger>(
|
||||
num_radix_blocks,
|
||||
max_num_radix_blocks: num_radix_blocks,
|
||||
lwe_dimension,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -237,7 +217,6 @@ fn prepare_cuda_radix_ffi_from_slice_mut<T: UnsignedInteger>(
|
||||
num_radix_blocks,
|
||||
max_num_radix_blocks: num_radix_blocks,
|
||||
lwe_dimension,
|
||||
num_radix_ciphertexts: 1u32,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2344,7 +2323,6 @@ pub(crate) unsafe fn propagate_single_carry_assign_async<T: UnsignedInteger, B:
|
||||
carry_modulus.0 as u32,
|
||||
pbs_type as u32,
|
||||
requested_flag as u32,
|
||||
uses_carry,
|
||||
true,
|
||||
noise_reduction_type as u32,
|
||||
);
|
||||
@@ -2381,7 +2359,6 @@ pub(crate) fn get_propagate_single_carry_assign_async_size_on_gpu(
|
||||
pbs_type: PBSType,
|
||||
grouping_factor: LweBskGroupingFactor,
|
||||
requested_flag: OutputFlag,
|
||||
uses_carry: u32,
|
||||
ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
|
||||
) -> u64 {
|
||||
let noise_reduction_type = resolve_noise_reduction_type(ms_noise_reduction_configuration);
|
||||
@@ -2406,7 +2383,6 @@ pub(crate) fn get_propagate_single_carry_assign_async_size_on_gpu(
|
||||
carry_modulus.0 as u32,
|
||||
pbs_type as u32,
|
||||
requested_flag as u32,
|
||||
uses_carry,
|
||||
false,
|
||||
noise_reduction_type as u32,
|
||||
)
|
||||
@@ -2433,7 +2409,6 @@ pub(crate) fn get_add_and_propagate_single_carry_assign_async_size_on_gpu(
|
||||
pbs_type: PBSType,
|
||||
grouping_factor: LweBskGroupingFactor,
|
||||
requested_flag: OutputFlag,
|
||||
uses_carry: u32,
|
||||
ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
|
||||
) -> u64 {
|
||||
let noise_reduction_type = resolve_noise_reduction_type(ms_noise_reduction_configuration);
|
||||
@@ -2458,7 +2433,6 @@ pub(crate) fn get_add_and_propagate_single_carry_assign_async_size_on_gpu(
|
||||
carry_modulus.0 as u32,
|
||||
pbs_type as u32,
|
||||
requested_flag as u32,
|
||||
uses_carry,
|
||||
false,
|
||||
noise_reduction_type as u32,
|
||||
)
|
||||
@@ -2780,7 +2754,6 @@ pub(crate) unsafe fn add_and_propagate_single_carry_assign_async<T: UnsignedInte
|
||||
carry_modulus.0 as u32,
|
||||
pbs_type as u32,
|
||||
requested_flag as u32,
|
||||
uses_carry,
|
||||
true,
|
||||
noise_reduction_type as u32,
|
||||
);
|
||||
@@ -7625,143 +7598,3 @@ pub unsafe fn expand_async<T: UnsignedInteger, B: Numeric>(
|
||||
);
|
||||
cleanup_expand_without_verification_64(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// # Safety
|
||||
///
|
||||
/// This operation modifies raw GPU pointers on the GPU
|
||||
pub unsafe fn unchecked_bitop_vec_radix_kb_assign<T: UnsignedInteger, B: Numeric>(
|
||||
streams: &CudaStreams,
|
||||
radix_lwe_left: &mut CudaRadixCiphertext,
|
||||
radix_lwe_right: &CudaRadixCiphertext,
|
||||
bootstrapping_key: &CudaVec<B>,
|
||||
keyswitch_key: &CudaVec<T>,
|
||||
message_modulus: MessageModulus,
|
||||
carry_modulus: CarryModulus,
|
||||
glwe_dimension: GlweDimension,
|
||||
polynomial_size: PolynomialSize,
|
||||
big_lwe_dimension: LweDimension,
|
||||
small_lwe_dimension: LweDimension,
|
||||
ks_level: DecompositionLevelCount,
|
||||
ks_base_log: DecompositionBaseLog,
|
||||
pbs_level: DecompositionLevelCount,
|
||||
pbs_base_log: DecompositionBaseLog,
|
||||
op: BitOpType,
|
||||
num_blocks: u32,
|
||||
num_radix_ciphertexts: u32,
|
||||
pbs_type: PBSType,
|
||||
grouping_factor: LweBskGroupingFactor,
|
||||
ms_noise_reduction_configuration: Option<&CudaModulusSwitchNoiseReductionConfiguration>,
|
||||
) {
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
radix_lwe_left.d_blocks.0.d_vec.gpu_index(0),
|
||||
"GPU error: first stream is on GPU {}, first lhs pointer is on GPU {}",
|
||||
streams.gpu_indexes[0].get(),
|
||||
radix_lwe_left.d_blocks.0.d_vec.gpu_index(0).get(),
|
||||
);
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
radix_lwe_right.d_blocks.0.d_vec.gpu_index(0),
|
||||
"GPU error: first stream is on GPU {}, first rhs pointer is on GPU {}",
|
||||
streams.gpu_indexes[0].get(),
|
||||
radix_lwe_right.d_blocks.0.d_vec.gpu_index(0).get(),
|
||||
);
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
bootstrapping_key.gpu_index(0),
|
||||
"GPU error: first stream is on GPU {}, first bsk pointer is on GPU {}",
|
||||
streams.gpu_indexes[0].get(),
|
||||
bootstrapping_key.gpu_index(0).get(),
|
||||
);
|
||||
assert_eq!(
|
||||
streams.gpu_indexes[0],
|
||||
keyswitch_key.gpu_index(0),
|
||||
"GPU error: first stream is on GPU {}, first ksk pointer is on GPU {}",
|
||||
streams.gpu_indexes[0].get(),
|
||||
keyswitch_key.gpu_index(0).get(),
|
||||
);
|
||||
let ct_modulus = radix_lwe_left
|
||||
.d_blocks
|
||||
.ciphertext_modulus()
|
||||
.raw_modulus_float();
|
||||
let (noise_reduction_type, ms_noise_reduction_key_ffi) =
|
||||
resolve_ms_noise_reduction_config(ms_noise_reduction_configuration, ct_modulus);
|
||||
|
||||
let mut mem_ptr: *mut i8 = std::ptr::null_mut();
|
||||
let mut radix_lwe_left_degrees = radix_lwe_left
|
||||
.info
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|b| b.degree.0)
|
||||
.collect();
|
||||
let mut radix_lwe_left_noise_levels = radix_lwe_left
|
||||
.info
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|b| b.noise_level.0)
|
||||
.collect();
|
||||
let mut cuda_ffi_radix_lwe_left = prepare_cuda_radix_vec_ffi(
|
||||
radix_lwe_left,
|
||||
&mut radix_lwe_left_degrees,
|
||||
&mut radix_lwe_left_noise_levels,
|
||||
num_radix_ciphertexts,
|
||||
);
|
||||
// Here even though the input is not modified, data is passed as mutable.
|
||||
// This avoids having to create two structs for the CudaRadixCiphertext pointers,
|
||||
// one const and the other mutable.
|
||||
// Having two structs on the Cuda side complicates things as we need to be sure we pass the
|
||||
// Const structure as input instead of the mutable structure, which leads to complicated
|
||||
// data manipulation on the C++ side to change mutability of data.
|
||||
let mut radix_lwe_right_degrees = radix_lwe_right
|
||||
.info
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|b| b.degree.0)
|
||||
.collect();
|
||||
let mut radix_lwe_right_noise_levels = radix_lwe_right
|
||||
.info
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|b| b.noise_level.0)
|
||||
.collect();
|
||||
let cuda_ffi_radix_lwe_right = prepare_cuda_radix_vec_ffi(
|
||||
radix_lwe_right,
|
||||
&mut radix_lwe_right_degrees,
|
||||
&mut radix_lwe_right_noise_levels,
|
||||
num_radix_ciphertexts,
|
||||
);
|
||||
scratch_cuda_integer_radix_bitop_kb_64(
|
||||
streams.ffi(),
|
||||
std::ptr::addr_of_mut!(mem_ptr),
|
||||
glwe_dimension.0 as u32,
|
||||
polynomial_size.0 as u32,
|
||||
big_lwe_dimension.0 as u32,
|
||||
small_lwe_dimension.0 as u32,
|
||||
ks_level.0 as u32,
|
||||
ks_base_log.0 as u32,
|
||||
pbs_level.0 as u32,
|
||||
pbs_base_log.0 as u32,
|
||||
grouping_factor.0 as u32,
|
||||
num_blocks * num_radix_ciphertexts,
|
||||
message_modulus.0 as u32,
|
||||
carry_modulus.0 as u32,
|
||||
pbs_type as u32,
|
||||
op as u32,
|
||||
true,
|
||||
noise_reduction_type as u32,
|
||||
);
|
||||
cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
streams.ffi(),
|
||||
&raw mut cuda_ffi_radix_lwe_left,
|
||||
&raw const cuda_ffi_radix_lwe_left,
|
||||
&raw const cuda_ffi_radix_lwe_right,
|
||||
mem_ptr,
|
||||
bootstrapping_key.ptr.as_ptr(),
|
||||
keyswitch_key.ptr.as_ptr(),
|
||||
&raw const ms_noise_reduction_key_ffi,
|
||||
);
|
||||
cleanup_cuda_integer_bitop(streams.ffi(), std::ptr::addr_of_mut!(mem_ptr));
|
||||
update_noise_degree(radix_lwe_left, &cuda_ffi_radix_lwe_left);
|
||||
streams.synchronize();
|
||||
}
|
||||
|
||||
@@ -214,7 +214,6 @@ impl CudaServerKey {
|
||||
PBSType::Classical,
|
||||
LweBskGroupingFactor(0),
|
||||
OutputFlag::None,
|
||||
0u32,
|
||||
d_bsk.ms_noise_reduction_configuration.as_ref(),
|
||||
)
|
||||
}
|
||||
@@ -234,7 +233,6 @@ impl CudaServerKey {
|
||||
PBSType::MultiBit,
|
||||
d_multibit_bsk.grouping_factor,
|
||||
OutputFlag::None,
|
||||
0u32,
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -10,8 +10,7 @@ use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
use crate::integer::gpu::{
|
||||
get_bitop_integer_radix_kb_size_on_gpu, get_full_propagate_assign_size_on_gpu,
|
||||
unchecked_bitop_integer_radix_kb_assign_async, unchecked_bitop_vec_radix_kb_assign, BitOpType,
|
||||
CudaServerKey, PBSType,
|
||||
unchecked_bitop_integer_radix_kb_assign_async, BitOpType, CudaServerKey, PBSType,
|
||||
};
|
||||
|
||||
impl CudaServerKey {
|
||||
@@ -978,142 +977,4 @@ impl CudaServerKey {
|
||||
let bitnot_mem = (lwe_ciphertext_count.0 * size_of::<u64>()) as u64;
|
||||
full_prop_mem.max(bitnot_mem)
|
||||
}
|
||||
|
||||
pub fn unchecked_bitop_vec_assign<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
op: BitOpType,
|
||||
num_radix_ciphertexts: u32,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
assert_eq!(
|
||||
ct_left.as_ref().d_blocks.lwe_dimension(),
|
||||
ct_right.as_ref().d_blocks.lwe_dimension()
|
||||
);
|
||||
assert_eq!(
|
||||
ct_left.as_ref().d_blocks.lwe_ciphertext_count(),
|
||||
ct_right.as_ref().d_blocks.lwe_ciphertext_count()
|
||||
);
|
||||
|
||||
let num_blocks =
|
||||
ct_left.as_ref().d_blocks.lwe_ciphertext_count().0 as u32 / num_radix_ciphertexts;
|
||||
|
||||
unsafe {
|
||||
match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => {
|
||||
unchecked_bitop_vec_radix_kb_assign(
|
||||
streams,
|
||||
ct_left.as_mut(),
|
||||
ct_right.as_ref(),
|
||||
&d_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
self.message_modulus,
|
||||
self.carry_modulus,
|
||||
d_bsk.glwe_dimension,
|
||||
d_bsk.polynomial_size,
|
||||
self.key_switching_key
|
||||
.input_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key
|
||||
.output_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_bsk.decomp_level_count,
|
||||
d_bsk.decomp_base_log,
|
||||
op,
|
||||
num_blocks,
|
||||
num_radix_ciphertexts,
|
||||
PBSType::Classical,
|
||||
LweBskGroupingFactor(0),
|
||||
d_bsk.ms_noise_reduction_configuration.as_ref(),
|
||||
);
|
||||
}
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
unchecked_bitop_vec_radix_kb_assign(
|
||||
streams,
|
||||
ct_left.as_mut(),
|
||||
ct_right.as_ref(),
|
||||
&d_multibit_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
self.message_modulus,
|
||||
self.carry_modulus,
|
||||
d_multibit_bsk.glwe_dimension,
|
||||
d_multibit_bsk.polynomial_size,
|
||||
self.key_switching_key
|
||||
.input_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key
|
||||
.output_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_multibit_bsk.decomp_level_count,
|
||||
d_multibit_bsk.decomp_base_log,
|
||||
op,
|
||||
num_blocks,
|
||||
num_radix_ciphertexts,
|
||||
PBSType::MultiBit,
|
||||
d_multibit_bsk.grouping_factor,
|
||||
None,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unchecked_bitand_vec<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &T,
|
||||
ct_right: &T,
|
||||
num_radix_ciphertexts: u32,
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = unsafe { ct_left.duplicate_async(streams) };
|
||||
self.unchecked_bitop_vec_assign(
|
||||
&mut result,
|
||||
ct_right,
|
||||
BitOpType::And,
|
||||
num_radix_ciphertexts,
|
||||
streams,
|
||||
);
|
||||
result
|
||||
}
|
||||
pub fn bitand_vec<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
ct_right: &T,
|
||||
num_radix_ciphertexts: u32,
|
||||
streams: &CudaStreams,
|
||||
) -> Vec<T> {
|
||||
let mut tmp_rhs;
|
||||
|
||||
let (lhs, rhs) = unsafe {
|
||||
match (
|
||||
ct_left.block_carries_are_empty(),
|
||||
ct_right.block_carries_are_empty(),
|
||||
) {
|
||||
(true, true) => (ct_left, ct_right),
|
||||
(true, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
(false, true) => {
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
(ct_left, ct_right)
|
||||
}
|
||||
(false, false) => {
|
||||
tmp_rhs = ct_right.duplicate_async(streams);
|
||||
|
||||
self.full_propagate_assign_async(ct_left, streams);
|
||||
self.full_propagate_assign_async(&mut tmp_rhs, streams);
|
||||
(ct_left, &tmp_rhs)
|
||||
}
|
||||
}
|
||||
};
|
||||
let result = self.unchecked_bitand_vec(lhs, rhs, num_radix_ciphertexts, streams);
|
||||
result.to_integer_radix_ciphertext_vec(num_radix_ciphertexts, streams)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::{CiphertextModulus, LweBskGroupingFactor, LweCiphertextCount};
|
||||
use crate::core_crypto::prelude::{LweBskGroupingFactor, LweCiphertextCount};
|
||||
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
use crate::integer::gpu::ciphertext::info::CudaRadixCiphertextInfo;
|
||||
use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaRadixCiphertext};
|
||||
@@ -38,7 +38,7 @@ impl CudaServerKey {
|
||||
let block = CudaLweCiphertextList::new(
|
||||
ct_left.as_ref().d_blocks.lwe_dimension(),
|
||||
LweCiphertextCount(1),
|
||||
CiphertextModulus::new_native(),
|
||||
self.ciphertext_modulus,
|
||||
streams,
|
||||
);
|
||||
let mut block_info = ct_left.as_ref().info.blocks[0];
|
||||
|
||||
@@ -261,7 +261,6 @@ impl CudaServerKey {
|
||||
PBSType::Classical,
|
||||
LweBskGroupingFactor(0),
|
||||
OutputFlag::None,
|
||||
0u32,
|
||||
d_bsk.ms_noise_reduction_configuration.as_ref(),
|
||||
)
|
||||
}
|
||||
@@ -281,7 +280,6 @@ impl CudaServerKey {
|
||||
PBSType::MultiBit,
|
||||
d_multibit_bsk.grouping_factor,
|
||||
OutputFlag::None,
|
||||
0u32,
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::{CiphertextModulus, LweBskGroupingFactor, LweCiphertextCount};
|
||||
use crate::core_crypto::prelude::{LweBskGroupingFactor, LweCiphertextCount};
|
||||
use crate::integer::block_decomposition::{BlockDecomposer, DecomposableInto};
|
||||
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
use crate::integer::gpu::ciphertext::info::CudaRadixCiphertextInfo;
|
||||
@@ -160,7 +160,7 @@ impl CudaServerKey {
|
||||
let block = CudaLweCiphertextList::new(
|
||||
ct.as_ref().d_blocks.lwe_dimension(),
|
||||
LweCiphertextCount(1),
|
||||
CiphertextModulus::new_native(),
|
||||
self.ciphertext_modulus,
|
||||
streams,
|
||||
);
|
||||
let mut block_info = ct.as_ref().info.blocks[0];
|
||||
|
||||
@@ -8,7 +8,7 @@ use syn::{
|
||||
use crate::{
|
||||
add_lifetime_param, add_trait_where_clause, add_where_lifetime_bound_to_generics,
|
||||
extend_where_clause, filter_unsized_bounds, parse_const_str, DESERIALIZE_TRAIT_NAME,
|
||||
LIFETIME_NAME, SERIALIZE_TRAIT_NAME,
|
||||
FROM_TRAIT_NAME, LIFETIME_NAME, RESULT_TYPE_NAME, SERIALIZE_TRAIT_NAME, TRY_FROM_TRAIT_NAME,
|
||||
};
|
||||
|
||||
/// Generates an impl block for the From trait. This will be:
|
||||
@@ -28,9 +28,11 @@ pub(crate) fn generate_from_trait_impl(
|
||||
from_variable_name: &str,
|
||||
) -> syn::Result<ItemImpl> {
|
||||
let from_variable = Ident::new(from_variable_name, Span::call_site());
|
||||
let from_trait: Path = parse_const_str(FROM_TRAIT_NAME);
|
||||
|
||||
Ok(parse_quote! {
|
||||
#[automatically_derived]
|
||||
impl #impl_generics From<#src> for #dest #where_clause {
|
||||
impl #impl_generics #from_trait<#src> for #dest #where_clause {
|
||||
fn from(#from_variable: #src) -> Self {
|
||||
#constructor
|
||||
}
|
||||
@@ -57,11 +59,14 @@ pub(crate) fn generate_try_from_trait_impl(
|
||||
from_variable_name: &str,
|
||||
) -> syn::Result<ItemImpl> {
|
||||
let from_variable = Ident::new(from_variable_name, Span::call_site());
|
||||
let result_type: Path = parse_const_str(RESULT_TYPE_NAME);
|
||||
let try_from_trait: Path = parse_const_str(TRY_FROM_TRAIT_NAME);
|
||||
|
||||
Ok(parse_quote! {
|
||||
#[automatically_derived]
|
||||
impl #impl_generics TryFrom<#src> for #dest #where_clause {
|
||||
impl #impl_generics #try_from_trait<#src> for #dest #where_clause {
|
||||
type Error = #error;
|
||||
fn try_from(#from_variable: #src) -> Result<Self, Self::Error> {
|
||||
fn try_from(#from_variable: #src) -> #result_type<Self, Self::Error> {
|
||||
#constructor
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@ pub(crate) const UNVERSIONIZE_ERROR_NAME: &str = crate_full_path!("UnversionizeE
|
||||
pub(crate) const SERIALIZE_TRAIT_NAME: &str = "::serde::Serialize";
|
||||
pub(crate) const DESERIALIZE_TRAIT_NAME: &str = "::serde::Deserialize";
|
||||
pub(crate) const DESERIALIZE_OWNED_TRAIT_NAME: &str = "::serde::de::DeserializeOwned";
|
||||
pub(crate) const TRY_FROM_TRAIT_NAME: &str = "::core::convert::TryFrom";
|
||||
pub(crate) const FROM_TRAIT_NAME: &str = "::core::convert::From";
|
||||
pub(crate) const TRY_INTO_TRAIT_NAME: &str = "::core::convert::TryInto";
|
||||
pub(crate) const INTO_TRAIT_NAME: &str = "::core::convert::Into";
|
||||
@@ -53,6 +54,8 @@ pub(crate) const ERROR_TRAIT_NAME: &str = "::core::error::Error";
|
||||
pub(crate) const SYNC_TRAIT_NAME: &str = "::core::marker::Sync";
|
||||
pub(crate) const SEND_TRAIT_NAME: &str = "::core::marker::Send";
|
||||
pub(crate) const DEFAULT_TRAIT_NAME: &str = "::core::default::Default";
|
||||
pub(crate) const RESULT_TYPE_NAME: &str = "::core::result::Result";
|
||||
pub(crate) const VEC_TYPE_NAME: &str = "::std::vec::Vec";
|
||||
pub(crate) const STATIC_LIFETIME_NAME: &str = "'static";
|
||||
|
||||
use associated::AssociatingTrait;
|
||||
@@ -240,6 +243,9 @@ pub fn derive_versionize(input: TokenStream) -> TokenStream {
|
||||
let unversionize_body = implementor.unversionize_method_body(&unversionize_arg_name);
|
||||
let unversionize_error: Path = parse_const_str(UNVERSIONIZE_ERROR_NAME);
|
||||
|
||||
let result_type: Path = parse_const_str(RESULT_TYPE_NAME);
|
||||
let vec_type: Path = parse_const_str(VEC_TYPE_NAME);
|
||||
|
||||
quote! {
|
||||
#version_trait_impl
|
||||
|
||||
@@ -269,7 +275,7 @@ pub fn derive_versionize(input: TokenStream) -> TokenStream {
|
||||
impl #trait_impl_generics #unversionize_trait for #input_ident #ty_generics
|
||||
#unversionize_trait_where_clause
|
||||
{
|
||||
fn unversionize(#unversionize_arg_name: Self::VersionedOwned) -> Result<Self, #unversionize_error> {
|
||||
fn unversionize(#unversionize_arg_name: Self::VersionedOwned) -> #result_type<Self, #unversionize_error> {
|
||||
#unversionize_body
|
||||
}
|
||||
}
|
||||
@@ -278,7 +284,7 @@ pub fn derive_versionize(input: TokenStream) -> TokenStream {
|
||||
impl #trait_impl_generics #versionize_slice_trait for #input_ident #ty_generics
|
||||
#versionize_trait_where_clause
|
||||
{
|
||||
type VersionedSlice<#lifetime> = Vec<<Self as #versionize_trait>::Versioned<#lifetime>> #versioned_type_where_clause;
|
||||
type VersionedSlice<#lifetime> = #vec_type<<Self as #versionize_trait>::Versioned<#lifetime>> #versioned_type_where_clause;
|
||||
|
||||
fn versionize_slice(slice: &[Self]) -> Self::VersionedSlice<'_> {
|
||||
slice.iter().map(|val| #versionize_trait::versionize(val)).collect()
|
||||
@@ -290,9 +296,9 @@ pub fn derive_versionize(input: TokenStream) -> TokenStream {
|
||||
#versionize_owned_trait_where_clause
|
||||
{
|
||||
|
||||
type VersionedVec = Vec<<Self as #versionize_owned_trait>::VersionedOwned> #versioned_owned_type_where_clause;
|
||||
type VersionedVec = #vec_type<<Self as #versionize_owned_trait>::VersionedOwned> #versioned_owned_type_where_clause;
|
||||
|
||||
fn versionize_vec(vec: Vec<Self>) -> Self::VersionedVec {
|
||||
fn versionize_vec(vec: #vec_type<Self>) -> Self::VersionedVec {
|
||||
vec.into_iter().map(|val| #versionize_owned_trait::versionize_owned(val)).collect()
|
||||
}
|
||||
}
|
||||
@@ -301,7 +307,7 @@ pub fn derive_versionize(input: TokenStream) -> TokenStream {
|
||||
impl #trait_impl_generics #unversionize_vec_trait for #input_ident #ty_generics
|
||||
#unversionize_trait_where_clause
|
||||
{
|
||||
fn unversionize_vec(versioned: Self::VersionedVec) -> Result<Vec<Self>, #unversionize_error> {
|
||||
fn unversionize_vec(versioned: Self::VersionedVec) -> #result_type<#vec_type<Self>, #unversionize_error> {
|
||||
versioned
|
||||
.into_iter()
|
||||
.map(|versioned| <Self as #unversionize_trait>::unversionize(versioned))
|
||||
@@ -346,6 +352,8 @@ pub fn derive_not_versioned(input: TokenStream) -> TokenStream {
|
||||
let unversionize_error: Path = parse_const_str(UNVERSIONIZE_ERROR_NAME);
|
||||
let lifetime = Lifetime::new(LIFETIME_NAME, Span::call_site());
|
||||
|
||||
let result_type: Path = parse_const_str(RESULT_TYPE_NAME);
|
||||
|
||||
quote! {
|
||||
#[automatically_derived]
|
||||
impl #impl_generics #versionize_trait for #input_ident #ty_generics #versionize_where_clause {
|
||||
@@ -367,7 +375,7 @@ pub fn derive_not_versioned(input: TokenStream) -> TokenStream {
|
||||
|
||||
#[automatically_derived]
|
||||
impl #impl_generics #unversionize_trait for #input_ident #ty_generics #versionize_owned_where_clause {
|
||||
fn unversionize(versioned: Self::VersionedOwned) -> Result<Self, #unversionize_error> {
|
||||
fn unversionize(versioned: Self::VersionedOwned) -> #result_type<Self, #unversionize_error> {
|
||||
Ok(versioned)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user