Mirror of https://github.com/zama-ai/tfhe-rs.git
Synced 2026-01-11 07:38:08 -05:00

Compare commits — 16 commits
edm/tbc_on ... hw-team/de
| Author | SHA1 | Date |
|---|---|---|
|  | 52b8e81ccb |  |
|  | b19a7773bb |  |
|  | 0342b0466d |  |
|  | edc9ef0026 |  |
|  | 92df46f8f2 |  |
|  | effb7ada6d |  |
|  | 49be544297 |  |
|  | 23600eb8e1 |  |
|  | 9708cc7fe9 |  |
|  | 4cdfccb659 |  |
|  | 031c3fe34f |  |
|  | ea99307cf5 |  |
|  | ca2a79f1fb |  |
|  | 0a59e86675 |  |
|  | 312ce494bf |  |
|  | 5f2e7e31f1 |  |
@@ -66,7 +66,7 @@ jobs:
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
   steps:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
     with:
       persist-credentials: 'true' # Needed to pull lfs data
       token: ${{ env.CHECKOUT_TOKEN }}
4  .github/workflows/aws_tfhe_fast_tests.yml (vendored)
@@ -63,7 +63,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -171,7 +171,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/aws_tfhe_integer_tests.yml (vendored)
@@ -50,7 +50,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -112,7 +112,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/aws_tfhe_noise_checks.yml (vendored)
@@ -60,7 +60,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -51,7 +51,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -112,7 +112,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/aws_tfhe_tests.yml (vendored)
@@ -72,7 +72,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -182,7 +182,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/aws_tfhe_wasm_tests.yml (vendored)
@@ -64,7 +64,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/benchmark_cpu_common.yml (vendored)
@@ -149,7 +149,7 @@ jobs:
   - name: Checkout tfhe-rs repo with tags
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -229,7 +229,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/benchmark_ct_key_sizes.yml (vendored)
@@ -49,7 +49,7 @@ jobs:
   - name: Checkout tfhe-rs repo with tags
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -105,7 +105,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
21  .github/workflows/benchmark_documentation.yml (vendored)
@@ -8,8 +8,13 @@ on:
       description: "Run CPU benchmarks"
       type: boolean
       default: true
-    run-gpu-benchmarks:
-      description: "Run GPU benchmarks"
+    # GPU benchmarks are split because of resource scarcity.
+    run-gpu-integer-benchmarks:
+      description: "Run GPU integer benchmarks"
       type: boolean
       default: true
+    run-gpu-core-crypto-benchmarks:
+      description: "Run GPU core-crypto benchmarks"
+      type: boolean
+      default: true
     run-hpu-benchmarks:
@@ -52,7 +57,7 @@ jobs:
   run-benchmarks-gpu-integer:
     name: benchmark_documentation/run-benchmarks-gpu-integer
     uses: ./.github/workflows/benchmark_gpu_common.yml
-    if: inputs.run-gpu-benchmarks
+    if: inputs.run-gpu-integer-benchmarks
     with:
       profile: multi-h100-sxm5
       hardware_name: n3-H100-SXM5x8
@@ -113,7 +118,7 @@ jobs:
   run-benchmarks-gpu-core-crypto:
     name: benchmark_documentation/run-benchmarks-gpu-core-crypto
     uses: ./.github/workflows/benchmark_gpu_common.yml
-    if: inputs.run-gpu-benchmarks
+    if: inputs.run-gpu-core-crypto-benchmarks
     with:
       profile: multi-h100-sxm5
       hardware_name: n3-H100-SXM5x8
@@ -133,7 +138,7 @@ jobs:
   generate-svgs-with-benchmarks-run:
     name: benchmark-documentation/generate-svgs-with-benchmarks-run
     if: ${{ always() &&
-      (inputs.run-cpu-benchmarks || inputs.run-gpu-benchmarks ||inputs.run-hpu-benchmarks) &&
+      (inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks ||inputs.run-hpu-benchmarks) &&
       inputs.generate-svgs }}
     needs: [
       run-benchmarks-cpu-integer, run-benchmarks-gpu-integer, run-benchmarks-hpu-integer,
@@ -143,7 +148,7 @@ jobs:
     with:
       time_span_days: 5
       generate-cpu-svgs: ${{ inputs.run-cpu-benchmarks }}
-      generate-gpu-svgs: ${{ inputs.run-gpu-benchmarks }}
+      generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks }}
       generate-hpu-svgs: ${{ inputs.run-hpu-benchmarks }}
     secrets:
       DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
@@ -152,7 +157,7 @@ jobs:
   generate-svgs-without-benchmarks-run:
     name: benchmark-documentation/generate-svgs-without-benchmarks-run
-    if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-benchmarks || inputs.run-hpu-benchmarks) &&
+    if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-hpu-benchmarks) &&
       inputs.generate-svgs }}
     uses: ./.github/workflows/generate_svgs.yml
     with:
@@ -175,7 +180,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
     with:
       persist-credentials: 'false'
8  .github/workflows/benchmark_gpu_4090.yml (vendored)
@@ -40,7 +40,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -63,7 +63,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -123,7 +123,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -146,7 +146,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
6  .github/workflows/benchmark_gpu_common.yml (vendored)
@@ -175,7 +175,7 @@ jobs:
   - name: Checkout tfhe-rs repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -209,7 +209,7 @@ jobs:
   - name: Checkout tfhe-rs repo with tags
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -287,7 +287,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -130,7 +130,7 @@ jobs:
       git lfs install
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
     with:
       path: tfhe-rs
       persist-credentials: false
@@ -141,7 +141,7 @@ jobs:
       ls
   - name: Checkout fhevm
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
     with:
       repository: zama-ai/fhevm
       persist-credentials: 'false'
@@ -223,7 +223,7 @@ jobs:
     working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
   - name: Use Node.js
-    uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+    uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
     with:
       node-version: 20.x
@@ -299,7 +299,7 @@ jobs:
     path: fhevm/$${{ env.RESULTS_FILENAME }}
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
     with:
       repository: zama-ai/slab
       path: slab
4  .github/workflows/benchmark_hpu_common.yml (vendored)
@@ -126,7 +126,7 @@ jobs:
   - name: Checkout tfhe-rs repo with tags
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -191,7 +191,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
10  .github/workflows/benchmark_perf_regression.yml (vendored)
@@ -50,7 +50,7 @@ jobs:
   - name: Checkout tfhe-rs repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -164,7 +164,7 @@ jobs:
   - name: Checkout tfhe-rs repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -191,7 +191,7 @@ jobs:
   - name: Checkout tfhe-rs repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -245,7 +245,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -305,7 +305,7 @@ jobs:
   - name: Checkout tfhe-rs repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/benchmark_tfhe_fft.yml (vendored)
@@ -55,7 +55,7 @@ jobs:
   - name: Checkout tfhe-rs repo with tags
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -102,7 +102,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/benchmark_tfhe_ntt.yml (vendored)
@@ -55,7 +55,7 @@ jobs:
   - name: Checkout tfhe-rs repo with tags
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -102,7 +102,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
6  .github/workflows/benchmark_wasm_client.yml (vendored)
@@ -39,7 +39,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -91,7 +91,7 @@ jobs:
   - name: Checkout tfhe-rs repo with tags
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -175,7 +175,7 @@ jobs:
   - name: Checkout Slab repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/cargo_audit.yml (vendored)
@@ -26,7 +26,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/cargo_build.yml (vendored)
@@ -24,7 +24,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/cargo_build_common.yml (vendored)
@@ -140,7 +140,7 @@ jobs:
   - name: Checkout tfhe-rs repo
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/cargo_build_tfhe_fft.yml (vendored)
@@ -26,7 +26,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/cargo_build_tfhe_ntt.yml (vendored)
@@ -24,7 +24,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
6  .github/workflows/cargo_test_fft.yml (vendored)
@@ -29,7 +29,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -56,7 +56,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -92,7 +92,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/cargo_test_ntt.yml (vendored)
@@ -31,7 +31,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -87,7 +87,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/ci_lint.yml (vendored)
@@ -20,7 +20,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/code_coverage.yml (vendored)
@@ -50,7 +50,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -62,7 +62,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/generate_svg_common.yml (vendored)
@@ -43,7 +43,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -81,7 +81,7 @@ jobs:
     python3 -m pip install -r ci/data_extractor/requirements.txt
     python3 ci/data_extractor/src/data_extractor.py "${OUTPUT_FILENAME}" \
       --generate-svg \
-      --backend-comparison\
+      --backends-comparison \
       --time-span-days "${TIME_SPAN}"
   env:
     OUTPUT_FILENAME: ${{ inputs.output_filename }}
2  .github/workflows/gpu_4090_tests.yml (vendored)
@@ -41,7 +41,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -23,8 +23,8 @@ on:
   # Allows you to run this workflow manually from the Actions tab as an alternative.
   workflow_dispatch:
   schedule:
-    # every 3 months
-    - cron: "0 0 1 */3 *"
+    # every month
+    - cron: "0 0 1 * *"

 permissions:
   contents: read
@@ -50,7 +50,7 @@ jobs:
     slab-url: ${{ secrets.SLAB_BASE_URL }}
     job-secret: ${{ secrets.JOB_SECRET }}
     backend: hyperstack
-    profile: gpu-test
+    profile: single-h100

   # This instance will be spawned especially for pull-request from forked repository
   - name: Start GitHub instance
@@ -79,7 +79,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/gpu_fast_h100_tests.yml (vendored)
@@ -40,7 +40,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -129,7 +129,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/gpu_fast_tests.yml (vendored)
@@ -39,7 +39,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -114,7 +114,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/gpu_full_h100_tests.yml (vendored)
@@ -68,7 +68,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -40,7 +40,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -116,7 +116,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -65,7 +65,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/gpu_memory_sanitizer.yml (vendored)
@@ -78,7 +78,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -78,7 +78,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/gpu_pcc.yml (vendored)
@@ -74,7 +74,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -40,7 +40,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -116,7 +116,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -40,7 +40,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -129,7 +129,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -41,7 +41,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -117,7 +117,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -40,7 +40,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -116,7 +116,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -40,7 +40,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -129,7 +129,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -41,7 +41,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -117,7 +117,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/hpu_hlapi_tests.yml (vendored)
@@ -32,7 +32,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -83,7 +83,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/integer_long_run_tests.yml (vendored)
@@ -53,7 +53,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
2  .github/workflows/m1_tests.yml (vendored)
@@ -41,7 +41,7 @@ jobs:
-    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
4  .github/workflows/make_release_common.yml (vendored)
@@ -52,7 +52,7 @@ jobs:
   - name: Checkout
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
@@ -93,7 +93,7 @@ jobs:
   - name: Checkout
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
2  .github/workflows/make_release_cuda.yml (vendored)
@@ -64,7 +64,7 @@ jobs:
   - name: Checkout
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
4  .github/workflows/make_release_tfhe.yml (vendored)
@@ -68,7 +68,7 @@ jobs:
   - name: Checkout
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
@@ -85,7 +85,7 @@ jobs:
     make build_web_js_api_parallel
   - name: Authenticate on NPM
-    uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+    uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
     with:
       node-version: '24'
       registry-url: 'https://registry.npmjs.org'
4  .github/workflows/parameters_check.yml (vendored)
@@ -60,7 +60,7 @@ jobs:
   - name: Checkout tfhe-rs
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
@@ -71,7 +71,7 @@ jobs:
   - name: Checkout lattice-estimator
-    uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3
+    uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
     with:
       repository: malb/lattice-estimator
       path: lattice_estimator
2  .github/workflows/unverified_prs.yml (vendored)
@@ -17,7 +17,7 @@ jobs:
     issues: read # Needed to fetch all issues
     pull-requests: write # Needed to write message and close the PR
   steps:
-    - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
+    - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v10.1.1
     with:
       stale-pr-message: 'This PR is unverified and has been open for 2 days, it will now be closed. If you want to contribute please sign the CLA as indicated by the bot.'
       days-before-stale: 2
@@ -35,7 +35,8 @@ template <typename Torus> struct int_aes_lut_buffers {
       params.glwe_dimension, params.polynomial_size, params.message_modulus,
       params.carry_modulus, and_lambda, allocate_gpu_memory);
   auto active_streams_and_lut = streams.active_gpu_subset(
-      SBOX_MAX_AND_GATES * num_aes_inputs * sbox_parallelism);
+      SBOX_MAX_AND_GATES * num_aes_inputs * sbox_parallelism,
+      params.pbs_type);
   this->and_lut->broadcast_lut(active_streams_and_lut);
   this->and_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);

@@ -50,8 +51,8 @@ template <typename Torus> struct int_aes_lut_buffers {
       this->flush_lut->get_degree(0), this->flush_lut->get_max_degree(0),
       params.glwe_dimension, params.polynomial_size, params.message_modulus,
       params.carry_modulus, flush_lambda, allocate_gpu_memory);
-  auto active_streams_flush_lut =
-      streams.active_gpu_subset(AES_STATE_BITS * num_aes_inputs);
+  auto active_streams_flush_lut = streams.active_gpu_subset(
+      AES_STATE_BITS * num_aes_inputs, params.pbs_type);
   this->flush_lut->broadcast_lut(active_streams_flush_lut);
   this->flush_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);

@@ -65,7 +66,8 @@ template <typename Torus> struct int_aes_lut_buffers {
       this->carry_lut->get_degree(0), this->carry_lut->get_max_degree(0),
       params.glwe_dimension, params.polynomial_size, params.message_modulus,
       params.carry_modulus, carry_lambda, allocate_gpu_memory);
-  auto active_streams_carry_lut = streams.active_gpu_subset(num_aes_inputs);
+  auto active_streams_carry_lut =
+      streams.active_gpu_subset(num_aes_inputs, params.pbs_type);
   this->carry_lut->broadcast_lut(active_streams_carry_lut);
   this->carry_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
 }
@@ -8,7 +8,8 @@

 extern std::mutex m;
 extern bool p2p_enabled;
-extern const int THRESHOLD_MULTI_GPU;
+extern const int THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS;
+extern const int THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;

 extern "C" {
 int32_t cuda_setup_multi_gpu(int device_0_id);

@@ -39,7 +40,8 @@ get_variant_element(const std::variant<std::vector<Torus>, Torus> &variant,
   }
 }

-uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count);
+uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count,
+                              PBS_TYPE pbs_type);

 int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);
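The two hunks above replace the single THRESHOLD_MULTI_GPU constant with per-PBS-flavor thresholds and thread a PBS_TYPE argument through get_active_gpu_count. The following is a minimal sketch of how such an overload could choose its threshold; the constant and function names come from the diff, but the placeholder values and the rounding policy are assumptions for illustration, not the backend's actual implementation.

#include <algorithm>
#include <cstdint>

enum PBS_TYPE { CLASSICAL, MULTI_BIT }; // enumerators assumed for this sketch

// Placeholder values; the real constants are defined elsewhere in the backend.
const int THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS = 4;
const int THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS = 8;

uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count,
                              PBS_TYPE pbs_type) {
  // Multi-bit and classical PBS saturate a GPU at different workload sizes,
  // so each flavor gets its own per-GPU threshold.
  const uint32_t threshold = (pbs_type == PBS_TYPE::MULTI_BIT)
                                 ? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
                                 : THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
  // Only spread work across more GPUs once each one would receive at least
  // `threshold` inputs, and never report more GPUs than exist.
  const uint32_t wanted = (num_inputs + threshold - 1) / threshold;
  return std::max<uint32_t>(1u, std::min(wanted, gpu_count));
}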
@@ -73,9 +75,10 @@ public:

   // Returns a subset of this set as an active subset. An active subset is one
   // that is temporarily used to perform some computation
-  CudaStreams active_gpu_subset(int num_radix_blocks) {
-    return CudaStreams(_streams, _gpu_indexes,
-                       get_active_gpu_count(num_radix_blocks, _gpu_count));
+  CudaStreams active_gpu_subset(int num_radix_blocks, PBS_TYPE pbs_type) {
+    return CudaStreams(
+        _streams, _gpu_indexes,
+        get_active_gpu_count(num_radix_blocks, _gpu_count, pbs_type));
   }

   // Returns a CudaStreams struct containing only the ith stream
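Every call site in the hunks that follow applies the same pattern: the PBS flavor now travels together with the block count into active_gpu_subset, so the GPU-count heuristic can apply the matching threshold. Below is a toy illustration of that "view over the first k streams" idea under the same assumptions as the previous sketch; it is not the backend's CudaStreams class.

#include <cstdint>
#include <vector>

enum PBS_TYPE { CLASSICAL, MULTI_BIT };

// Declared in the previous sketch: decides how many GPUs are worth activating.
uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count,
                              PBS_TYPE pbs_type);

struct StreamSet {
  std::vector<int> gpu_indexes; // one entry per stream/GPU owned by the set

  // Mirrors the shape of CudaStreams::active_gpu_subset after this change:
  // an active subset is simply the first k streams, where k now depends on
  // the PBS flavor as well as the number of radix blocks.
  StreamSet active_gpu_subset(uint32_t num_radix_blocks,
                              PBS_TYPE pbs_type) const {
    const uint32_t k = get_active_gpu_count(
        num_radix_blocks, static_cast<uint32_t>(gpu_indexes.size()), pbs_type);
    StreamSet subset;
    subset.gpu_indexes.assign(gpu_indexes.begin(), gpu_indexes.begin() + k);
    return subset;
  }
};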
@@ -20,7 +20,8 @@ template <typename Torus> struct boolean_bitop_buffer {
     gpu_memory_allocated = allocate_gpu_memory;
     this->op = op;
     this->params = params;
-    auto active_streams = streams.active_gpu_subset(lwe_ciphertext_count);
+    auto active_streams =
+        streams.active_gpu_subset(lwe_ciphertext_count, params.pbs_type);
     this->unchecked = is_unchecked;
     switch (op) {
     case BITAND:

@@ -119,7 +120,8 @@ template <typename Torus> struct int_bitop_buffer {
     gpu_memory_allocated = allocate_gpu_memory;
     this->op = op;
     this->params = params;
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
     switch (op) {
     case BITAND:
     case BITOR:

@@ -216,7 +218,8 @@ template <typename Torus> struct boolean_bitnot_buffer {
       message_extract_lut->get_max_degree(0), params.glwe_dimension,
       params.polynomial_size, params.message_modulus, params.carry_modulus,
       lut_f_message_extract, gpu_memory_allocated);
-    auto active_streams = streams.active_gpu_subset(lwe_ciphertext_count);
+    auto active_streams =
+        streams.active_gpu_subset(lwe_ciphertext_count, params.pbs_type);
     message_extract_lut->broadcast_lut(active_streams);
   }
 }
@@ -39,7 +39,8 @@ template <typename Torus> struct int_extend_radix_with_sign_msb_buffer {
         },
         allocate_gpu_memory);

-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
     lut->broadcast_lut(active_streams);

     this->last_block = new CudaRadixCiphertextFFI;
@@ -14,7 +14,8 @@ template <typename Torus> struct int_zero_out_if_buffer {
       uint64_t &size_tracker) {
     gpu_memory_allocated = allocate_gpu_memory;
     this->params = params;
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);

     tmp = new CudaRadixCiphertextFFI;
     create_zero_radix_ciphertext_async<Torus>(

@@ -114,9 +115,11 @@ template <typename Torus> struct int_cmux_buffer {
         predicate_lut->get_lut_indexes(0, 0), h_lut_indexes,
         2 * num_radix_blocks * sizeof(Torus), streams.stream(0),
         streams.gpu_index(0), allocate_gpu_memory);
-    auto active_streams_pred = streams.active_gpu_subset(2 * num_radix_blocks);
+    auto active_streams_pred =
+        streams.active_gpu_subset(2 * num_radix_blocks, params.pbs_type);
     predicate_lut->broadcast_lut(active_streams_pred);
-    auto active_streams_msg = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams_msg =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
     message_extract_lut->broadcast_lut(active_streams_msg);
   }
@@ -52,7 +52,8 @@ template <typename Torus> struct int_are_all_block_true_buffer {
       params.glwe_dimension, params.polynomial_size, params.message_modulus,
       params.carry_modulus, is_max_value_f, gpu_memory_allocated);

-    auto active_streams = streams.active_gpu_subset(max_chunks);
+    auto active_streams =
+        streams.active_gpu_subset(max_chunks, params.pbs_type);
     is_max_value->broadcast_lut(active_streams);
   }

@@ -108,7 +109,8 @@ template <typename Torus> struct int_comparison_eq_buffer {
       params.glwe_dimension, params.polynomial_size, params.message_modulus,
       params.carry_modulus, is_non_zero_lut_f, gpu_memory_allocated);

-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
     is_non_zero_lut->broadcast_lut(active_streams);

     // Scalar may have up to num_radix_blocks blocks

@@ -238,7 +240,8 @@ template <typename Torus> struct int_tree_sign_reduction_buffer {
       tree_inner_leaf_lut->get_max_degree(0), params.glwe_dimension,
       params.polynomial_size, params.message_modulus, params.carry_modulus,
       block_selector_f, gpu_memory_allocated);
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
     tree_inner_leaf_lut->broadcast_lut(active_streams);
   }

@@ -390,7 +393,8 @@ template <typename Torus> struct int_comparison_buffer {
     this->op = op;
     this->is_signed = is_signed;

-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);

     identity_lut_f = [](Torus x) -> Torus { return x; };

@@ -523,7 +527,7 @@ template <typename Torus> struct int_comparison_buffer {
       signed_lut->get_degree(0), signed_lut->get_max_degree(0),
       params.glwe_dimension, params.polynomial_size, params.message_modulus,
       params.carry_modulus, signed_lut_f, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(1);
+      auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
       signed_lut->broadcast_lut(active_streams);
     }
     preallocated_h_lut = (Torus *)malloc(
@@ -116,7 +116,8 @@ template <typename Torus> struct int_decompression {
         effective_compression_carry_modulus,
         encryption_params.message_modulus, encryption_params.carry_modulus,
         decompression_rescale_f, gpu_memory_allocated);
-    auto active_streams = streams.active_gpu_subset(num_blocks_to_decompress);
+    auto active_streams = streams.active_gpu_subset(
+        num_blocks_to_decompress, decompression_rescale_lut->params.pbs_type);
     decompression_rescale_lut->broadcast_lut(active_streams);
   }
 }
@@ -356,7 +356,8 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
         luts[j]->get_degree(0), luts[j]->get_max_degree(0),
         params.glwe_dimension, params.polynomial_size, params.message_modulus,
         params.carry_modulus, lut_f_message_extract, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(num_blocks);
+      auto active_streams =
+          streams.active_gpu_subset(num_blocks, params.pbs_type);
       luts[j]->broadcast_lut(active_streams);
     }
   }

@@ -1012,7 +1013,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
         masking_luts_1[i]->get_max_degree(0), params.glwe_dimension,
         params.polynomial_size, params.message_modulus, params.carry_modulus,
         lut_f_masking, gpu_memory_allocated);
-      auto active_streams_1 = streams.active_gpu_subset(1);
+      auto active_streams_1 = streams.active_gpu_subset(1, params.pbs_type);
       masking_luts_1[i]->broadcast_lut(active_streams_1);

       generate_device_accumulator<Torus>(

@@ -1021,7 +1022,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
         masking_luts_2[i]->get_max_degree(0), params.glwe_dimension,
         params.polynomial_size, params.message_modulus, params.carry_modulus,
         lut_f_masking, gpu_memory_allocated);
-      auto active_streams_2 = streams.active_gpu_subset(num_blocks);
+      auto active_streams_2 =
+          streams.active_gpu_subset(num_blocks, params.pbs_type);
       masking_luts_2[i]->broadcast_lut(active_streams_2);
     }

@@ -1040,7 +1042,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {

     int_radix_lut<Torus> *luts[2] = {message_extract_lut_1,
                                      message_extract_lut_2};
-    auto active_streams = streams.active_gpu_subset(num_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_blocks, params.pbs_type);
     for (int j = 0; j < 2; j++) {
       generate_device_accumulator<Torus>(
           streams.stream(0), streams.gpu_index(0), luts[j]->get_lut(0, 0),

@@ -1128,7 +1131,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {

     // merge_overflow_flags_luts
     merge_overflow_flags_luts = new int_radix_lut<Torus> *[num_bits_in_message];
-    auto active_gpu_count_for_bits = streams.active_gpu_subset(1);
+    auto active_gpu_count_for_bits =
+        streams.active_gpu_subset(1, params.pbs_type);
     for (int i = 0; i < num_bits_in_message; i++) {
       auto lut_f_bit = [i](Torus x, Torus y) -> Torus {
         return (x == 0 && y == 0) << i;

@@ -1152,7 +1156,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
       uint32_t num_blocks, bool allocate_gpu_memory,
       uint64_t &size_tracker) {
     gpu_memory_allocated = allocate_gpu_memory;
-    auto active_streams = streams.active_gpu_subset(2 * num_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(2 * num_blocks, params.pbs_type);
     this->params = params;

     if (params.message_modulus == 4 && params.carry_modulus == 4 &&

@@ -1473,7 +1478,8 @@ template <typename Torus> struct int_div_rem_memory {
       bool allocate_gpu_memory, uint64_t &size_tracker) {

     gpu_memory_allocated = allocate_gpu_memory;
-    this->active_streams = streams.active_gpu_subset(num_blocks);
+    this->active_streams =
+        streams.active_gpu_subset(num_blocks, params.pbs_type);
     this->params = params;
     this->is_signed = is_signed;

@@ -1559,7 +1565,7 @@ template <typename Torus> struct int_div_rem_memory {
       params.polynomial_size, params.message_modulus, params.carry_modulus,
       f_compare_extracted_signed_bits, gpu_memory_allocated);
     auto active_gpu_count_cmp =
-        streams.active_gpu_subset(1); // only 1 block needed
+        streams.active_gpu_subset(1, params.pbs_type); // only 1 block needed
     compare_signed_bits_lut->broadcast_lut(active_gpu_count_cmp);
   }
 }
@@ -20,7 +20,8 @@ template <typename Torus> struct int_prepare_count_of_consecutive_bits_buffer {
     this->allocate_gpu_memory = allocate_gpu_memory;
     this->direction = direction;
     this->bit_value = bit_value;
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
     this->univ_lut_mem =
         new int_radix_lut<Torus>(streams, params, 1, num_radix_blocks,
                                  allocate_gpu_memory, size_tracker);

@@ -246,7 +247,8 @@ template <typename Torus> struct int_ilog2_buffer {
         params.glwe_dimension, params.polynomial_size,
         params.message_modulus, params.carry_modulus,
         lut_message_lambda, allocate_gpu_memory);
-    auto active_streams = streams.active_gpu_subset(counter_num_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(counter_num_blocks, params.pbs_type);
     lut_message_not->broadcast_lut(active_streams);

     this->lut_carry_not =
@@ -371,7 +371,8 @@ struct int_radix_lut_custom_input_output {
     this->num_input_blocks = num_input_blocks;
     this->gpu_memory_allocated = allocate_gpu_memory;

-    this->active_streams = streams.active_gpu_subset(num_radix_blocks);
+    this->active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
   }

   void setup_degrees() {
@@ -382,14 +383,18 @@ struct int_radix_lut_custom_input_output {
|
||||
|
||||
void allocate_pbs_buffers(int_radix_params params, uint32_t num_radix_blocks,
|
||||
bool allocate_gpu_memory, uint64_t &size_tracker) {
|
||||
|
||||
int threshold = (params.pbs_type == PBS_TYPE::MULTI_BIT)
|
||||
? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
|
||||
: THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
|
||||
|
||||
for (uint i = 0; i < active_streams.count(); i++) {
|
||||
cuda_set_device(active_streams.gpu_index(i));
|
||||
int8_t *gpu_pbs_buffer;
|
||||
auto num_blocks_on_gpu =
|
||||
std::min((int)num_radix_blocks,
|
||||
std::max(THRESHOLD_MULTI_GPU,
|
||||
get_num_inputs_on_gpu(num_radix_blocks, i,
|
||||
active_streams.count())));
|
||||
auto num_blocks_on_gpu = std::min(
|
||||
(int)num_radix_blocks,
|
||||
std::max(threshold, get_num_inputs_on_gpu(num_radix_blocks, i,
|
||||
active_streams.count())));
|
||||
|
||||
uint64_t size = 0;
|
||||
execute_scratch_pbs<OutputTorus>(
|
||||
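In the allocation hunk above, the per-GPU block count keeps the same min/max clamp as before, but the lower bound is now the threshold chosen from the PBS type. A self-contained sketch of that split is below; `even_split_on_gpu` is a stand-in for the library's `get_num_inputs_on_gpu`, whose exact rounding is not shown in this diff, so the even split is an assumption.

```cpp
#include <algorithm>
#include <cstdint>

// Stand-in for get_num_inputs_on_gpu: an even split of the inputs across
// GPUs, with earlier GPUs taking the remainder (assumed rounding).
int even_split_on_gpu(int num_inputs, int gpu_index, int gpu_count) {
  int base = num_inputs / gpu_count;
  int rem = num_inputs % gpu_count;
  return base + (gpu_index < rem ? 1 : 0);
}

// Mirrors the clamp used when sizing per-GPU PBS scratch buffers: never
// fewer blocks than the multi-GPU threshold, never more than the whole input.
int blocks_on_gpu(int num_radix_blocks, int gpu_index, int gpu_count,
                  int threshold) {
  return std::min(num_radix_blocks,
                  std::max(threshold,
                           even_split_on_gpu(num_radix_blocks, gpu_index,
                                             gpu_count)));
}
```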
@@ -424,18 +429,22 @@ struct int_radix_lut_custom_input_output {
/// back to the original indexing
multi_gpu_alloc_lwe_async(active_streams, lwe_array_in_vec,
                          num_radix_blocks, params.big_lwe_dimension + 1,
                          size_tracker, allocate_gpu_memory);
                          size_tracker, params.pbs_type,
                          allocate_gpu_memory);
multi_gpu_alloc_lwe_async(active_streams, lwe_after_ks_vec,
                          num_radix_blocks, params.small_lwe_dimension + 1,
                          size_tracker, allocate_gpu_memory);
                          size_tracker, params.pbs_type,
                          allocate_gpu_memory);
if (num_many_lut > 1) {
multi_gpu_alloc_lwe_many_lut_output_async(
    active_streams, lwe_after_pbs_vec, num_radix_blocks, num_many_lut,
    params.big_lwe_dimension + 1, size_tracker, allocate_gpu_memory);
    params.big_lwe_dimension + 1, size_tracker, params.pbs_type,
    allocate_gpu_memory);
} else {
multi_gpu_alloc_lwe_async(active_streams, lwe_after_pbs_vec,
                          num_radix_blocks, params.big_lwe_dimension + 1,
                          size_tracker, allocate_gpu_memory);
                          size_tracker, params.pbs_type,
                          allocate_gpu_memory);
}
multi_gpu_alloc_array_async(active_streams, lwe_trivial_indexes_vec,
                            num_radix_blocks, size_tracker,
@@ -451,12 +460,14 @@ struct int_radix_lut_custom_input_output {
}

void setup_gemm_batch_ks_temp_buffers(uint64_t &size_tracker) {
int threshold = (params.pbs_type == PBS_TYPE::MULTI_BIT)
                    ? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
                    : THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;

auto inputs_on_gpu =
    std::min((int)num_input_blocks,
             std::max(THRESHOLD_MULTI_GPU,
                      get_num_inputs_on_gpu(num_input_blocks, 0,
                                            active_streams.count())));
auto inputs_on_gpu = std::min(
    (int)num_input_blocks,
    std::max(threshold, get_num_inputs_on_gpu(num_input_blocks, 0,
                                              active_streams.count())));

if (inputs_on_gpu >= get_threshold_ks_gemm()) {
for (auto i = 0; i < active_streams.count(); ++i) {
@@ -798,16 +809,20 @@ struct int_radix_lut_custom_input_output {
void allocate_lwe_vector_for_non_trivial_indexes(
    CudaStreams streams, uint64_t max_num_radix_blocks,
    uint64_t &size_tracker, bool allocate_gpu_memory) {

int threshold = (params.pbs_type == PBS_TYPE::MULTI_BIT)
                    ? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
                    : THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;

// We need to create the auxiliary array only in GPU 0
if (active_streams.count() > 1) {
lwe_aligned_vec.resize(active_streams.count());
for (uint i = 0; i < active_streams.count(); i++) {
uint64_t size_tracker_on_array_i = 0;
auto inputs_on_gpu =
    std::min((int)max_num_radix_blocks,
             std::max(THRESHOLD_MULTI_GPU,
                      get_num_inputs_on_gpu(max_num_radix_blocks, i,
                                            active_streams.count())));
auto inputs_on_gpu = std::min(
    (int)max_num_radix_blocks,
    std::max(threshold, get_num_inputs_on_gpu(max_num_radix_blocks, i,
                                              active_streams.count())));
InputTorus *d_array =
    (InputTorus *)cuda_malloc_with_size_tracking_async(
        inputs_on_gpu * (params.big_lwe_dimension + 1) *
@@ -998,8 +1013,8 @@ template <typename Torus> struct int_bit_extract_luts_buffer {
num_radix_blocks * bits_per_block * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);

auto active_streams =
    streams.active_gpu_subset(bits_per_block * num_radix_blocks);
auto active_streams = streams.active_gpu_subset(
    bits_per_block * num_radix_blocks, params.pbs_type);
lut->broadcast_lut(active_streams);

/**
@@ -1266,7 +1281,8 @@ template <typename Torus> struct int_sum_ciphertexts_vec_memory {
luts_message_carry->get_max_degree(1), params.glwe_dimension,
params.polynomial_size, message_modulus, params.carry_modulus,
lut_f_carry, gpu_memory_allocated);
auto active_gpu_count_mc = streams.active_gpu_subset(pbs_count);
auto active_gpu_count_mc =
    streams.active_gpu_subset(pbs_count, params.pbs_type);
luts_message_carry->broadcast_lut(active_gpu_count_mc);
}
}
@@ -1436,7 +1452,8 @@ template <typename Torus> struct int_seq_group_prop_memory {
cuda_memcpy_with_size_tracking_async_to_gpu(
    seq_lut_indexes, h_seq_lut_indexes, num_seq_luts * sizeof(Torus),
    streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
auto active_streams = streams.active_gpu_subset(num_seq_luts);
auto active_streams =
    streams.active_gpu_subset(num_seq_luts, params.pbs_type);
lut_sequential_algorithm->broadcast_lut(active_streams);
free(h_seq_lut_indexes);
};
@@ -1490,7 +1507,8 @@ template <typename Torus> struct int_hs_group_prop_memory {
lut_hillis_steele->get_max_degree(0), glwe_dimension, polynomial_size,
message_modulus, carry_modulus, f_lut_hillis_steele,
gpu_memory_allocated);
auto active_streams = streams.active_gpu_subset(num_groups);
auto active_streams =
    streams.active_gpu_subset(num_groups, params.pbs_type);
lut_hillis_steele->broadcast_lut(active_streams);
};
void release(CudaStreams streams) {
@@ -1667,7 +1685,8 @@ template <typename Torus> struct int_shifted_blocks_and_states_memory {
lut_indexes, h_lut_indexes, lut_indexes_size, streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
// Do I need to do something else for the multi-gpu?
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
luts_array_first_step->broadcast_lut(active_streams);
};
void release(CudaStreams streams) {
@@ -1932,7 +1951,8 @@ template <typename Torus> struct int_prop_simu_group_carries_memory {
scalar_array_cum_sum, h_scalar_array_cum_sum,
num_radix_blocks * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
luts_array_second_step->broadcast_lut(active_streams);

if (use_sequential_algorithm_to_resolve_group_carries) {
@@ -1957,7 +1977,8 @@ template <typename Torus> struct int_prop_simu_group_carries_memory {
cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
    lut_indexes, new_lut_indexes, new_num_blocks * sizeof(Torus),
    streams.stream(0), streams.gpu_index(0), gpu_memory_allocated);
auto new_active_streams = streams.active_gpu_subset(new_num_blocks);
auto new_active_streams = streams.active_gpu_subset(
    new_num_blocks, luts_array_second_step->params.pbs_type);
// We just need to update the lut indexes so we use false here
luts_array_second_step->broadcast_lut(new_active_streams, false);

@@ -2124,7 +2145,7 @@ template <typename Torus> struct int_sc_prop_memory {
polynomial_size, message_modulus, carry_modulus, f_overflow_fp,
gpu_memory_allocated);

auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
lut_overflow_flag_prep->broadcast_lut(active_streams);
}

@@ -2196,7 +2217,8 @@ template <typename Torus> struct int_sc_prop_memory {
(num_radix_blocks + 1) * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
}
auto active_streams = streams.active_gpu_subset(num_radix_blocks + 1);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks + 1, params.pbs_type);
lut_message_extract->broadcast_lut(active_streams);
};

@@ -2393,7 +2415,8 @@ template <typename Torus> struct int_shifted_blocks_and_borrow_states_memory {
lut_indexes, h_lut_indexes, lut_indexes_size, streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
// Do I need to do something else for the multi-gpu?
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
luts_array_first_step->broadcast_lut(active_streams);
};

@@ -2404,7 +2427,8 @@ template <typename Torus> struct int_shifted_blocks_and_borrow_states_memory {
cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
    lut_indexes, new_lut_indexes, new_num_blocks * sizeof(Torus),
    streams.stream(0), streams.gpu_index(0), gpu_memory_allocated);
auto new_active_streams = streams.active_gpu_subset(new_num_blocks);
auto new_active_streams = streams.active_gpu_subset(
    new_num_blocks, luts_array_first_step->params.pbs_type);
// We just need to update the lut indexes so we use false here
luts_array_first_step->broadcast_lut(new_active_streams, false);
}
@@ -2499,7 +2523,8 @@ template <typename Torus> struct int_borrow_prop_memory {
lut_message_extract->get_max_degree(0), glwe_dimension, polynomial_size,
message_modulus, carry_modulus, f_message_extract,
gpu_memory_allocated);
active_streams = streams.active_gpu_subset(num_radix_blocks);
active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);

lut_message_extract->broadcast_lut(active_streams);

@@ -2520,7 +2545,8 @@ template <typename Torus> struct int_borrow_prop_memory {
lut_borrow_flag->broadcast_lut(active_streams);
}

active_streams = streams.active_gpu_subset(num_radix_blocks);
active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
internal_streams.create_internal_cuda_streams_on_same_gpus(active_streams,
                                                           2);
};

@@ -45,7 +45,8 @@ template <typename Torus> struct int_mul_memory {
params.polynomial_size, params.message_modulus, params.carry_modulus,
zero_out_predicate_lut_f, gpu_memory_allocated);

auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
zero_out_predicate_lut->broadcast_lut(active_streams);

zero_out_mem = new int_zero_out_if_buffer<Torus>(
@@ -122,7 +123,8 @@ template <typename Torus> struct int_mul_memory {
streams.stream(0), streams.gpu_index(0),
luts_array->get_lut_indexes(0, lsb_vector_block_count), 1,
msb_vector_block_count);
auto active_streams = streams.active_gpu_subset(total_block_count);
auto active_streams =
    streams.active_gpu_subset(total_block_count, params.pbs_type);
luts_array->broadcast_lut(active_streams);
// create memory object for sum ciphertexts
sum_ciphertexts_mem = new int_sum_ciphertexts_vec_memory<Torus>(

@@ -126,7 +126,8 @@ template <typename Torus> struct int_grouped_oprf_memory {
luts->get_lut_indexes(0, 0), this->h_lut_indexes,
num_blocks_to_process * sizeof(Torus), streams.stream(0),
streams.gpu_index(0), allocate_gpu_memory);
auto active_streams = streams.active_gpu_subset(num_blocks_to_process);
auto active_streams =
    streams.active_gpu_subset(num_blocks_to_process, params.pbs_type);
luts->broadcast_lut(active_streams);

cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));

@@ -91,7 +91,8 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
cur_lut_bivariate->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
shift_lut_f, gpu_memory_allocated);
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
cur_lut_bivariate->broadcast_lut(active_streams);

lut_buffers_bivariate.push_back(cur_lut_bivariate);
@@ -177,7 +178,8 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
cur_lut_bivariate->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
shift_lut_f, gpu_memory_allocated);
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
cur_lut_bivariate->broadcast_lut(active_streams);

lut_buffers_bivariate.push_back(cur_lut_bivariate);
@@ -220,7 +222,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;

auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
// In the arithmetic shift, a PBS has to be applied to the last rotated
// block twice: once to shift it, once to compute the padding block to be
// copied onto all blocks to the left of the last rotated block
@@ -276,7 +278,8 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
shift_last_block_lut_univariate->get_max_degree(0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, last_block_lut_f, gpu_memory_allocated);
auto active_streams_shift_last = streams.active_gpu_subset(1);
auto active_streams_shift_last =
    streams.active_gpu_subset(1, params.pbs_type);
shift_last_block_lut_univariate->broadcast_lut(active_streams_shift_last);

lut_buffers_univariate.push_back(shift_last_block_lut_univariate);
@@ -302,7 +305,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
padding_block_lut_univariate->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
padding_block_lut_f, gpu_memory_allocated);
// auto active_streams = streams.active_gpu_subset(1);
// auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
padding_block_lut_univariate->broadcast_lut(active_streams);

lut_buffers_univariate.push_back(padding_block_lut_univariate);
@@ -344,7 +347,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
params.polynomial_size, params.message_modulus, params.carry_modulus,
blocks_lut_f, gpu_memory_allocated);
auto active_streams_shift_blocks =
    streams.active_gpu_subset(num_radix_blocks);
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
shift_blocks_lut_bivariate->broadcast_lut(active_streams_shift_blocks);

lut_buffers_bivariate.push_back(shift_blocks_lut_bivariate);

@@ -119,8 +119,8 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
mux_lut->get_degree(0), mux_lut->get_max_degree(0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, mux_lut_f, gpu_memory_allocated);
auto active_gpu_count_mux =
    streams.active_gpu_subset(bits_per_block * num_radix_blocks);
auto active_gpu_count_mux = streams.active_gpu_subset(
    bits_per_block * num_radix_blocks, params.pbs_type);
mux_lut->broadcast_lut(active_gpu_count_mux);

auto cleaning_lut_f = [params](Torus x) -> Torus {
@@ -132,7 +132,7 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, cleaning_lut_f, gpu_memory_allocated);
auto active_gpu_count_cleaning =
    streams.active_gpu_subset(num_radix_blocks);
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
cleaning_lut->broadcast_lut(active_gpu_count_cleaning);
}

@@ -108,7 +108,8 @@ template <typename Torus> struct int_overflowing_sub_memory {
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
f_message_acc, gpu_memory_allocated);

auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
luts_array->broadcast_lut(active_streams);
luts_borrow_propagation_sum->broadcast_lut(active_streams);
message_acc->broadcast_lut(active_streams);

@@ -38,7 +38,8 @@ template <typename Torus> struct int_unchecked_all_eq_slices_buffer {
num_streams_to_use = 1;

this->num_streams = num_streams_to_use;
this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

uint32_t num_gpus = active_streams.count();

@@ -40,7 +40,8 @@ template <typename Torus> struct int_equality_selectors_buffer {

this->num_streams = num_streams_to_use;

this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);
@@ -154,7 +155,8 @@ template <typename Torus> struct int_possible_results_buffer {

this->num_streams = num_streams_to_use;

this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);
@@ -207,7 +209,8 @@ template <typename Torus> struct int_possible_results_buffer {
params.message_modulus, params.carry_modulus, fns,
allocate_gpu_memory);

current_lut->broadcast_lut(streams.active_gpu_subset(1));
current_lut->broadcast_lut(
    streams.active_gpu_subset(1, params.pbs_type));
stream_luts[lut_count++] = current_lut;
lut_value_start += luts_in_this_call;
}
@@ -282,7 +285,8 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {

this->num_streams = num_streams_to_use;

this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams);
@@ -300,7 +304,8 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {
params.polynomial_size, params.message_modulus, params.carry_modulus,
id_fn, allocate_gpu_memory);

lut->broadcast_lut(streams.active_gpu_subset(num_blocks));
lut->broadcast_lut(
    streams.active_gpu_subset(num_blocks, params.pbs_type));
this->stream_identity_luts[i] = lut;
}

@@ -321,7 +326,7 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {
params.polynomial_size, params.message_modulus, params.carry_modulus,
msg_fn, allocate_gpu_memory);
this->message_extract_lut->broadcast_lut(
    streams.active_gpu_subset(num_blocks));
    streams.active_gpu_subset(num_blocks, params.pbs_type));

this->carry_extract_lut = new int_radix_lut<Torus>(
    streams, params, 1, num_blocks, allocate_gpu_memory, size_tracker);
@@ -333,7 +338,7 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {
params.polynomial_size, params.message_modulus, params.carry_modulus,
carry_fn, allocate_gpu_memory);
this->carry_extract_lut->broadcast_lut(
    streams.active_gpu_subset(num_blocks));
    streams.active_gpu_subset(num_blocks, params.pbs_type));

this->partial_aggregated_vectors =
    new CudaRadixCiphertextFFI *[num_streams];
@@ -628,7 +633,8 @@ template <typename Torus> struct int_unchecked_contains_buffer {
num_streams_to_use = 1;

this->num_streams = num_streams_to_use;
this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);
@@ -703,7 +709,8 @@ template <typename Torus> struct int_unchecked_contains_clear_buffer {
num_streams_to_use = 1;

this->num_streams = num_streams_to_use;
this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);
@@ -1094,7 +1101,8 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
num_streams_to_use = 1;

this->num_streams = num_streams_to_use;
this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);
@@ -1184,7 +1192,8 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
this->prefix_sum_lut->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
prefix_sum_fn, allocate_gpu_memory);
this->prefix_sum_lut->broadcast_lut(streams.active_gpu_subset(num_inputs));
this->prefix_sum_lut->broadcast_lut(
    streams.active_gpu_subset(num_inputs, params.pbs_type));

auto cleanup_fn = [ALREADY_SEEN, params](Torus x) -> Torus {
Torus val = x % params.message_modulus;
@@ -1200,7 +1209,8 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
this->cleanup_lut->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
cleanup_fn, allocate_gpu_memory);
this->cleanup_lut->broadcast_lut(streams.active_gpu_subset(num_inputs));
this->cleanup_lut->broadcast_lut(
    streams.active_gpu_subset(num_inputs, params.pbs_type));
}

void release(CudaStreams streams) {
@@ -1292,7 +1302,8 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
num_streams_to_use = 1;

this->num_streams = num_streams_to_use;
this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);
@@ -1372,7 +1383,8 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
this->prefix_sum_lut->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
prefix_sum_fn, allocate_gpu_memory);
this->prefix_sum_lut->broadcast_lut(streams.active_gpu_subset(num_inputs));
this->prefix_sum_lut->broadcast_lut(
    streams.active_gpu_subset(num_inputs, params.pbs_type));

auto cleanup_fn = [ALREADY_SEEN, params](Torus x) -> Torus {
Torus val = x % params.message_modulus;
@@ -1388,7 +1400,8 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
this->cleanup_lut->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
cleanup_fn, allocate_gpu_memory);
this->cleanup_lut->broadcast_lut(streams.active_gpu_subset(num_inputs));
this->cleanup_lut->broadcast_lut(
    streams.active_gpu_subset(num_inputs, params.pbs_type));
}

void release(CudaStreams streams) {
@@ -1462,7 +1475,8 @@ template <typename Torus> struct int_unchecked_index_of_buffer {
num_streams_to_use = 1;

this->num_streams = num_streams_to_use;
this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);
@@ -1523,7 +1537,8 @@ template <typename Torus> struct int_unchecked_index_of_clear_buffer {
num_streams_to_use = 1;

this->num_streams = num_streams_to_use;
this->active_streams = streams.active_gpu_subset(num_blocks);
this->active_streams =
    streams.active_gpu_subset(num_blocks, params.pbs_type);

this->internal_cuda_streams.create_internal_cuda_streams_on_same_gpus(
    active_streams, num_streams_to_use);

@@ -47,7 +47,7 @@ template <typename Torus> struct compact_lwe_list {

template <typename Torus> struct flattened_compact_lwe_lists {
Torus *d_ptr;
Torus **d_ptr_to_compact_list;
Torus **ptr_array_to_d_compact_list;
const uint32_t *h_num_lwes_per_compact_list;
uint32_t num_compact_lists;
uint32_t lwe_dimension;
@@ -59,13 +59,13 @@ template <typename Torus> struct flattened_compact_lwe_lists {
uint32_t lwe_dimension)
    : d_ptr(d_ptr), h_num_lwes_per_compact_list(h_num_lwes_per_compact_list),
      num_compact_lists(num_compact_lists), lwe_dimension(lwe_dimension) {
d_ptr_to_compact_list =
    static_cast<Torus **>(malloc(num_compact_lists * sizeof(Torus **)));
ptr_array_to_d_compact_list =
    static_cast<Torus **>(malloc(num_compact_lists * sizeof(Torus *)));
total_num_lwes = 0;
auto curr_list = d_ptr;
for (auto i = 0; i < num_compact_lists; ++i) {
total_num_lwes += h_num_lwes_per_compact_list[i];
d_ptr_to_compact_list[i] = curr_list;
ptr_array_to_d_compact_list[i] = curr_list;
curr_list += lwe_dimension + h_num_lwes_per_compact_list[i];
}
}
@@ -75,10 +75,12 @@ template <typename Torus> struct flattened_compact_lwe_lists {
PANIC("index out of range in flattened_compact_lwe_lists::get");
}

return compact_lwe_list(d_ptr_to_compact_list[compact_list_index],
return compact_lwe_list(ptr_array_to_d_compact_list[compact_list_index],
                        lwe_dimension,
                        h_num_lwes_per_compact_list[compact_list_index]);
}

void release() { free(ptr_array_to_d_compact_list); }
};

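The `flattened_compact_lwe_lists` hunk above renames the host-side pointer table, sizes its allocation with `sizeof(Torus *)`, and frees that same table in `release`. A small sketch of the pointer-table construction is shown below, using `std::vector` instead of `malloc` and a hypothetical free function; the stride between lists matches the constructor above (`lwe_dimension + num_lwes[i]`, consistent with a shared mask followed by one body per LWE).

```cpp
#include <cstdint>
#include <vector>

// Build a table of pointers into one flattened buffer that holds several
// compact LWE lists laid out back to back. List i starts where list i - 1
// ends, i.e. lwe_dimension + num_lwes[i - 1] elements further on.
template <typename Torus>
std::vector<Torus *>
build_compact_list_table(Torus *flattened,
                         const std::vector<uint32_t> &num_lwes,
                         uint32_t lwe_dimension) {
  std::vector<Torus *> table;
  table.reserve(num_lwes.size());
  Torus *curr = flattened;
  for (uint32_t n : num_lwes) {
    table.push_back(curr);     // start of compact list i
    curr += lwe_dimension + n; // stride used in the constructor above
  }
  return table;
}
```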
/*
@@ -121,7 +123,6 @@ template <typename Torus> struct zk_expand_mem {
    : computing_params(computing_params), casting_params(casting_params),
      num_compact_lists(num_compact_lists),
      casting_key_type(casting_key_type) {

gpu_memory_allocated = allocate_gpu_memory;

// We copy num_lwes_per_compact_list so we get protection against
@@ -289,7 +290,8 @@ template <typename Torus> struct zk_expand_mem {
lut_indexes, h_lut_indexes, num_packed_msgs * num_lwes * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);

auto active_streams = streams.active_gpu_subset(2 * num_lwes);
auto active_streams =
    streams.active_gpu_subset(2 * num_lwes, params.pbs_type);
message_and_carry_extract_luts->broadcast_lut(active_streams);

message_and_carry_extract_luts->allocate_lwe_vector_for_non_trivial_indexes(
@@ -313,7 +315,6 @@ template <typename Torus> struct zk_expand_mem {
}

void release(CudaStreams streams) {

message_and_carry_extract_luts->release(streams);
delete message_and_carry_extract_luts;

@@ -153,7 +153,8 @@ __host__ void are_all_comparisons_block_true(
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
                         h_lut_indexes, num_chunks * sizeof(Torus),
                         streams.stream(0), streams.gpu_index(0));
auto active_streams = streams.active_gpu_subset(num_chunks);
auto active_streams =
    streams.active_gpu_subset(num_chunks, params.pbs_type);
is_max_value_lut->broadcast_lut(active_streams);
}
lut = is_max_value_lut;
@@ -172,8 +173,8 @@ __host__ void are_all_comparisons_block_true(
is_max_value_lut->h_lut_indexes,
is_max_value_lut->num_blocks * sizeof(Torus),
streams.stream(0), streams.gpu_index(0));
auto active_gpu_count_is_max =
    streams.active_gpu_subset(is_max_value_lut->num_blocks);
auto active_gpu_count_is_max = streams.active_gpu_subset(
    is_max_value_lut->num_blocks, params.pbs_type);
is_max_value_lut->broadcast_lut(active_gpu_count_is_max, false);

reset_radix_ciphertext_blocks(lwe_array_out, 1);
@@ -488,7 +489,7 @@ tree_sign_reduction(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
polynomial_size, message_modulus, carry_modulus, f, true,
tree_buffer->preallocated_h_lut);

auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
last_lut->broadcast_lut(active_streams);

// Last leaf

@@ -339,7 +339,9 @@ host_integer_decompress(CudaStreams streams,
/// dimension to a big LWE dimension
auto encryption_params = h_mem_ptr->encryption_params;
auto lut = h_mem_ptr->decompression_rescale_lut;
auto active_streams = streams.active_gpu_subset(num_blocks_to_decompress);
auto active_streams = streams.active_gpu_subset(
    num_blocks_to_decompress,
    h_mem_ptr->decompression_rescale_lut->params.pbs_type);
if (active_streams.count() == 1) {
execute_pbs_async<Torus, Torus>(
    active_streams, (Torus *)d_lwe_array_out->ptr, lut->lwe_indexes_out,

@@ -542,7 +542,8 @@ __host__ void integer_radix_apply_univariate_lookup_table(
std::vector<Torus *> lwe_after_pbs_vec = lut->lwe_after_pbs_vec;
std::vector<Torus *> lwe_trivial_indexes_vec = lut->lwe_trivial_indexes_vec;

auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
if (active_streams.count() == 1) {
execute_keyswitch_async<Torus>(
    streams.get_ith(0), lwe_after_ks_vec[0], lwe_trivial_indexes_vec[0],
@@ -645,7 +646,8 @@ __host__ void integer_radix_apply_many_univariate_lookup_table(
std::vector<Torus *> lwe_after_pbs_vec = lut->lwe_after_pbs_vec;
std::vector<Torus *> lwe_trivial_indexes_vec = lut->lwe_trivial_indexes_vec;

auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
if (active_streams.count() == 1) {
execute_keyswitch_async<Torus>(
    streams.get_ith(0), lwe_after_ks_vec[0], lwe_trivial_indexes_vec[0],
@@ -764,7 +766,8 @@ __host__ void integer_radix_apply_bivariate_lookup_table(
std::vector<Torus *> lwe_after_pbs_vec = lut->lwe_after_pbs_vec;
std::vector<Torus *> lwe_trivial_indexes_vec = lut->lwe_trivial_indexes_vec;

auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
if (active_streams.count() == 1) {
execute_keyswitch_async<Torus>(
    streams.get_ith(0), lwe_after_ks_vec[0], lwe_trivial_indexes_vec[0],
@@ -1812,7 +1815,8 @@ uint64_t scratch_cuda_apply_univariate_lut(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
*(*mem_ptr)->get_degree(0) = lut_degree;
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
(*mem_ptr)->broadcast_lut(active_streams);
POP_RANGE()
return size_tracker;
@@ -1847,7 +1851,8 @@ uint64_t scratch_cuda_apply_many_univariate_lut(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
*(*mem_ptr)->get_degree(0) = lut_degree;
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
(*mem_ptr)->broadcast_lut(active_streams);
POP_RANGE()
return size_tracker;
@@ -1883,7 +1888,8 @@ uint64_t scratch_cuda_apply_bivariate_lut(
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
*(*mem_ptr)->get_degree(0) = lut_degree;
auto active_streams = streams.active_gpu_subset(num_radix_blocks);
auto active_streams =
    streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
(*mem_ptr)->broadcast_lut(active_streams);
POP_RANGE()
return size_tracker;
@@ -2336,8 +2342,8 @@ integer_radix_apply_noise_squashing(CudaStreams streams,

// Since the radix ciphertexts are packed, we have to use the num_radix_blocks
// from the output ct
auto active_streams =
    streams.active_gpu_subset(lwe_array_out->num_radix_blocks);
auto active_streams = streams.active_gpu_subset(
    lwe_array_out->num_radix_blocks, params.pbs_type);
if (active_streams.count() == 1) {
execute_keyswitch_async<InputTorus>(
    streams.get_ith(0), lwe_after_ks_vec[0], lwe_trivial_indexes_vec[0],

@@ -388,7 +388,8 @@ __host__ void host_integer_partial_sum_ciphertexts_vec(
current_columns.next_accumulation(total_ciphertexts, total_messages,
                                  needs_processing);

auto active_streams = streams.active_gpu_subset(total_ciphertexts);
auto active_streams =
    streams.active_gpu_subset(total_ciphertexts, mem_ptr->params.pbs_type);
GPU_ASSERT(total_ciphertexts <= mem_ptr->luts_message_carry->num_blocks,
           "SUM CT");

@@ -442,7 +443,8 @@ __host__ void host_integer_partial_sum_ciphertexts_vec(
streams.stream(0), streams.gpu_index(0), current_blocks,
num_radix_blocks, num_radix_blocks + 1);

auto active_streams = streams.active_gpu_subset(2 * num_radix_blocks);
auto active_streams = streams.active_gpu_subset(2 * num_radix_blocks,
                                                mem_ptr->params.pbs_type);

if (active_streams.count() == 1) {
execute_keyswitch_async<Torus>(

@@ -29,7 +29,8 @@ void host_integer_grouped_oprf(CudaStreams streams,
int_grouped_oprf_memory<Torus> *mem_ptr,
void *const *bsks) {

auto active_streams = streams.active_gpu_subset(num_blocks_to_process);
auto active_streams = streams.active_gpu_subset(num_blocks_to_process,
                                                mem_ptr->params.pbs_type);
auto lut = mem_ptr->luts;

if (active_streams.count() == 1) {

@@ -34,49 +34,49 @@ void cuda_rerand_64(

switch (rerand_buffer->params.big_lwe_dimension) {
case 256:
rerand_inplace<uint64_t, AmortizedDegree<256>>(
host_rerand_inplace<uint64_t, AmortizedDegree<256>>(
    streams, static_cast<uint64_t *>(lwe_array),
    static_cast<const uint64_t *>(
        lwe_flattened_encryptions_of_zero_compact_array_in),
    (uint64_t **)(ksk), rerand_buffer);
break;
case 512:
rerand_inplace<uint64_t, AmortizedDegree<512>>(
host_rerand_inplace<uint64_t, AmortizedDegree<512>>(
    streams, static_cast<uint64_t *>(lwe_array),
    static_cast<const uint64_t *>(
        lwe_flattened_encryptions_of_zero_compact_array_in),
    (uint64_t **)(ksk), rerand_buffer);
break;
case 1024:
rerand_inplace<uint64_t, AmortizedDegree<1024>>(
host_rerand_inplace<uint64_t, AmortizedDegree<1024>>(
    streams, static_cast<uint64_t *>(lwe_array),
    static_cast<const uint64_t *>(
        lwe_flattened_encryptions_of_zero_compact_array_in),
    (uint64_t **)(ksk), rerand_buffer);
break;
case 2048:
rerand_inplace<uint64_t, AmortizedDegree<2048>>(
host_rerand_inplace<uint64_t, AmortizedDegree<2048>>(
    streams, static_cast<uint64_t *>(lwe_array),
    static_cast<const uint64_t *>(
        lwe_flattened_encryptions_of_zero_compact_array_in),
    (uint64_t **)(ksk), rerand_buffer);
break;
case 4096:
rerand_inplace<uint64_t, AmortizedDegree<4096>>(
host_rerand_inplace<uint64_t, AmortizedDegree<4096>>(
    streams, static_cast<uint64_t *>(lwe_array),
    static_cast<const uint64_t *>(
        lwe_flattened_encryptions_of_zero_compact_array_in),
    (uint64_t **)(ksk), rerand_buffer);
break;
case 8192:
rerand_inplace<uint64_t, AmortizedDegree<8192>>(
host_rerand_inplace<uint64_t, AmortizedDegree<8192>>(
    streams, static_cast<uint64_t *>(lwe_array),
    static_cast<const uint64_t *>(
        lwe_flattened_encryptions_of_zero_compact_array_in),
    (uint64_t **)(ksk), rerand_buffer);
break;
case 16384:
rerand_inplace<uint64_t, AmortizedDegree<16384>>(
host_rerand_inplace<uint64_t, AmortizedDegree<16384>>(
    streams, static_cast<uint64_t *>(lwe_array),
    static_cast<const uint64_t *>(
        lwe_flattened_encryptions_of_zero_compact_array_in),
@@ -88,9 +88,6 @@ void cuda_rerand_64(
    " in the interval [256..16384].");
break;
}

cuda_synchronize_stream(static_cast<cudaStream_t>(streams.streams[0]),
                        streams.gpu_indexes[0]);
}

void cleanup_cuda_rerand(CudaStreamsFFI streams, int8_t **mem_ptr_void) {

@@ -10,7 +10,7 @@
#include "zk/zk_utilities.h"

template <typename Torus, class params>
void rerand_inplace(
void host_rerand_inplace(
    CudaStreams const streams, Torus *lwe_array,
    const Torus *lwe_flattened_encryptions_of_zero_compact_array_in,
    Torus *const *ksk, int_rerand_mem<Torus> *mem_ptr) {
@@ -73,6 +73,7 @@ void rerand_inplace(
message_modulus, carry_modulus);
release_cpu_radix_ciphertext_async(&lwes_ffi);
release_cpu_radix_ciphertext_async(&ksed_zero_lwes_ffi);
compact_lwe_lists.release();
}

template <typename Torus>

@@ -45,7 +45,8 @@ host_scalar_bitop(CudaStreams streams, CudaRadixCiphertextFFI *output,
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
                             num_clear_blocks * sizeof(Torus),
                             streams.stream(0), streams.gpu_index(0));
auto active_streams = streams.active_gpu_subset(num_clear_blocks);
auto active_streams = streams.active_gpu_subset(
    num_clear_blocks, mem_ptr->lut->params.pbs_type);
lut->broadcast_lut(active_streams, false);

integer_radix_apply_univariate_lookup_table<Torus>(

@@ -146,7 +146,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check(
lut->get_degree(0), lut->get_max_degree(0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus, scalar_last_leaf_lut_f,
true, mem_ptr->diff_buffer->tree_buffer->preallocated_h_lut);
auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
lut->broadcast_lut(active_streams);

integer_radix_apply_univariate_lookup_table<Torus>(
@@ -240,7 +240,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check(
polynomial_size, message_modulus, carry_modulus,
scalar_bivariate_last_leaf_lut_f, true,
mem_ptr->diff_buffer->tree_buffer->preallocated_h_lut);
auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
lut->broadcast_lut(active_streams);

integer_radix_apply_bivariate_lookup_table<Torus>(
@@ -274,7 +274,7 @@ __host__ void integer_radix_unsigned_scalar_difference_check(
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, one_block_lut_f, true,
mem_ptr->preallocated_h_lut);
auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
one_block_lut->broadcast_lut(active_streams);

integer_radix_apply_univariate_lookup_table<Torus>(
@@ -419,7 +419,7 @@ __host__ void integer_radix_signed_scalar_difference_check(
polynomial_size, message_modulus, carry_modulus,
scalar_bivariate_last_leaf_lut_f, true,
mem_ptr->diff_buffer->tree_buffer->preallocated_h_lut);
auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
lut->broadcast_lut(active_streams);

integer_radix_apply_bivariate_lookup_table<Torus>(
@@ -521,7 +521,7 @@ __host__ void integer_radix_signed_scalar_difference_check(
signed_msb_lut->get_max_degree(0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
lut_f, true, mem_ptr->preallocated_h_lut);
auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
signed_msb_lut->broadcast_lut(active_streams);

CudaRadixCiphertextFFI sign_block;
@@ -567,7 +567,7 @@ __host__ void integer_radix_signed_scalar_difference_check(
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, one_block_lut_f, true,
mem_ptr->preallocated_h_lut);
auto active_streams = streams.active_gpu_subset(1);
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
one_block_lut->broadcast_lut(active_streams);

integer_radix_apply_univariate_lookup_table<Torus>(
@@ -785,8 +785,8 @@ __host__ void host_scalar_equality_check(
num_halved_scalar_blocks * sizeof(Torus), lsb_streams.stream(0),
lsb_streams.gpu_index(0));
}
auto active_streams =
    lsb_streams.active_gpu_subset(num_halved_scalar_blocks);
auto active_streams = lsb_streams.active_gpu_subset(
    num_halved_scalar_blocks, params.pbs_type);
// We use false cause we only will broadcast the indexes
scalar_comparison_luts->broadcast_lut(active_streams, false);

@@ -30,7 +30,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
Torus *global_accumulator, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
uint32_t level_count, uint32_t grouping_factor, uint32_t lwe_offset,
uint64_t lwe_chunk_size, uint64_t keybundle_size_per_input,
uint32_t lwe_chunk_size, uint64_t keybundle_size_per_input,
int8_t *device_mem, uint64_t device_memory_size_per_block,
uint32_t num_many_lut, uint32_t lut_stride) {

@@ -321,8 +321,9 @@ __host__ void execute_cg_external_product_loop(
lwe_chunk_size * level_count * (glwe_dimension + 1) *
(glwe_dimension + 1) * (polynomial_size / 2);

uint64_t chunk_size = std::min(
    lwe_chunk_size, (uint64_t)(lwe_dimension / grouping_factor) - lwe_offset);
uint32_t chunk_size = (uint32_t)(std::min(
    lwe_chunk_size,
    (uint64_t)(lwe_dimension / grouping_factor) - lwe_offset));

auto d_mem = buffer->d_mem_acc_cg;
auto keybundle_fft = buffer->keybundle_fft;

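The external-product loops above now keep the chunk size as a 32-bit value so it matches the kernels' `uint32_t lwe_chunk_size` parameter, while the min itself is still evaluated in 64 bits. A minimal sketch of that computation, with hypothetical parameter names, is below.

```cpp
#include <algorithm>
#include <cstdint>

// Chunk size for this pass: the configured chunk size, capped by how many
// (lwe_dimension / grouping_factor) chunks remain past lwe_offset.
// Computed in 64 bits, then narrowed to the uint32_t kernel parameter.
uint32_t remaining_chunk_size(uint64_t lwe_chunk_size, uint32_t lwe_dimension,
                              uint32_t grouping_factor, uint32_t lwe_offset) {
  uint64_t remaining =
      (uint64_t)(lwe_dimension / grouping_factor) - lwe_offset;
  return (uint32_t)std::min(lwe_chunk_size, remaining);
}
```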
@@ -373,7 +373,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
const double2 *__restrict__ keybundle_array, Torus *global_accumulator,
double2 *join_buffer, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t iteration, uint64_t lwe_chunk_size,
uint32_t level_count, uint32_t iteration, uint32_t lwe_chunk_size,
int8_t *device_mem, uint64_t device_memory_size_per_block,
uint32_t num_many_lut, uint32_t lut_stride) {
// We use shared memory for the polynomials that are used often during the
@@ -790,7 +790,7 @@ execute_step_two(cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
uint32_t lut_stride) {
cuda_set_device(gpu_index);

auto lwe_chunk_size = buffer->lwe_chunk_size;
uint32_t lwe_chunk_size = (uint32_t)(buffer->lwe_chunk_size);
uint64_t full_sm_accumulate_step_two =
    get_buffer_size_full_sm_multibit_programmable_bootstrap_step_two<Torus>(
        polynomial_size);

@@ -626,7 +626,7 @@ __host__ bool supports_thread_block_clusters_on_classic_programmable_bootstrap(
uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t max_shared_memory) {

if (!cuda_check_support_thread_block_clusters() || num_samples > 128)
if (!cuda_check_support_thread_block_clusters())
return false;

uint64_t full_sm = get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(

@@ -30,7 +30,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
Torus *global_accumulator, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
uint32_t level_count, uint32_t grouping_factor, uint32_t lwe_offset,
uint64_t lwe_chunk_size, uint64_t keybundle_size_per_input,
uint32_t lwe_chunk_size, uint64_t keybundle_size_per_input,
int8_t *device_mem, uint64_t device_memory_size_per_block,
bool support_dsm, uint32_t num_many_lut, uint32_t lut_stride) {

@@ -205,10 +205,10 @@ __global__ void __launch_bounds__(params::degree / params::opt)
const Torus *__restrict__ lut_vector_indexes,
const Torus *__restrict__ lwe_array_in,
const Torus *__restrict__ lwe_input_indexes,
const double2 *__restrict__ keybundle_array, double2 *join_buffer,
Torus *global_accumulator, uint32_t lwe_dimension, uint32_t lwe_offset,
uint64_t lwe_chunk_size, uint64_t keybundle_size_per_input,
uint32_t num_many_lut, uint32_t lut_stride) {
const double2 *__restrict__ keybundle_array, Torus *global_accumulator,
uint32_t lwe_dimension, uint32_t lwe_offset, uint32_t lwe_chunk_size,
uint64_t keybundle_size_per_input, uint32_t num_many_lut,
uint32_t lut_stride) {

constexpr uint32_t level_count = 1;
constexpr uint32_t grouping_factor = 4;
@@ -548,8 +548,9 @@ __host__ void execute_tbc_external_product_loop(
lwe_chunk_size * level_count * (glwe_dimension + 1) *
(glwe_dimension + 1) * (polynomial_size / 2);

uint64_t chunk_size = std::min(
    lwe_chunk_size, (uint64_t)(lwe_dimension / grouping_factor) - lwe_offset);
uint32_t chunk_size = (uint32_t)(std::min(
    lwe_chunk_size,
    (uint64_t)(lwe_dimension / grouping_factor) - lwe_offset));

auto d_mem = buffer->d_mem_acc_tbc;
auto keybundle_fft = buffer->keybundle_fft;
@@ -624,9 +625,9 @@ __host__ void execute_tbc_external_product_loop(
device_multi_bit_programmable_bootstrap_tbc_accumulate_2_2_params<
    Torus, params, FULLSM>,
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
lwe_array_in, lwe_input_indexes, keybundle_fft, buffer_fft,
global_accumulator, lwe_dimension, lwe_offset, chunk_size,
keybundle_size_per_input, num_many_lut, lut_stride));
lwe_array_in, lwe_input_indexes, keybundle_fft, global_accumulator,
lwe_dimension, lwe_offset, chunk_size, keybundle_size_per_input,
num_many_lut, lut_stride));
} else {
check_cuda_error(cudaLaunchKernelEx(
    &config,

@@ -5,7 +5,8 @@

std::mutex m;
bool p2p_enabled = false;
const int THRESHOLD_MULTI_GPU = 12;
const int THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS = 12;
const int THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS = 68;

// Enable bidirectional p2p access between all available GPUs and device_0_id
int32_t cuda_setup_multi_gpu(int device_0_id) {
@@ -39,10 +40,13 @@ int32_t cuda_setup_multi_gpu(int device_0_id) {
return (int32_t)(num_used_gpus);
}

uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count) {
uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count,
                              PBS_TYPE pbs_type) {
int threshold = (pbs_type == MULTI_BIT)
                    ? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
                    : THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
uint32_t ceil_div_inputs =
    std::max((uint32_t)1,
             (num_inputs + THRESHOLD_MULTI_GPU - 1) / THRESHOLD_MULTI_GPU);
    std::max((uint32_t)1, (num_inputs + threshold - 1) / threshold);
uint32_t active_gpu_count = std::min(ceil_div_inputs, gpu_count);
return active_gpu_count;
}

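With the two constants above, the number of active GPUs now depends on the parameter set: one GPU per 12 blocks for multi-bit PBS versus one per 68 blocks for classical PBS, clamped to the GPUs actually available. The short sketch below just evaluates that ceil-division for one concrete input size; the numbers are illustrative, not benchmarks.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Evaluate the active-GPU rule from get_active_gpu_count for 64 radix blocks
// on a machine with 8 GPUs.
int main() {
  const uint32_t num_inputs = 64, gpu_count = 8;
  const uint32_t multi_bit_threshold = 12, classical_threshold = 68;

  auto active = [&](uint32_t threshold) {
    uint32_t wanted =
        std::max<uint32_t>(1, (num_inputs + threshold - 1) / threshold);
    return std::min(wanted, gpu_count);
  };

  // Multi-bit: ceil(64 / 12) = 6 GPUs; classical: ceil(64 / 68) = 1 GPU.
  std::printf("multi-bit: %u GPUs, classical: %u GPU\n",
              active(multi_bit_threshold), active(classical_threshold));
  return 0;
}
```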
@@ -59,15 +59,20 @@ template <typename Torus>
|
||||
void multi_gpu_alloc_lwe_async(CudaStreams streams, std::vector<Torus *> &dest,
|
||||
uint32_t num_inputs, uint32_t lwe_size,
|
||||
uint64_t &size_tracker_on_gpu_0,
|
||||
bool allocate_gpu_memory) {
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
PANIC_IF_FALSE(dest.empty(),
|
||||
"Cuda error: Requested multi-GPU vector is already allocated");
|
||||
|
||||
int threshold = (pbs_type == MULTI_BIT)
|
||||
? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
|
||||
: THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
|
||||
|
||||
dest.resize(streams.count());
|
||||
for (uint i = 0; i < streams.count(); i++) {
|
||||
uint64_t size_tracker_on_gpu_i = 0;
|
||||
auto inputs_on_gpu = std::min(
|
||||
(int)num_inputs,
|
||||
std::max(THRESHOLD_MULTI_GPU,
|
||||
std::max((int)threshold,
|
||||
get_num_inputs_on_gpu(num_inputs, i, streams.count())));
|
||||
Torus *d_array = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
inputs_on_gpu * lwe_size * sizeof(Torus), streams.stream(i),
|
||||
@@ -81,7 +86,7 @@ void multi_gpu_alloc_lwe_async(CudaStreams streams, std::vector<Torus *> &dest,
|
||||
|
||||
template void multi_gpu_alloc_lwe_async<__uint128_t>(
|
||||
CudaStreams streams, std::vector<__uint128_t *> &dest, uint32_t num_inputs,
|
||||
uint32_t lwe_size, uint64_t &size_tracker_on_gpu_0,
|
||||
uint32_t lwe_size, uint64_t &size_tracker_on_gpu_0, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
/// Allocates the input/output vector for all devices
|
||||
@@ -91,16 +96,21 @@ template <typename Torus>
|
||||
void multi_gpu_alloc_lwe_many_lut_output_async(
|
||||
CudaStreams streams, std::vector<Torus *> &dest, uint32_t num_inputs,
|
||||
uint32_t num_many_lut, uint32_t lwe_size, uint64_t &size_tracker_on_gpu_0,
|
||||
bool allocate_gpu_memory) {
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
PANIC_IF_FALSE(dest.empty(),
|
||||
"Cuda error: Requested multi-GPU vector is already allocated");
|
||||
|
||||
int threshold = (pbs_type == MULTI_BIT)
|
||||
? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
|
||||
: THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
|
||||
|
||||
dest.resize(streams.count());
|
||||
for (uint i = 0; i < streams.count(); i++) {
|
||||
uint64_t size_tracker = 0;
|
||||
auto inputs_on_gpu = std::min(
|
||||
(int)num_inputs,
|
||||
std::max(THRESHOLD_MULTI_GPU,
|
||||
std::max((int)threshold,
|
||||
get_num_inputs_on_gpu(num_inputs, i, streams.count())));
|
||||
Torus *d_array = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
num_many_lut * inputs_on_gpu * lwe_size * sizeof(Torus),
|
||||
|
||||
@@ -100,6 +100,7 @@ __host__ void host_expand_without_verification(
|
||||
2 * num_lwes);
|
||||
release_cpu_radix_ciphertext_async(&input);
|
||||
release_cpu_radix_ciphertext_async(&output);
|
||||
compact_lwe_lists.release();
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -113,6 +113,7 @@ pub fn iop_add_simd(prog: &mut Program) {
|
||||
prog,
|
||||
crate::asm::iop::SIMD_N,
|
||||
fw_impl::llt::iop_add_ripple_rtl,
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -227,14 +228,18 @@ pub fn iop_muls(prog: &mut Program) {
|
||||
pub fn iop_erc_20(prog: &mut Program) {
|
||||
// Add Comment header
|
||||
prog.push_comment("ERC_20 (new_from, new_to) <- (from, to, amount)".to_string());
|
||||
iop_erc_20_rtl(prog, 0).add_to_prog(prog);
|
||||
// TODO: Make sweep of kogge_blk_w
|
||||
// All these little parameters would be very handy to write an
|
||||
// exploration/compilation program which would try to minimize latency by
|
||||
// playing with these.
|
||||
iop_erc_20_rtl(prog, 0, Some(10)).add_to_prog(prog);
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", skip(prog))]
|
||||
pub fn iop_erc_20_simd(prog: &mut Program) {
|
||||
// Add Comment header
|
||||
prog.push_comment("ERC_20_SIMD (new_from, new_to) <- (from, to, amount)".to_string());
|
||||
simd(prog, crate::asm::iop::SIMD_N, fw_impl::llt::iop_erc_20_rtl);
|
||||
simd(prog, crate::asm::iop::SIMD_N, fw_impl::llt::iop_erc_20_rtl, None);
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", skip(prog))]
|
||||
@@ -381,7 +386,7 @@ pub fn iop_rotate_scalar_left(prog: &mut Program) {
|
||||
/// (dst_from[0], dst_to[0], ..., dst_from[N-1], dst_to[N-1])
|
||||
/// Where N is the batch size
|
||||
#[instrument(level = "trace", skip(prog))]
|
||||
pub fn iop_erc_20_rtl(prog: &mut Program, batch_index: u8) -> Rtl {
|
||||
pub fn iop_erc_20_rtl(prog: &mut Program, batch_index: u8, kogge_blk_w: Option<usize>) -> Rtl {
|
||||
// Allocate metavariables:
|
||||
// Dest -> Operand
|
||||
let dst_from = prog.iop_template_var(OperandKind::Dst, 2 * batch_index);
|
||||
@@ -392,13 +397,6 @@ pub fn iop_erc_20_rtl(prog: &mut Program, batch_index: u8) -> Rtl {
|
||||
// Src Amount -> Operand
|
||||
let src_amount = prog.iop_template_var(OperandKind::Src, 3 * batch_index + 2);
|
||||
|
||||
// TODO: Make this a parameter or sweep this
|
||||
// All these little parameters would be very handy to write an
|
||||
// exploration/compilation program which would try to minimize latency by
|
||||
// playing with these.
|
||||
let kogge_blk_w = 10;
|
||||
let ripple = true;
|
||||
|
||||
{
|
||||
let props = prog.params();
|
||||
let tfhe_params: asm::DigitParameters = props.clone().into();
|
||||
@@ -429,19 +427,20 @@ pub fn iop_erc_20_rtl(prog: &mut Program, batch_index: u8) -> Rtl {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if ripple {
|
||||
if let Some(blk_w) = kogge_blk_w {
|
||||
kogge::add(prog, dst_to, src_to, src_amount.clone(), None, blk_w)
|
||||
+ kogge::sub(prog, dst_from, src_from, src_amount, blk_w)
|
||||
} else { // Default to ripple carry
|
||||
kogge::ripple_add(dst_to, src_to, src_amount.clone(), None)
|
||||
+ kogge::ripple_sub(prog, dst_from, src_from, src_amount)
|
||||
} else {
|
||||
kogge::add(prog, dst_to, src_to, src_amount.clone(), None, kogge_blk_w)
|
||||
+ kogge::sub(prog, dst_from, src_from, src_amount, kogge_blk_w)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A SIMD implementation of add for maximum throughput
/// NB: No use of kogge_blk_w here, the impl is forced to use ripple carry
#[instrument(level = "trace", skip(prog))]
pub fn iop_add_ripple_rtl(prog: &mut Program, i: u8) -> Rtl {
pub fn iop_add_ripple_rtl(prog: &mut Program, i: u8, _kogge_blk_w: Option<usize>) -> Rtl {
    // Allocate metavariables:
    let dst = prog.iop_template_var(OperandKind::Dst, i);
    let src_a = prog.iop_template_var(OperandKind::Src, 2 * i);
@@ -899,13 +898,13 @@ fn bw_inv(prog: &mut Program, b: Vec<VarCell>) -> Vec<VarCell> {
/// Maybe this should go into a SIMD firmware implementation... At some point we
/// would need a mechanism to choose between implementations on the fly to make
/// really good use of all of this.
fn simd<F>(prog: &mut Program, batch_size: usize, rtl_closure: F)
fn simd<F>(prog: &mut Program, batch_size: usize, rtl_closure: F, kogge_blk_w: Option<usize>)
where
    F: Fn(&mut Program, u8) -> Rtl,
    F: Fn(&mut Program, u8, Option<usize>) -> Rtl,
{
    (0..batch_size)
        .map(|i| i as u8)
        .map(|i| rtl_closure(prog, i))
        .map(|i| rtl_closure(prog, i, kogge_blk_w))
        .sum::<Rtl>()
        .add_to_prog(prog);
}

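A self-contained sketch of the combinator pattern above, with plain integers standing in for `Rtl` (so it compiles outside the firmware crate) and the optional block width threaded through like `kogge_blk_w`:

// Plain-integer sketch: one result per lane from a closure, folded into a
// single value, with the optional block width passed along unchanged.
fn simd_sketch<F>(batch_size: usize, per_lane: F, blk_w: Option<usize>) -> u64
where
    F: Fn(u8, Option<usize>) -> u64,
{
    (0..batch_size)
        .map(|i| i as u8)
        .map(|i| per_lane(i, blk_w))
        .sum()
}

fn main() {
    // Toy per-lane "cost": lane index plus the block width (or 1 for ripple).
    let cost = |i: u8, w: Option<usize>| i as u64 + w.unwrap_or(1) as u64;
    assert_eq!(simd_sketch(4, cost, Some(10)), 46); // (0+1+2+3) + 4 * 10
}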
@@ -32,10 +32,9 @@ fi
RUSTFLAGS="$RUSTFLAGS" cargo nextest list --cargo-profile "${CARGO_PROFILE}" \
    --features=integer,internal-keycache,gpu-debug,zk-pok -p tfhe &> /tmp/test_list.txt

# Filter the tests to get only the HL and a subset of core crypto ones
TESTS_TO_RUN=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep -E 'high_level_api::.*gpu.*|core_crypto::.*gpu.*' | grep -v 'array' | grep -v 'modulus_switch' | grep -v '3_3' | grep -v 'noise_distribution' | grep -v 'flip')

if [[ "${RUN_VALGRIND}" == "1" ]]; then
    TESTS_TO_RUN=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep -E 'high_level_api::.*gpu.*' | grep -v 'array' | grep -v 'flip')

    # Build the tests but don't run them
    RUSTFLAGS="$RUSTFLAGS" cargo test --no-run --profile "${CARGO_PROFILE}" \
        --features=integer,internal-keycache,gpu-debug,zk-pok -p tfhe
@@ -57,6 +56,7 @@ if [[ "${RUN_VALGRIND}" == "1" ]]; then
fi

if [[ "${RUN_COMPUTE_SANITIZER}" == "1" ]]; then
    TESTS_TO_RUN=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep -E 'high_level_api::.*gpu.*|core_crypto::.*gpu.*' | grep -v 'array' | grep -v 'modulus_switch' | grep -v '3_3' | grep -v 'noise_distribution' | grep -v 'flip')
    # Build the tests but don't run them
    RUSTFLAGS="$RUSTFLAGS" cargo test --no-run --profile "${CARGO_PROFILE}" \
        --features=integer,internal-keycache,gpu,zk-pok -p tfhe

@@ -28,13 +28,12 @@ pub fn transfer_whitepaper<FheType>(
    amount: &FheType,
) -> (FheType, FheType)
where
    FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType> + FheTrivialEncrypt<u64>,
    FheBool: IfThenElse<FheType>,
    FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType>,
    FheBool: IfThenZero<FheType>,
    for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
    let has_enough_funds = (from_amount).ge(amount);
    let zero_amount = FheType::encrypt_trivial(0u64);
    let amount_to_transfer = has_enough_funds.select(amount, &zero_amount);
    let amount_to_transfer = has_enough_funds.if_then_zero(amount);

    let new_to_amount = to_amount + &amount_to_transfer;
    let new_from_amount = from_amount - &amount_to_transfer;
@@ -51,12 +50,13 @@ pub fn par_transfer_whitepaper<FheType>(
where
    FheType:
        Add<Output = FheType> + for<'a> FheOrd<&'a FheType> + Send + Sync + FheTrivialEncrypt<u64>,
    FheBool: IfThenElse<FheType>,
    FheBool: IfThenZero<FheType>,
    for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
    let has_enough_funds = (from_amount).ge(amount);
    let zero_amount = FheType::encrypt_trivial(0u64);
    let amount_to_transfer = has_enough_funds.select(amount, &zero_amount);
    //let zero_amount = FheType::encrypt_trivial(0u64);
    //let amount_to_transfer = has_enough_funds.select(amount, &zero_amount);
    let amount_to_transfer = has_enough_funds.if_then_zero(amount);

    let (new_to_amount, new_from_amount) = rayon::join(
        || to_amount + &amount_to_transfer,

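As a clear-value reference for what both versions compute (no encryption involved; the return order here follows the "(new_from, new_to)" convention of the ERC_20 IOp comment above and is an assumption, since the return statement is not shown in this hunk):

// Clear-value model of the whitepaper transfer: this is what the FHE version
// above computes homomorphically (illustration only).
fn transfer_whitepaper_clear(from: u64, to: u64, amount: u64) -> (u64, u64) {
    let has_enough_funds = from >= amount;
    // `if_then_zero`: keep `amount` when the condition holds, otherwise 0.
    let amount_to_transfer = if has_enough_funds { amount } else { 0 };
    // Assumed return order, matching the "(new_from, new_to)" IOp comment.
    (from - amount_to_transfer, to + amount_to_transfer)
}

fn main() {
    assert_eq!(transfer_whitepaper_clear(100, 5, 30), (70, 35));
    // Insufficient funds: both balances stay unchanged.
    assert_eq!(transfer_whitepaper_clear(10, 5, 30), (10, 5));
}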
@@ -33,8 +33,8 @@ impl ProofConfig {

fn default_proof_config() -> Vec<ProofConfig> {
    vec![
        ProofConfig::new(64usize, &[64usize]),
        ProofConfig::new(2048, &[4 * 64, 10 * 64, 2048]),
        ProofConfig::new(64, &[64]),
        ProofConfig::new(2048, &[64, 4 * 64, 2048]),
        ProofConfig::new(4096, &[4096]),
    ]
}

@@ -7,7 +7,7 @@ use crate::high_level_api::integers::{FheInt, FheIntId, FheUint, FheUintId};
use crate::high_level_api::keys::InternalServerKey;
use crate::high_level_api::re_randomization::ReRandomizationMetadata;
use crate::high_level_api::traits::{
    FheEq, Flip, IfThenElse, ReRandomize, ScalarIfThenElse, Tagged,
    FheEq, Flip, IfThenElse, IfThenZero, ReRandomize, ScalarIfThenElse, Tagged,
};
use crate::high_level_api::{global_state, CompactPublicKey};
use crate::integer::block_decomposition::DecomposableInto;
@@ -552,6 +552,66 @@ where
    }
}

impl<Id> IfThenZero<FheUint<Id>> for FheBool
where
    Id: FheUintId,
{
    /// Conditional selection.
    ///
    /// The output value returned depends on the value of `self`.
    ///
    /// - if `self` is true, the output will have the value of `ct_then`
    /// - if `self` is false, the output will be an encryption of 0
    fn if_then_zero(&self, ct_then: &FheUint<Id>) -> FheUint<Id> {
        global_state::with_internal_keys(|sks| match sks {
            InternalServerKey::Cpu(cpu_sks) => {
                let ct_condition = self;
                let mut ct_out = ct_then.ciphertext.on_cpu().clone();
                cpu_sks.pbs_key().zero_out_if_condition_is_false(
                    &mut ct_out,
                    &ct_condition.ciphertext.on_cpu().0,
                );
                FheUint::new(
                    ct_out,
                    cpu_sks.tag.clone(),
                    ReRandomizationMetadata::default(),
                )
            }
            #[cfg(feature = "gpu")]
            InternalServerKey::Cuda(_) => {
                panic!("Cuda does not support if_then_zero")
            }
            #[cfg(feature = "hpu")]
            InternalServerKey::Hpu(device) => {
                let hpu_then = ct_then.ciphertext.on_hpu(device);
                let hpu_cond = self.ciphertext.on_hpu(device);

                let (opcode, proto) = {
                    let asm_iop = &hpu_asm::iop::IOP_IF_THEN_ZERO;
                    (
                        asm_iop.opcode(),
                        &asm_iop.format().expect("Unspecified IOP format").proto,
                    )
                };
                // These clones are cheap as they are just Arc
                let hpu_result = HpuRadixCiphertext::exec(
                    proto,
                    opcode,
                    &[hpu_then.clone(), hpu_cond.clone()],
                    &[],
                )
                .pop()
                .unwrap();
                FheUint::new(
                    hpu_result,
                    device.tag.clone(),
                    ReRandomizationMetadata::default(),
                )
            }
        })
    }
}

impl<Id: FheIntId> IfThenElse<FheInt<Id>> for FheBool {
    /// Conditional selection.
    ///

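A minimal sketch of how the new trait might be used through the high-level API on the CPU backend, assuming the usual key-setup flow; it relies on the `IfThenZero` prelude re-export added in the next hunk, so it only compiles against this branch:

use tfhe::prelude::*;
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheBool, FheUint64};

fn main() {
    // Usual high-level API setup, CPU backend.
    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);
    set_server_key(server_key);

    let amount = FheUint64::encrypt(30u64, &client_key);
    let has_enough_funds = FheBool::encrypt(false, &client_key);

    // New in this branch: zero out `amount` when the condition is false,
    // without trivially encrypting a zero and calling `select`.
    let to_transfer = has_enough_funds.if_then_zero(&amount);

    let clear: u64 = to_transfer.decrypt(&client_key);
    assert_eq!(clear, 0);
}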
@@ -11,6 +11,7 @@ pub use crate::high_level_api::traits::{
    FheOrd, FheTrivialEncrypt, FheTryEncrypt, FheTryTrivialEncrypt, FheWait, Flip, IfThenElse,
    OverflowingAdd, OverflowingMul, OverflowingNeg, OverflowingSub, ReRandomize, RotateLeft,
    RotateLeftAssign, RotateRight, RotateRightAssign, ScalarIfThenElse, SquashNoise, Tagged,
    IfThenZero,
};
#[cfg(feature = "hpu")]
pub use crate::high_level_api::traits::{FheHpu, HpuHandle};

@@ -149,6 +149,10 @@ pub trait IfThenElse<Ciphertext> {
    }
}

pub trait IfThenZero<Ciphertext> {
    fn if_then_zero(&self, ct_then: &Ciphertext) -> Ciphertext;
}

pub trait ScalarIfThenElse<Lhs, Rhs> {
    type Output;

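To make the contract concrete, a clear-value model (the trait is re-declared locally so the snippet stands alone; this is not FHE code):

// Clear-value model of the `IfThenZero` contract.
pub trait IfThenZero<Ciphertext> {
    fn if_then_zero(&self, ct_then: &Ciphertext) -> Ciphertext;
}

impl IfThenZero<u64> for bool {
    fn if_then_zero(&self, ct_then: &u64) -> u64 {
        if *self { *ct_then } else { 0 }
    }
}

fn main() {
    assert_eq!(true.if_then_zero(&42), 42);
    assert_eq!(false.if_then_zero(&42), 0);
}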
@@ -656,18 +656,6 @@ async function compactPublicKeyZeroKnowledgeBench() {
        ShortintCompactPublicKeyEncryptionParametersName.PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
      ),
    },
    {
      zk_scheme: "ZKV1",
      name: shortint_params_name(
        ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
      ),
      block_params: new ShortintParameters(
        ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128,
      ),
      casting_params: new ShortintCompactPublicKeyEncryptionParameters(
        ShortintCompactPublicKeyEncryptionParametersName.V1_5_PARAM_PKE_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128_ZKV1,
      ),
    },
  ];

  let bench_results = {};
@@ -695,8 +683,8 @@ async function compactPublicKeyZeroKnowledgeBench() {
  // Proof configuration:
  let proof_configs = [
    { crs_bit_size: 64, bits_to_encrypt: [64] },
    { crs_bit_size: 640, bits_to_encrypt: [640] },
    { crs_bit_size: 2048, bits_to_encrypt: [2048, 64 * 4] }, // 64 * 4 is a production use-case
    // 64 * 4 is a production use-case
    { crs_bit_size: 2048, bits_to_encrypt: [64, 4 * 64, 2048] },
    { crs_bit_size: 4096, bits_to_encrypt: [4096] },
  ];