mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 07:38:08 -05:00
Compare commits
66 Commits
tm/flip
...
as/add_deb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
846eed184e | ||
|
|
70fa68bf52 | ||
|
|
7faafd6602 | ||
|
|
8c55f6b8d7 | ||
|
|
f6b1929a8d | ||
|
|
87c0d646a4 | ||
|
|
e5b39a6d4d | ||
|
|
27e2fbd972 | ||
|
|
f54fbf52ce | ||
|
|
2a0dfa5b17 | ||
|
|
a4841036b7 | ||
|
|
1dcc3c8c89 | ||
|
|
1a2643d1da | ||
|
|
bc257904e3 | ||
|
|
8982844a5b | ||
|
|
e80d2548af | ||
|
|
c0ab0a5752 | ||
|
|
f7bfe2f10c | ||
|
|
29c390d92c | ||
|
|
becd08db71 | ||
|
|
ffd7470ef1 | ||
|
|
a3750504c4 | ||
|
|
378c5ccb73 | ||
|
|
4ba1787e12 | ||
|
|
366d359441 | ||
|
|
0ece9e684a | ||
|
|
f8684d1f67 | ||
|
|
b4066df77f | ||
|
|
6b94872a00 | ||
|
|
d88caff6dd | ||
|
|
75a265f93b | ||
|
|
bfbf638fed | ||
|
|
01651d6fb2 | ||
|
|
b2624d1a76 | ||
|
|
9fb7b56629 | ||
|
|
24feeb8609 | ||
|
|
757c2fc828 | ||
|
|
4ff0d6cac2 | ||
|
|
1530f52c79 | ||
|
|
9918dacd6a | ||
|
|
2b503acf18 | ||
|
|
57cc326a64 | ||
|
|
84eb8aeb63 | ||
|
|
f09acfa581 | ||
|
|
8335a6b6b5 | ||
|
|
f80fd157ae | ||
|
|
0ed97cfba8 | ||
|
|
daee3f1850 | ||
|
|
e8dc403ebd | ||
|
|
63e5504c80 | ||
|
|
d664e4ada6 | ||
|
|
c78cc2d2e9 | ||
|
|
b566d78621 | ||
|
|
7da6786d59 | ||
|
|
6edf6b9e26 | ||
|
|
6fde90ad9c | ||
|
|
5d70ae4232 | ||
|
|
89b36ebca0 | ||
|
|
bfc97385f4 | ||
|
|
7ab763abba | ||
|
|
a05db18ba3 | ||
|
|
a3168eb1b5 | ||
|
|
7fccb851d7 | ||
|
|
a78d5cc57b | ||
|
|
9c0d078e1a | ||
|
|
6016755f9d |
12
.cargo/audit.toml
Normal file
12
.cargo/audit.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[advisories]
|
||||
ignore = [
|
||||
# Ignoring unmaintained 'paste' advisory as it is a widely used, low-risk build dependency.
|
||||
"RUSTSEC-2024-0436",
|
||||
]
|
||||
|
||||
[output]
|
||||
# Deny advisories that are warnings by default.
|
||||
# At the moment this works if we allow paste, we might want to disable this in the future if it
|
||||
# becomes too tedious
|
||||
deny = ["warnings"]
|
||||
quiet = false
|
||||
3
.github/workflows/approve_label.yml
vendored
3
.github/workflows/approve_label.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Add labels in pull request
|
||||
name: PR label manager
|
||||
name: approve_label
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -11,6 +11,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
trigger-tests:
|
||||
name: approve_label/trigger-tests
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Run backward compatibility tests
|
||||
name: Backward compatibility Tests on CPU
|
||||
name: aws_tfhe_backward_compat_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -22,13 +22,16 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (backward-compat-tests)
|
||||
name: aws_tfhe_backward_compat_tests/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
@@ -53,11 +56,11 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
backward-compat-tests:
|
||||
name: Backward compatibility tests
|
||||
name: aws_tfhe_backward_compat_tests/backward-compat-tests (bpr)
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
@@ -123,7 +126,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (backward-compat-tests)
|
||||
name: aws_tfhe_backward_compat_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, backward-compat-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
9
.github/workflows/aws_tfhe_fast_tests.yml
vendored
9
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run a small subset of tests to ensure quick feedback.
|
||||
name: Fast AWS Tests on CPU
|
||||
name: aws_tfhe_fast_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -29,6 +29,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: aws_tfhe_fast_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -68,7 +69,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
@@ -132,7 +133,7 @@ jobs:
|
||||
echo "any_changed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (fast-tests)
|
||||
name: aws_tfhe_fast_tests/setup-instance
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
|
||||
needs: should-run
|
||||
@@ -288,7 +289,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (fast-tests)
|
||||
name: aws_tfhe_fast_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, fast-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
11
.github/workflows/aws_tfhe_integer_tests.yml
vendored
11
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: AWS Unsigned Integer Tests on CPU
|
||||
name: aws_tfhe_integer_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -35,6 +35,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: aws_tfhe_integer_tests/should-run
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
@@ -55,7 +56,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
integer:
|
||||
@@ -69,7 +70,7 @@ jobs:
|
||||
- .github/workflows/aws_tfhe_integer_tests.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
name: aws_tfhe_integer_tests/setup-instance
|
||||
needs: should-run
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
@@ -100,7 +101,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
unsigned-integer-tests:
|
||||
name: Unsigned integer tests
|
||||
name: aws_tfhe_integer_tests/unsigned-integer-tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
@@ -156,7 +157,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (unsigned-integer-tests)
|
||||
name: aws_tfhe_integer_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [setup-instance, unsigned-integer-tests]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
8
.github/workflows/aws_tfhe_noise_checks.yml
vendored
8
.github/workflows/aws_tfhe_noise_checks.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Run noise checks on CPU
|
||||
name: aws_tfhe_noise_checks
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -25,7 +25,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (noise-checks)
|
||||
name: aws_tfhe_noise_checks/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
exit 1
|
||||
|
||||
noise-checks:
|
||||
name: CPU noise checks
|
||||
name: aws_tfhe_noise_checks/noise-checks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Noise checks tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (noise-checks)
|
||||
name: aws_tfhe_noise_checks/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, noise-checks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
name: AWS Signed Integer Tests on CPU
|
||||
name: aws_tfhe_signed_integer_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -35,6 +35,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: aws_tfhe_signed_integer_tests/should-run
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
@@ -56,7 +57,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
integer:
|
||||
@@ -70,7 +71,7 @@ jobs:
|
||||
- .github/workflows/aws_tfhe_signed_integer_tests.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
name: aws_tfhe_signed_integer_tests/setup-instance
|
||||
needs: should-run
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
@@ -101,7 +102,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
signed-integer-tests:
|
||||
name: Signed integer tests
|
||||
name: aws_tfhe_signed_integer_tests/signed-integer-tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
@@ -161,7 +162,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (signed-integer-tests)
|
||||
name: aws_tfhe_signed_integer_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [setup-instance, signed-integer-tests]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
11
.github/workflows/aws_tfhe_tests.yml
vendored
11
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: AWS Tests on CPU
|
||||
name: aws_tfhe_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -32,6 +32,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: aws_tfhe_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -77,7 +78,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
@@ -141,7 +142,7 @@ jobs:
|
||||
echo "any_changed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cpu-tests)
|
||||
name: aws_tfhe_tests/setup-instance
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.any_file_changed == 'true')
|
||||
needs: should-run
|
||||
@@ -169,7 +170,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cpu-tests:
|
||||
name: CPU tests
|
||||
name: aws_tfhe_tests/cpu-tests
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
@@ -268,7 +269,7 @@ jobs:
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
name: aws_tfhe_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cpu-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
8
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
8
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: AWS WASM Tests on CPU
|
||||
name: aws_tfhe_wasm_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -28,7 +28,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (wasm-tests)
|
||||
name: aws_tfhe_wasm_tests/setup-instance
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
wasm-tests:
|
||||
name: WASM tests
|
||||
name: aws_tfhe_wasm_tests/wasm-tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (wasm-tests)
|
||||
name: aws_tfhe_wasm_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, wasm-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
8
.github/workflows/benchmark_boolean.yml
vendored
8
.github/workflows/benchmark_boolean.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run boolean benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Boolean benchmarks
|
||||
name: benchmark_boolean
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -23,7 +23,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (boolean-benchmarks)
|
||||
name: benchmark_boolean/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
boolean-benchmarks:
|
||||
name: Execute boolean benchmarks in EC2
|
||||
name: benchmark_boolean/boolean-benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
@@ -132,7 +132,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Boolean benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (boolean-benchmarks)
|
||||
name: benchmark_boolean/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, boolean-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
8
.github/workflows/benchmark_core_crypto.yml
vendored
8
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run core crypto benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Core crypto benchmarks
|
||||
name: benchmark_core_crypto
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -23,7 +23,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (core-crypto-benchmarks)
|
||||
name: benchmark_core_crypto/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
core-crypto-benchmarks:
|
||||
name: Execute core crypto benchmarks in EC2
|
||||
name: benchmark_core_crypto/core-crypto-benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
@@ -125,7 +125,7 @@ jobs:
|
||||
SLACK_MESSAGE: "PBS benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (core-crypto-benchmarks)
|
||||
name: benchmark_core_crypto/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, core-crypto-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
152
.github/workflows/benchmark_ct_key_sizes.yml
vendored
Normal file
152
.github/workflows/benchmark_ct_key_sizes.yml
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
# Run sizes benchmarks on an instance and return parsed results to Slab CI bot.
|
||||
name: Ciphertext and Keys sizes benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Monthly benchmarks will be triggered each 24th of the month at 1a.m.
|
||||
- cron: '0 1 24 * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (sizes-benchmarks)
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
sizes-benchmarks:
|
||||
name: Execute sizes client benchmarks
|
||||
needs: setup-instance
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Measure public key and ciphertext sizes in HL Api
|
||||
run: |
|
||||
make measure_hlapi_compact_pk_ct_sizes
|
||||
|
||||
- name: Parse key and ciphertext sizes results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/hlapi_ct_key_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--database tfhe_rs \
|
||||
--hardware "m6i.32xlarge" \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch "${REF_NAME}" \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--object-sizes
|
||||
env:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Measure key sizes in shortint
|
||||
run: |
|
||||
make measure_shortint_key_sizes
|
||||
|
||||
- name: Parse key sizes results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/shortint_key_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_ct_key_sizes
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Sizes benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (sizes-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, sizes-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (sizes-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
8
.github/workflows/benchmark_dex.yml
vendored
8
.github/workflows/benchmark_dex.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run all DEX benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: DEX benchmarks
|
||||
name: benchmark_dex
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -22,7 +22,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (dex-benchmarks)
|
||||
name: benchmark_dex/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
dex-benchmarks:
|
||||
name: Execute DEX benchmarks
|
||||
name: benchmark_dex/dex-benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
SLACK_MESSAGE: "DEX benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (dex-benchmarks)
|
||||
name: benchmark_dex/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, dex-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
8
.github/workflows/benchmark_erc20.yml
vendored
8
.github/workflows/benchmark_erc20.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run all ERC20 benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: ERC20 benchmarks
|
||||
name: benchmark_erc20
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -23,7 +23,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (erc20-benchmarks)
|
||||
name: benchmark_erc20/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
erc20-benchmarks:
|
||||
name: Execute ERC20 benchmarks
|
||||
name: benchmark_erc20/erc20-benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
@@ -129,7 +129,7 @@ jobs:
|
||||
SLACK_MESSAGE: "ERC20 benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (erc20-benchmarks)
|
||||
name: benchmark_erc20/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, erc20-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
5
.github/workflows/benchmark_gpu.yml
vendored
5
.github/workflows/benchmark_gpu.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run CUDA benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
|
||||
name: Cuda benchmarks
|
||||
name: benchmark_gpu
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -66,6 +66,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
name: benchmark_gpu/parse-inputs
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
@@ -90,7 +91,7 @@ jobs:
|
||||
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
run-benchmarks:
|
||||
name: Run benchmarks
|
||||
name: benchmark_gpu/run-benchmarks
|
||||
needs: parse-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
|
||||
8
.github/workflows/benchmark_gpu_4090.yml
vendored
8
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run benchmarks on an RTX 4090 machine and return parsed results to Slab CI bot.
|
||||
name: TFHE Cuda Backend - 4090 benchmarks
|
||||
name: benchmark_gpu_4090
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -27,7 +27,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
cuda-integer-benchmarks:
|
||||
name: Cuda integer benchmarks (RTX 4090)
|
||||
name: benchmark_gpu_4090/cuda-integer-benchmarks
|
||||
if: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs' ||
|
||||
contains(github.event.label.name, '4090_bench') }}
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
cuda-core-crypto-benchmarks:
|
||||
name: Cuda core crypto benchmarks (RTX 4090)
|
||||
name: benchmark_gpu_4090/cuda-core-crypto-benchmarks
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
|
||||
needs: cuda-integer-benchmarks
|
||||
concurrency:
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
remove_github_label:
|
||||
name: Remove 4090 bench label
|
||||
name: benchmark_gpu_4090/remove_github_label
|
||||
if: ${{ always() && github.event_name == 'pull_request' }}
|
||||
needs: [cuda-integer-benchmarks, cuda-core-crypto-benchmarks]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
14
.github/workflows/benchmark_gpu_common.yml
vendored
14
.github/workflows/benchmark_gpu_common.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run benchmarks on CUDA instance and return parsed results to Slab CI bot.
|
||||
name: Cuda benchmarks - common
|
||||
name: benchmark_gpu_common
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -63,7 +63,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
name: benchmark_gpu_common/prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
command: ${{ steps.set_command.outputs.command }}
|
||||
@@ -141,7 +141,7 @@ jobs:
|
||||
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
name: benchmark_gpu_common/setup-instance
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -185,7 +185,7 @@ jobs:
|
||||
|
||||
# Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
|
||||
install-dependencies:
|
||||
name: Install dependencies
|
||||
name: benchmark_gpu_common/install-dependencies
|
||||
needs: [ setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -210,7 +210,7 @@ jobs:
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
cuda-benchmarks:
|
||||
name: Cuda benchmarks (${{ inputs.profile }})
|
||||
name: benchmark_gpu_common/cuda-benchmarks
|
||||
needs: [ prepare-matrix, setup-instance, install-dependencies ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
@@ -329,7 +329,7 @@ jobs:
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: benchmark_gpu_common/slack-notify
|
||||
needs: [ setup-instance, cuda-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-benchmarks.result != 'skipped' && failure() }}
|
||||
@@ -342,7 +342,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
name: benchmark_gpu_common/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
5
.github/workflows/benchmark_gpu_dex.yml
vendored
5
.github/workflows/benchmark_gpu_dex.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run CUDA DEX benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
|
||||
name: Cuda DEX benchmarks
|
||||
name: benchmark_gpu_dex/
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -23,6 +23,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
name: benchmark_gpu_dex/parse-inputs
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
@@ -47,7 +48,7 @@ jobs:
|
||||
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
run-benchmarks:
|
||||
name: Run benchmarks
|
||||
name: benchmark_gpu_dex/run-benchmarks
|
||||
needs: parse-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_dex_common.yml
|
||||
with:
|
||||
|
||||
10
.github/workflows/benchmark_gpu_dex_common.yml
vendored
10
.github/workflows/benchmark_gpu_dex_common.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run DEX benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Cuda DEX benchmarks - common
|
||||
name: benchmark_gpu_dex_common
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -47,7 +47,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-dex-benchmarks)
|
||||
name: benchmark_gpu_dex_common/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -91,7 +91,7 @@ jobs:
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-dex-benchmarks:
|
||||
name: Cuda DEX benchmarks (${{ inputs.profile }})
|
||||
name: benchmark_gpu_dex_common/cuda-dex-benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -177,7 +177,7 @@ jobs:
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: benchmark_gpu_dex_common/slack-notify
|
||||
needs: [ setup-instance, cuda-dex-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-dex-benchmarks.result != 'skipped' && failure() }}
|
||||
@@ -190,7 +190,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Cuda DEX benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-dex-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-dex-${{ inputs.profile }}-benchmarks)
|
||||
name: benchmark_gpu_dex_common/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-dex-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Run CUDA DEX benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
|
||||
name: Cuda DEX weekly benchmarks
|
||||
name: benchmark_gpu_dex_weekly
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -10,7 +10,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-benchmarks-1-h100:
|
||||
name: Run benchmarks (1xH100)
|
||||
name: benchmark_gpu_dex_weekly/run-benchmarks-1-h100
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_dex_common.yml
|
||||
with:
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-2-h100:
|
||||
name: Run benchmarks (2xH100)
|
||||
name: benchmark_gpu_dex_weekly/run-benchmarks-2-h100
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_dex_common.yml
|
||||
with:
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-8-h100:
|
||||
name: Run benchmarks (8xH100)
|
||||
name: benchmark_gpu_dex_weekly/run-benchmarks-8-h100
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_dex_common.yml
|
||||
with:
|
||||
|
||||
5
.github/workflows/benchmark_gpu_erc20.yml
vendored
5
.github/workflows/benchmark_gpu_erc20.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run CUDA ERC20 benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
|
||||
name: Cuda ERC20 benchmarks
|
||||
name: benchmark_gpu_erc20
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -24,6 +24,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
name: benchmark_gpu_erc20/parse-inputs
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
profile: ${{ steps.parse_profile.outputs.profile }}
|
||||
@@ -48,7 +49,7 @@ jobs:
|
||||
echo "name=${NAME}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
run-benchmarks:
|
||||
name: Run benchmarks
|
||||
name: benchmark_gpu_erc20/run-benchmarks
|
||||
needs: parse-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
|
||||
10
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
10
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Cuda ERC20 benchmarks - common
|
||||
name: benchmark_gpu_erc20_common
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
@@ -48,7 +48,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-erc20-benchmarks)
|
||||
name: benchmark_gpu_erc20_common/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -92,7 +92,7 @@ jobs:
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-erc20-benchmarks:
|
||||
name: Cuda ERC20 benchmarks (${{ inputs.profile }})
|
||||
name: benchmark_gpu_erc20_common/cuda-erc20-benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -178,7 +178,7 @@ jobs:
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: benchmark_gpu_erc20_common/slack-notify
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }}
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Cuda ERC20 benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
|
||||
name: benchmark_gpu_erc20_common/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Run CUDA ERC20 benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
|
||||
name: Cuda ERC20 weekly benchmarks
|
||||
name: benchmark_gpu_erc20_weekly
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -11,7 +11,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-benchmarks-1-h100:
|
||||
name: Run benchmarks (1xH100)
|
||||
name: benchmark_gpu_erc20_weekly/run-benchmarks-1-h100
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-2-h100:
|
||||
name: Run benchmarks (2xH100)
|
||||
name: benchmark_gpu_erc20_weekly/run-benchmarks-2-h100
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-8-h100:
|
||||
name: Run benchmarks (8xH100)
|
||||
name: benchmark_gpu_erc20_weekly/run-benchmarks-8-h100
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
|
||||
with:
|
||||
|
||||
12
.github/workflows/benchmark_gpu_weekly.yml
vendored
12
.github/workflows/benchmark_gpu_weekly.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run CUDA benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
|
||||
name: Cuda weekly benchmarks
|
||||
name: benchmark_gpu_weekly
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -11,7 +11,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
run-benchmarks-8-h100-sxm5-integer:
|
||||
name: Run integer benchmarks (8xH100-SXM5)
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -32,7 +32,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-8-h100-sxm5-integer-compression:
|
||||
name: Run integer compression benchmarks (8xH100-SXM5)
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-compression
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-8-h100-sxm5-integer-zk:
|
||||
name: Run integer zk benchmarks (8xH100-SXM5)
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-zk
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-8-h100-sxm5-noise-squash:
|
||||
name: Run integer zk benchmarks (8xH100-SXM5)
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-noise-squash
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-1-h100-core-crypto:
|
||||
name: Run core-crypto benchmarks (1xH100)
|
||||
name: benchmark_gpu_weekly/run-benchmarks-1-h100-core-crypto (1xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
|
||||
13
.github/workflows/benchmark_hpu_integer.yml
vendored
13
.github/workflows/benchmark_hpu_integer.yml
vendored
@@ -1,9 +1,12 @@
|
||||
# Run all integer benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
|
||||
name: Hpu Integer Benchmarks
|
||||
name: benchmark_hpu_integer
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
type: boolean
|
||||
bench_type:
|
||||
description: "Benchmarks type"
|
||||
type: choice
|
||||
@@ -19,6 +22,7 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
FAST_BENCH: TRUE
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -52,7 +56,7 @@ jobs:
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
integer-benchmarks-hpu:
|
||||
name: Execute integer & erc20 benchmarks for HPU backend
|
||||
name: benchmark_hpu_integer/integer-benchmarks-hpu
|
||||
needs: prepare-matrix
|
||||
runs-on: v80-desktop
|
||||
concurrency:
|
||||
@@ -102,6 +106,11 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make pull_hpu_files
|
||||
|
||||
10
.github/workflows/benchmark_integer.yml
vendored
10
.github/workflows/benchmark_integer.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run all integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Integer benchmarks
|
||||
name: benchmark_integer
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -41,7 +41,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
name: benchmark_integer/prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (integer-benchmarks)
|
||||
name: benchmark_integer/setup-instance
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
integer-benchmarks:
|
||||
name: Execute integer benchmarks for all operations flavor
|
||||
name: benchmark_integer/integer-benchmarks
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
@@ -211,7 +211,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (integer-benchmarks)
|
||||
name: benchmark_integer/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, integer-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
298
.github/workflows/benchmark_perf_regression.yml
vendored
Normal file
298
.github/workflows/benchmark_perf_regression.yml
vendored
Normal file
@@ -0,0 +1,298 @@
|
||||
# Run performance regression benchmarks and return parsed results to associated pull-request.
|
||||
name: benchmark_perf_regression
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [ created ]
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: { }
|
||||
|
||||
jobs:
|
||||
verify-actor:
|
||||
name: benchmark_perf_regression/verify-actor
|
||||
if: (github.event_name == 'pull_request' &&
|
||||
(contains(github.event.label.name, 'bench-perfs-cpu') ||
|
||||
contains(github.event.label.name, 'bench-perfs-gpu'))) ||
|
||||
(github.event.issue.pull_request && startsWith(github.event.comment.body, '/bench'))
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
prepare-benchmarks:
|
||||
name: benchmark_perf_regression/prepare-benchmarks
|
||||
needs: verify-actor
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
commands: ${{ steps.set_commands.outputs.commands }}
|
||||
slab-backend: ${{ steps.set_slab_details.outputs.backend }}
|
||||
slab-profile: ${{ steps.set_slab_details.outputs.profile }}
|
||||
hardware-name: ${{ steps.get_hardware_name.outputs.name }}
|
||||
custom-env: ${{ steps.get_custom_env.outputs.custom_env }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Generate cpu benchmarks command from label
|
||||
if: (github.event_name == 'pull_request' && contains(github.event.label.name, 'bench-perfs-cpu'))
|
||||
run: |
|
||||
echo "DEFAULT_BENCH_OPTIONS=--backend cpu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Generate cpu benchmarks command from label
|
||||
if: (github.event_name == 'pull_request' && contains(github.event.label.name, 'bench-perfs-gpu'))
|
||||
run: |
|
||||
echo "DEFAULT_BENCH_OPTIONS=--backend gpu" >> "${GITHUB_ENV}"
|
||||
|
||||
# TODO add support for HPU backend
|
||||
|
||||
- name: Generate cargo commands and env from label
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
python3 ci/perf_regression.py parse_profile --issue-comment "/bench ${DEFAULT_BENCH_OPTIONS}"
|
||||
echo "COMMANDS=$(cat ci/perf_regression_generated_commands.json)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Dump issue comment into file # To avoid possible code-injection
|
||||
if: github.event_name == 'issue_comment'
|
||||
run: |
|
||||
echo "${COMMENT_BODY}" >> dumped_comment.txt
|
||||
env:
|
||||
COMMENT_BODY: ${{ github.event.comment.body }}
|
||||
|
||||
- name: Generate cargo commands and env
|
||||
if: github.event_name == 'issue_comment'
|
||||
run: |
|
||||
python3 ci/perf_regression.py parse_profile --issue-comment "$(cat dumped_comment.txt)"
|
||||
echo "COMMANDS=$(cat ci/perf_regression_generated_commands.json)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set commands output
|
||||
id: set_commands
|
||||
run: | # zizmor: ignore[template-injection] this env variable is safe
|
||||
echo "commands=${{ toJSON(env.COMMANDS) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Set Slab details outputs
|
||||
id: set_slab_details
|
||||
run: |
|
||||
echo "backend=$(cat ci/perf_regression_slab_backend_config.txt)" >> "${GITHUB_OUTPUT}"
|
||||
echo "profile=$(cat ci/perf_regression_slab_profile_config.txt)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Get hardware name
|
||||
id: get_hardware_name
|
||||
run: | # zizmor: ignore[template-injection] these interpolations are safe
|
||||
HARDWARE_NAME=$(python3 ci/hardware_finder.py "${{ steps.set_slab_details.outputs.backend }}" "${{ steps.set_slab_details.outputs.profile }}");
|
||||
echo "name=${HARDWARE_NAME}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Get custom env vars
|
||||
id: get_custom_env
|
||||
run: |
|
||||
echo "custom_env=$(cat ci/perf_regression_custom_env.sh)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: benchmark_perf_regression/setup-instance
|
||||
needs: prepare-benchmarks
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: ${{ needs.prepare-benchmarks.outputs.slab-backend }}
|
||||
profile: ${{ needs.prepare-benchmarks.outputs.slab-profile }}
|
||||
|
||||
install-cuda-dependencies-if-required:
|
||||
name: benchmark_perf_regression/install-cuda-dependencies-if-required
|
||||
needs: [ prepare-benchmarks, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
matrix:
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- cuda: "12.8"
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.prepare-benchmarks.outputs.slab-backend == 'hyperstack'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
regression-benchmarks:
|
||||
name: benchmark_perf_regression/regression-benchmarks
|
||||
needs: [ prepare-benchmarks, setup-instance, install-cuda-dependencies-if-required ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}_${{ needs.prepare-benchmarks.outputs.slab-backend }}_${{ needs.prepare-benchmarks.outputs.slab-profile }}
|
||||
cancel-in-progress: true
|
||||
timeout-minutes: 720 # 12 hours
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: ${{ fromJson(needs.prepare-benchmarks.outputs.commands) }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Export custom env variables
|
||||
run: | # zizmor: ignore[template-injection] this env variable is safe
|
||||
{
|
||||
${{ needs.prepare-benchmarks.outputs.custom-env }}
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
# Re-export environment variables as dependencies setup perform this task in the previous job.
|
||||
# Local env variables are cleaned at the end of each job.
|
||||
- name: Export CUDA variables
|
||||
if: needs.prepare-benchmarks.outputs.slab-backend == 'hyperstack'
|
||||
shell: bash
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-12.8
|
||||
|
||||
- name: Export gcc and g++ variables
|
||||
if: needs.prepare-benchmarks.outputs.slab-backend == 'hyperstack'
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
GCC_VERSION: 11
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Run regression benchmarks
|
||||
run: |
|
||||
make BENCH_CUSTOM_COMMAND="${BENCH_COMMAND}" bench_custom
|
||||
env:
|
||||
BENCH_COMMAND: ${{ matrix.command }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
|
||||
--database tfhe_rs \
|
||||
--hardware "${HARDWARE_NAME}" \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch "${REF_NAME}" \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix regression \
|
||||
--bench-type "${BENCH_TYPE}"
|
||||
env:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}
|
||||
HARDWARE_NAME: ${{ needs.prepare-benchmarks.outputs.hardware-name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_regression
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: benchmark_perf_regression/slack-notify
|
||||
needs: [ prepare-benchmarks, setup-instance, regression-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.regression-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Performance regression benchmarks finished with status: ${{ needs.regression-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
# TODO Add job for regression calculation
|
||||
|
||||
teardown-instance:
|
||||
name: benchmark_perf_regression/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, regression-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (regression-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
23
.github/workflows/benchmark_shortint.yml
vendored
23
.github/workflows/benchmark_shortint.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run all shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Shortint full benchmarks
|
||||
name: benchmark_shortint
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -27,7 +27,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
name: benchmark_shortint/prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (shortint-benchmarks)
|
||||
name: benchmark_shortint/setup-instance
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -69,7 +69,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
shortint-benchmarks:
|
||||
name: Execute shortint benchmarks for all operations flavor
|
||||
name: benchmark_shortint/shortint-benchmarks
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
@@ -131,19 +131,6 @@ jobs:
|
||||
env:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
# This small benchmark needs to be executed only once.
|
||||
- name: Measure key sizes
|
||||
if: matrix.op_flavor == 'default'
|
||||
run: |
|
||||
make measure_shortint_key_sizes
|
||||
|
||||
- name: Parse key sizes results
|
||||
if: matrix.op_flavor == 'default'
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/shortint_key_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
@@ -168,7 +155,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Shortint full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (shortint-benchmarks)
|
||||
name: benchmark_shortint/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, shortint-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
10
.github/workflows/benchmark_signed_integer.yml
vendored
10
.github/workflows/benchmark_signed_integer.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run all signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Signed Integer full benchmarks
|
||||
name: benchmark_signed_integer
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -41,7 +41,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
name: benchmark_signed_integer/prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (signed-integer-benchmarks)
|
||||
name: benchmark_signed_integer/setup-instance
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
signed-integer-benchmarks:
|
||||
name: Execute signed integer benchmarks for all operations flavor
|
||||
name: benchmark_signed_integer/signed-integer-benchmarks
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
@@ -203,7 +203,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Signed integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (integer-benchmarks)
|
||||
name: benchmark_signed_integer/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, signed-integer-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
24
.github/workflows/benchmark_tfhe_fft.yml
vendored
24
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run FFT benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: FFT benchmarks
|
||||
name: benchmark_tfhe_fft
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -27,8 +27,8 @@ on:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (fft-benchmarks)
|
||||
setup-instance:
|
||||
name: benchmark_tfhe_fft/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
@@ -45,12 +45,12 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
fft-benchmarks:
|
||||
name: Execute FFT benchmarks in EC2
|
||||
needs: setup-ec2
|
||||
name: benchmark_tfhe_fft/fft-benchmarks
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
@@ -124,10 +124,10 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-fft benchmarks failed. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (fft-benchmarks)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, fft-benchmarks ]
|
||||
teardown-instance:
|
||||
name: benchmark_tfhe_fft/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, fft-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
@@ -138,7 +138,7 @@ jobs:
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -146,4 +146,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (fft-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (fft-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
24
.github/workflows/benchmark_tfhe_ntt.yml
vendored
24
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run NTT benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: NTT benchmarks
|
||||
name: benchmark_tfhe_ntt
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -27,8 +27,8 @@ on:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (ntt-benchmarks)
|
||||
setup-instance:
|
||||
name: benchmark_tfhe_ntt/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
@@ -45,12 +45,12 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
ntt-benchmarks:
|
||||
name: Execute NTT benchmarks in EC2
|
||||
needs: setup-ec2
|
||||
name: benchmark_tfhe_ntt/ntt-benchmarks
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
@@ -124,10 +124,10 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-ntt benchmarks failed. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (ntt-benchmarks)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [setup-ec2, ntt-benchmarks]
|
||||
teardown-instance:
|
||||
name: benchmark_tfhe_ntt/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [setup-instance, ntt-benchmarks]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
@@ -138,7 +138,7 @@ jobs:
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -146,4 +146,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (ntt-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "EC2 teardown (ntt-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
11
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
11
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run benchmarks of the tfhe-zk-pok crate on an instance and return parsed results to Slab CI bot.
|
||||
name: tfhe-zk-pok benchmarks
|
||||
name: benchmark_tfhe_zk_pok
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -35,6 +35,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: benchmark_tfhe_zk_pok/should-run
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
((github.event_name == 'push' || github.event_name == 'schedule') && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -50,7 +51,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
@@ -58,7 +59,7 @@ jobs:
|
||||
- .github/workflows/benchmark_tfhe_zk_pok.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (tfhe-zk-pok-benchmarks)
|
||||
name: benchmark_tfhe_zk_pok/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
@@ -81,7 +82,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
tfhe-zk-pok-benchmarks:
|
||||
name: Execute tfhe-zk-pok benchmarks
|
||||
name: benchmark_tfhe_zk_pok/tfhe-zk-pok-benchmarks
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
@@ -173,7 +174,7 @@ jobs:
|
||||
SLACK_MESSAGE: "tfhe-zk-pok benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (tfhe-zk-pok-benchmarks)
|
||||
name: benchmark_tfhe_zk_pok/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
24
.github/workflows/benchmark_wasm_client.yml
vendored
24
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run WASM client benchmarks on an instance and return parsed results to Slab CI bot.
|
||||
name: WASM client benchmarks
|
||||
name: benchmark_wasm_client
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -26,6 +26,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: benchmark_wasm_client/should-run
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
@@ -44,7 +45,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
wasm_bench:
|
||||
@@ -57,7 +58,7 @@ jobs:
|
||||
- .github/workflows/wasm_client_benchmark.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (wasm-client-benchmarks)
|
||||
name: benchmark_wasm_client/setup-instance
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.wasm_bench)
|
||||
@@ -78,7 +79,7 @@ jobs:
|
||||
profile: cpu-small
|
||||
|
||||
wasm-client-benchmarks:
|
||||
name: Execute WASM client benchmarks
|
||||
name: benchmark_wasm_client/wasm-client-benchmarks
|
||||
needs: setup-instance
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
@@ -165,19 +166,6 @@ jobs:
|
||||
env:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
# Run these benchmarks only once
|
||||
- name: Measure public key and ciphertext sizes in HL Api
|
||||
if: matrix.browser == 'chrome'
|
||||
run: |
|
||||
make measure_hlapi_compact_pk_ct_sizes
|
||||
|
||||
- name: Parse key and ciphertext sizes results
|
||||
if: matrix.browser == 'chrome'
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/hlapi_cpk_and_cctl_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--key-gen \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
@@ -210,7 +198,7 @@ jobs:
|
||||
SLACK_MESSAGE: "WASM benchmarks (${{ matrix.browser }}) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (wasm-client-benchmarks)
|
||||
name: benchmark_wasm_client/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, wasm-client-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
13
.github/workflows/benchmark_zk_pke.yml
vendored
13
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Run PKE Zero-Knowledge benchmarks on an instance and return parsed results to Slab CI bot.
|
||||
name: PKE ZK benchmarks
|
||||
name: benchmark_zk_pke
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -36,6 +36,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: benchmark_zk_pke/should-run
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
((github.event_name == 'push' || github.event_name == 'schedule') && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -51,7 +52,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
@@ -67,7 +68,7 @@ jobs:
|
||||
- .github/workflows/zk_pke_benchmark.yml
|
||||
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
name: benchmark_zk_pke/prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
@@ -96,7 +97,7 @@ jobs:
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (pke-zk-benchmarks)
|
||||
name: benchmark_zk_pke/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ should-run, prepare-matrix ]
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
@@ -119,7 +120,7 @@ jobs:
|
||||
profile: bench
|
||||
|
||||
pke-zk-benchmarks:
|
||||
name: Execute PKE ZK benchmarks
|
||||
name: benchmark_zk_pke/pke-zk-benchmarks
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
concurrency:
|
||||
@@ -223,7 +224,7 @@ jobs:
|
||||
SLACK_MESSAGE: "PKE ZK benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (pke-zk-benchmarks)
|
||||
name: benchmark_zk_pke/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, pke-zk-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
40
.github/workflows/cargo_audit.yml
vendored
Normal file
40
.github/workflows/cargo_audit.yml
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# Run cargo audit
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# runs every day at 4am UTC
|
||||
- cron: '0 4 * * *'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
audit:
|
||||
name: cargo_audit/audit
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Audit dependencies
|
||||
run: |
|
||||
make audit_dependencies
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "cargo-audit finished with status: ${{ job.status }}. ([action run](${{ env.ACTION_RUN_URL }}))"
|
||||
3
.github/workflows/cargo_build.yml
vendored
3
.github/workflows/cargo_build.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Cargo Build TFHE-rs
|
||||
name: cargo_build
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -19,6 +19,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
cargo-builds:
|
||||
name: cargo_build/cargo-builds (bpr)
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
|
||||
3
.github/workflows/cargo_build_tfhe_fft.yml
vendored
3
.github/workflows/cargo_build_tfhe_fft.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Build tfhe-fft
|
||||
name: Cargo Build tfhe-fft
|
||||
name: cargo_build_tfhe_fft
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -17,6 +17,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
cargo-builds-fft:
|
||||
name: cargo_build_tfhe_fft/cargo-builds-fft (bpr)
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
|
||||
strategy:
|
||||
|
||||
3
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
3
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Build tfhe-ntt
|
||||
name: Cargo Build tfhe-ntt
|
||||
name: cargo_build_tfhe_ntt
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -17,6 +17,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
cargo-builds-ntt:
|
||||
name: cargo_build_tfhe_ntt/cargo-builds-ntt (bpr)
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
9
.github/workflows/cargo_test_fft.yml
vendored
9
.github/workflows/cargo_test_fft.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Test tfhe-fft
|
||||
name: Cargo Test tfhe-fft
|
||||
name: cargo_test_fft
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -21,6 +21,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: cargo_test_fft/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -36,7 +37,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
fft:
|
||||
@@ -46,6 +47,7 @@ jobs:
|
||||
- '.github/workflows/cargo_test_fft.yml'
|
||||
|
||||
cargo-tests-fft:
|
||||
name: cargo_test_fft/cargo-tests-fft
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.fft_test == 'true'
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
@@ -77,6 +79,7 @@ jobs:
|
||||
make test_fft_no_std
|
||||
|
||||
cargo-tests-fft-nightly:
|
||||
name: cargo_test_fft/cargo-tests-fft-nightly
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.fft_test == 'true'
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
@@ -104,6 +107,7 @@ jobs:
|
||||
make test_fft_no_std_nightly
|
||||
|
||||
cargo-tests-fft-node-js:
|
||||
name: cargo_test_fft/cargo-tests-fft-node-js
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.fft_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
@@ -119,6 +123,7 @@ jobs:
|
||||
make test_fft_node_js_ci
|
||||
|
||||
cargo-tests-fft-successful:
|
||||
name: cargo_test_fft/cargo-tests-fft-successful (bpr)
|
||||
needs: [ should-run, cargo-tests-fft, cargo-tests-fft-nightly, cargo-tests-fft-node-js ]
|
||||
if: ${{ always() }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
11
.github/workflows/cargo_test_ntt.yml
vendored
11
.github/workflows/cargo_test_ntt.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Test tfhe-ntt
|
||||
name: Cargo Test tfhe-ntt
|
||||
name: cargo_test_ntt
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -22,6 +22,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: cargo_test_ntt/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -37,7 +38,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
ntt:
|
||||
@@ -47,6 +48,7 @@ jobs:
|
||||
- '.github/workflows/cargo_test_ntt.yml'
|
||||
|
||||
setup-instance:
|
||||
name: cargo_test_ntt/setup-instance
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.ntt_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
@@ -75,6 +77,7 @@ jobs:
|
||||
echo "matrix_os=[\"${INSTANCE_TO_USE}\", \"macos-latest\", \"windows-latest\"]" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cargo-tests-ntt:
|
||||
name: cargo_test_ntt/cargo-tests-ntt
|
||||
needs: [should-run, setup-instance]
|
||||
if: needs.should-run.outputs.ntt_test == 'true'
|
||||
runs-on: ${{ matrix.os }}
|
||||
@@ -101,6 +104,7 @@ jobs:
|
||||
run: make test_ntt_no_std
|
||||
|
||||
cargo-tests-ntt-nightly:
|
||||
name: cargo_test_ntt/cargo-tests-ntt-nightly
|
||||
needs: [should-run, setup-instance]
|
||||
if: needs.should-run.outputs.ntt_test == 'true'
|
||||
runs-on: ${{ matrix.os }}
|
||||
@@ -126,6 +130,7 @@ jobs:
|
||||
run: make test_ntt_no_std_nightly
|
||||
|
||||
cargo-tests-ntt-successful:
|
||||
name: cargo_test_ntt/cargo-tests-ntt-successful (bpr)
|
||||
needs: [should-run, cargo-tests-ntt, cargo-tests-ntt-nightly]
|
||||
if: ${{ always() }}
|
||||
runs-on: ubuntu-latest
|
||||
@@ -151,7 +156,7 @@ jobs:
|
||||
exit 1
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cargo-tests-ntt-successful)
|
||||
name: cargo_test_ntt/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [setup-instance, cargo-tests-ntt-successful]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
4
.github/workflows/check_commit.yml
vendored
4
.github/workflows/check_commit.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Check commit and PR compliance
|
||||
name: Check commit and PR compliance
|
||||
name: check_commit
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
@@ -7,7 +7,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
check-commit-pr:
|
||||
name: Check commit and PR
|
||||
name: check_commit/check-commit-pr (bpr)
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
4
.github/workflows/ci_lint.yml
vendored
4
.github/workflows/ci_lint.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Lint and check CI
|
||||
name: CI Lint and Checks
|
||||
name: ci_lint
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -14,7 +14,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
lint-check:
|
||||
name: Lint and checks
|
||||
name: ci_lint/lint-check (bpr)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
|
||||
20
.github/workflows/code_coverage.yml
vendored
20
.github/workflows/code_coverage.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Code Coverage
|
||||
name: code_coverage
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -22,7 +22,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (code-coverage)
|
||||
name: code_coverage/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
@@ -38,8 +38,8 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
code-coverage:
|
||||
name: Code coverage tests
|
||||
code-coverage-tests:
|
||||
name: code_coverage/code-coverage-tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}_${{ github.event_name }}
|
||||
@@ -60,7 +60,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@fdcc8476540edceab3de004e990f80d881c6cc00
|
||||
uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@fdcc8476540edceab3de004e990f80d881c6cc00
|
||||
uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -121,9 +121,9 @@ jobs:
|
||||
SLACK_MESSAGE: "Code coverage finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (code-coverage)
|
||||
name: code_coverage/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, code-coverage ]
|
||||
needs: [ setup-instance, code-coverage-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
@@ -142,4 +142,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (code-coverage) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (code-coverage-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
87
.github/workflows/coprocessor-benchmark-gpu.yml
vendored
87
.github/workflows/coprocessor-benchmark-gpu.yml
vendored
@@ -1,8 +1,24 @@
|
||||
# Run all fhevm coprocessor benchmarks on a GPU instance on Hyperstack and return parsed results to Slab CI bot.
|
||||
name: Cuda Coprocessor benchmarks
|
||||
name: coprocessor-benchmark-gpu
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
profile:
|
||||
description: "Instance type"
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "4-l40 (n3-L40x4)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
- "multi-h100-sxm5_fallback (n3-H100x8-SXM5)"
|
||||
|
||||
schedule:
|
||||
# Weekly tests @ 1AM
|
||||
- cron: "0 1 * * 6"
|
||||
@@ -17,7 +33,9 @@ env:
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
PROFILE_MANUAL_RUN: ${{ inputs.profile }}
|
||||
IS_MANUAL_RUN: ${{ github.event_name == 'workflow_dispatch' }}
|
||||
BENCHMARK_TYPE: "ALL"
|
||||
OPTIMIZATION_TARGET: "throughput"
|
||||
BATCH_SIZE: "5000"
|
||||
@@ -40,15 +58,25 @@ jobs:
|
||||
- name: Parse profile
|
||||
id: parse_profile
|
||||
run: |
|
||||
if [[ ${IS_MANUAL_RUN} == true ]]; then
|
||||
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
|
||||
else
|
||||
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
|
||||
fi
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
|
||||
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
|
||||
echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Parse hardware name
|
||||
id: parse_hardware_name
|
||||
run: |
|
||||
if [[ ${IS_MANUAL_RUN} == true ]]; then
|
||||
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
|
||||
else
|
||||
PROFILE_RAW="${PROFILE}"
|
||||
fi
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
@@ -71,7 +99,7 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: ${{ needs.parse-inputs.outputs.profile }}
|
||||
|
||||
benchmark:
|
||||
benchmark-gpu:
|
||||
name: coprocessor-benchmark-gpu/benchmark-gpu (bpr)
|
||||
needs: [ parse-inputs, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
@@ -130,6 +158,13 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
working-directory: tfhe-rs/
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./tfhe-rs/.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Check fhEVM and TFHE-rs repos
|
||||
run: |
|
||||
pwd
|
||||
@@ -140,13 +175,6 @@ jobs:
|
||||
run: git lfs checkout
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./fhevm/.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
@@ -154,7 +182,7 @@ jobs:
|
||||
|
||||
- name: Install cargo dependencies
|
||||
run: |
|
||||
sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
|
||||
sudo apt-get install -y protobuf-compiler pkg-config libssl-dev \
|
||||
libclang-dev docker-compose-v2 docker.io acl
|
||||
sudo usermod -aG docker "$USER"
|
||||
newgrp docker
|
||||
@@ -181,12 +209,19 @@ jobs:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Chainguard Registry
|
||||
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
|
||||
with:
|
||||
registry: cgr.dev
|
||||
username: ${{ secrets.CGR_USERNAME }}
|
||||
password: ${{ secrets.CGR_PASSWORD }}
|
||||
|
||||
- name: Init database
|
||||
run: make init_db
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Use Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
|
||||
with:
|
||||
node-version: 20.x
|
||||
|
||||
@@ -203,8 +238,12 @@ jobs:
|
||||
|
||||
- name: Profile erc20 no-cmux benchmark on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
|
||||
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
|
||||
BENCHMARK_TYPE="THROUGHPUT_200" \
|
||||
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
|
||||
make -e "profile_erc20_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Get nsys profile name
|
||||
id: nsys_profile_name
|
||||
@@ -215,7 +254,7 @@ jobs:
|
||||
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
|
||||
run: |
|
||||
mv report1.nsys-rep ${{ env.REPORT_NAME }}
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Upload profile artifact
|
||||
env:
|
||||
@@ -223,17 +262,17 @@ jobs:
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ env.REPORT_NAME }}
|
||||
path: fhevm/coprocessor/fhevm-engine/coprocessor/${{ env.REPORT_NAME }}
|
||||
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
|
||||
|
||||
- name: Run latency benchmark on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Run throughput benchmarks on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -246,7 +285,7 @@ jobs:
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--walk-subdirs \
|
||||
--crate "coprocessor/fhevm-engine/coprocessor" \
|
||||
--crate "coprocessor/fhevm-engine/tfhe-worker" \
|
||||
--name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
|
||||
working-directory: fhevm/
|
||||
|
||||
@@ -273,9 +312,9 @@ jobs:
|
||||
--slab-url "${SLAB_URL}"
|
||||
|
||||
teardown-instance:
|
||||
name: coprocessor-benchmark-gpu/teardown
|
||||
name: coprocessor-benchmark-gpu/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, benchmark ]
|
||||
needs: [ setup-instance, benchmark-gpu ]
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: 'read'
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
name: CSPRNG randomness testing Workflow
|
||||
name: csprng_randomness_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -26,7 +26,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (csprng-randomness-tests)
|
||||
name: csprng_randomness_tests/setup-instance
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
csprng-randomness-tests:
|
||||
name: CSPRNG randomness tests
|
||||
name: csprng_randomness_tests/csprng-randomness-tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (csprng-randomness-tests)
|
||||
name: csprng_randomness_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, csprng-randomness-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
4
.github/workflows/gpu_4090_tests.yml
vendored
4
.github/workflows/gpu_4090_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an RTX 4090 machine
|
||||
name: Cuda - 4090 full tests
|
||||
name: gpu_4090_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -27,7 +27,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests (RTX 4090)
|
||||
name: gpu_4090_tests/cuda-tests-linux
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
contains(github.event.label.name, '4090_test') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
|
||||
10
.github/workflows/gpu_code_validation_tests.yml
vendored
10
.github/workflows/gpu_code_validation_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - CPU Memory Checks
|
||||
name: gpu_code_validation_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -31,7 +31,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests)
|
||||
name: gpu_code_validation_tests/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved')
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA Memory Checks tests
|
||||
name: gpu_code_validation_tests/cuda-tests-linux
|
||||
needs: [ setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
make test_high_level_api_gpu_valgrind
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_code_validation_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
SLACK_MESSAGE: "GPU Memory Checks tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
name: gpu_code_validation_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
13
.github/workflows/gpu_fast_h100_tests.yml
vendored
13
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: Cuda - Fast tests on H100
|
||||
name: gpu_fast_h100_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,6 +30,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_fast_h100_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -66,7 +67,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
name: gpu_fast_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
@@ -108,7 +109,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 tests
|
||||
name: gpu_fast_h100_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -165,7 +166,7 @@ jobs:
|
||||
BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_fast_h100_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -187,7 +188,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
name: gpu_fast_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
13
.github/workflows/gpu_fast_tests.yml
vendored
13
.github/workflows/gpu_fast_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - Fast tests
|
||||
name: gpu_fast_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -29,6 +29,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_fast_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -44,7 +45,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -65,7 +66,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests)
|
||||
name: gpu_fast_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
@@ -93,7 +94,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests
|
||||
name: gpu_fast_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -151,7 +152,7 @@ jobs:
|
||||
make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_fast_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -173,7 +174,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
name: gpu_fast_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
10
.github/workflows/gpu_full_h100_tests.yml
vendored
10
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: Cuda - Full tests on H100
|
||||
name: gpu_full_h100_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -20,7 +20,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
name: gpu_full_h100_tests/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 tests
|
||||
name: gpu_full_h100_tests/cuda-tests-linux
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}
|
||||
@@ -102,7 +102,7 @@ jobs:
|
||||
make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_full_h100_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ failure() }}
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
name: gpu_full_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
15
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
15
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - Full tests multi-GPU
|
||||
name: gpu_full_multi_gpu_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,6 +30,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_full_multi_gpu_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -66,7 +67,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests-multi-gpu)
|
||||
name: gpu_full_multi_gpu_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
@@ -85,7 +86,7 @@ jobs:
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-gpu-test
|
||||
profile: 4-l40
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
@@ -95,7 +96,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA multi-GPU tests
|
||||
name: gpu_full_multi_gpu_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -154,7 +155,7 @@ jobs:
|
||||
make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_full_multi_gpu_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -176,7 +177,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests-multi-gpu)
|
||||
name: gpu_full_multi_gpu_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
12
.github/workflows/gpu_integer_long_run_tests.yml
vendored
12
.github/workflows/gpu_integer_long_run_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Cuda - Long Run Tests on GPU
|
||||
name: gpu_integer_long_run_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -27,7 +27,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (gpu-tests)
|
||||
name: gpu_integer_long_run_tests/setup-instance
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -43,10 +43,10 @@ jobs:
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-gpu-test
|
||||
profile: 4-l40
|
||||
|
||||
cuda-tests:
|
||||
name: Long run GPU tests
|
||||
name: gpu_integer_long_run_tests/cuda-tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}_${{github.event_name}}
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
fi
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_integer_long_run_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests.result != 'skipped' && failure() }}
|
||||
@@ -103,7 +103,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (gpu-tests)
|
||||
name: gpu_integer_long_run_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
10
.github/workflows/gpu_memory_sanitizer.yml
vendored
10
.github/workflows/gpu_memory_sanitizer.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - GPU Memory Checks
|
||||
name: gpu_memory_sanitizer
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,7 +30,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests)
|
||||
name: gpu_memory_sanitizer/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved')
|
||||
@@ -57,7 +57,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA Memory Checks tests
|
||||
name: gpu_memory_sanitizer/cuda-tests-linux
|
||||
needs: [ setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -102,7 +102,7 @@ jobs:
|
||||
make test_high_level_api_gpu_sanitizer
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_memory_sanitizer/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
SLACK_MESSAGE: "GPU Memory Checks tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
name: gpu_memory_sanitizer/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
8
.github/workflows/gpu_pcc.yml
vendored
8
.github/workflows/gpu_pcc.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Perform tfhe-cuda-backend post-commit checks on an AWS instance
|
||||
name: Cuda - Post-commit Checks
|
||||
name: gpu_pcc
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -28,7 +28,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-pcc)
|
||||
name: gpu_pcc/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-pcc:
|
||||
name: CUDA post-commit checks
|
||||
name: gpu_pcc/cuda-pcc (bpr)
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}
|
||||
@@ -149,7 +149,7 @@ jobs:
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-pcc)
|
||||
name: cuda_pcc/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-pcc ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
|
||||
name: Cuda - Signed integer tests with classical PBS
|
||||
name: gpu_signed_integer_classic_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,6 +30,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_signed_integer_classic_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -66,7 +67,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-signed-classic-tests)
|
||||
name: gpu_signed_integer_classic_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
@@ -95,7 +96,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA signed integer tests with classical PBS
|
||||
name: gpu_signed_integer_classic_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -137,7 +138,7 @@ jobs:
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_signed_integer_classic_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -159,7 +160,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-signed-classic-tests)
|
||||
name: gpu_signed_integer_classic_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Signed integer GPU tests on an H100 VM on hyperstack
|
||||
name: Cuda - Signed integer tests on H100
|
||||
name: gpu_signed_integer_h100_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,6 +30,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_signed_integer_h100_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -66,7 +67,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
name: gpu_signed_integer_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
@@ -108,7 +109,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 signed integer tests
|
||||
name: gpu_signed_integer_h100_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -151,7 +152,7 @@ jobs:
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_signed_integer_h100_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -173,7 +174,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
name: gpu_signed_integer_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
13
.github/workflows/gpu_signed_integer_tests.yml
vendored
13
.github/workflows/gpu_signed_integer_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend signed integer on an AWS instance
|
||||
name: Cuda - Signed integer tests
|
||||
name: gpu_signed_integer_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -31,6 +31,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_signed_integer_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -46,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -67,7 +68,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-signed-integer-tests)
|
||||
name: gpu_signed_integer_tests/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
@@ -96,7 +97,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-signed-integer-tests:
|
||||
name: CUDA signed integer tests
|
||||
name: gpu_signed_integer_tests/cuda-signed-integer-tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -146,7 +147,7 @@ jobs:
|
||||
make test_signed_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_signed_integer_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-signed-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-signed-integer-tests.result != 'skipped' && failure() }}
|
||||
@@ -168,7 +169,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Signed GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
name: gpu_signed_integer_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-signed-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
|
||||
name: Cuda - Unsigned integer tests with classical PBS
|
||||
name: gpu_unsigned_integer_classic_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,6 +30,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_unsigned_integer_classic_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -66,7 +67,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-unsigned-classic-tests)
|
||||
name: gpu_unsigned_integer_classic_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
@@ -95,7 +96,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA unsigned integer tests with classical PBS
|
||||
name: gpu_unsigned_integer_classic_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -137,7 +138,7 @@ jobs:
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_unsigned_integer_classic_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -159,7 +160,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-unsigned-classic-tests)
|
||||
name: gpu_unsigned_integer_classic_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Test unsigned integers on an H100 VM on hyperstack
|
||||
name: Cuda - Unsigned integer tests on H100
|
||||
name: gpu_unsigned_integer_h100_tests/
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,6 +30,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_unsigned_integer_h100_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -66,7 +67,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
name: gpu_unsigned_integer_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
@@ -108,7 +109,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 unsigned integer tests
|
||||
name: gpu_unsigned_integer_h100_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -151,7 +152,7 @@ jobs:
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_unsigned_integer_h100_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -173,7 +174,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
name: gpu_unsigned_integer_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
13
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
13
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
|
||||
name: Cuda - Unsigned integer tests
|
||||
name: gpu_unsigned_integer_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -31,6 +31,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_unsigned_integer_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -46,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -67,7 +68,7 @@ jobs:
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-unsigned-integer-tests)
|
||||
name: gpu_unsigned_integer_tests/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
@@ -96,7 +97,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-unsigned-integer-tests:
|
||||
name: CUDA unsigned integer tests
|
||||
name: gpu_unsigned_integer_tests/cuda-unsigned-integer-tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -146,7 +147,7 @@ jobs:
|
||||
make test_unsigned_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
name: gpu_unsigned_integer_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-unsigned-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-unsigned-integer-tests.result != 'skipped' && failure() }}
|
||||
@@ -168,7 +169,7 @@ jobs:
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
name: gpu_unsigned_integer_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-unsigned-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
8
.github/workflows/hpu_hlapi_tests.yml
vendored
8
.github/workflows/hpu_hlapi_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Test tfhe-fft
|
||||
name: Cargo Test HLAPI HPU
|
||||
# Test HPU backend HLAPI layer
|
||||
name: hpu_hlapi_tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -21,6 +21,7 @@ permissions: { }
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: hpu_hlapi_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
@@ -36,7 +37,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
|
||||
with:
|
||||
files_yaml: |
|
||||
hpu:
|
||||
@@ -46,6 +47,7 @@ jobs:
|
||||
- mockups/tfhe-hpu-mockup/**
|
||||
|
||||
cargo-tests-hpu:
|
||||
name: hpu_hlapi_tests/cargo-tests-hpu (bpr)
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.hpu_test == 'true'
|
||||
runs-on: large_ubuntu_16
|
||||
|
||||
8
.github/workflows/integer_long_run_tests.yml
vendored
8
.github/workflows/integer_long_run_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: AWS Long Run Tests on CPU
|
||||
name: integer_long_run_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -23,7 +23,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cpu-tests)
|
||||
name: integer_long_run_tests/setup-instance
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
profile: cpu-big
|
||||
|
||||
cpu-tests:
|
||||
name: Long run CPU tests
|
||||
name: integer_long_run_tests/cpu-tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}_${{github.event_name}}
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
SLACK_MESSAGE: "CPU long run tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
name: integer_long_run_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cpu-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
5
.github/workflows/m1_tests.yml
vendored
5
.github/workflows/m1_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Tests on M1 CPU
|
||||
name: m1_tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -32,6 +32,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
cargo-builds-m1:
|
||||
name: m1_tests/cargo-builds-m1
|
||||
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
contains(github.event.label.name, 'm1_test') }}
|
||||
@@ -178,7 +179,7 @@ jobs:
|
||||
make test_integer_multi_bit_ci
|
||||
|
||||
remove_label:
|
||||
name: Remove m1_test label
|
||||
name: m1_tests/remove_label
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- cargo-builds-m1
|
||||
|
||||
16
.github/workflows/make_release.yml
vendored
16
.github/workflows/make_release.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Publish new release of tfhe-rs on various platform.
|
||||
name: Publish release
|
||||
name: make_release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -36,15 +36,18 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
verify-tag:
|
||||
name: make_release/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify_tag
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
@@ -66,6 +69,7 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
@@ -81,7 +85,7 @@ jobs:
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish Release
|
||||
name: make_release/publish_release
|
||||
needs: [package] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
# For provenance of npmjs publish
|
||||
|
||||
21
.github/workflows/make_release_cuda.yml
vendored
21
.github/workflows/make_release_cuda.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Publish CUDA release
|
||||
name: make_release_cuda
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -18,15 +18,17 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
verify-tag:
|
||||
name: make_release_cuda/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (publish-cuda-release)
|
||||
needs: verify_tag
|
||||
name: make_release_cuda/setup-instance
|
||||
needs: verify-tag
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
@@ -43,7 +45,7 @@ jobs:
|
||||
profile: gpu-build
|
||||
|
||||
package:
|
||||
name: Package CUDA Release for provenance
|
||||
name: make_release_cuda/package
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
outputs:
|
||||
@@ -104,6 +106,7 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_cuda/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
@@ -119,7 +122,7 @@ jobs:
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish-cuda-release:
|
||||
name: Publish CUDA Release
|
||||
name: make_release_cuda/publish-cuda-release
|
||||
needs: [setup-instance, package] # for comparing hashes
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
permissions:
|
||||
@@ -201,7 +204,7 @@ jobs:
|
||||
SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (publish-release)
|
||||
name: make_release_cuda/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [setup-instance, publish-cuda-release]
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
18
.github/workflows/make_release_hpu.yml
vendored
18
.github/workflows/make_release_hpu.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Publish HPU release
|
||||
name: make_release_hpu
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -18,15 +18,18 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
verify-tag:
|
||||
name: make_release_hpu/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_hpu/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify_tag
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
@@ -48,6 +51,7 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_hpu/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
@@ -63,9 +67,9 @@ jobs:
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-hpu-backend Release
|
||||
name: make_release_hpu/publish-release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
needs: [verify-tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
|
||||
16
.github/workflows/make_release_tfhe_csprng.yml
vendored
16
.github/workflows/make_release_tfhe_csprng.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Publish tfhe-csprng release
|
||||
name: make_release_tfhe_csprng
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -18,13 +18,16 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
verify-tag:
|
||||
name: make_release_tfhe_csprng/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_tfhe_csprng/package
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
@@ -48,6 +51,7 @@ jobs:
|
||||
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_csprng/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
@@ -64,8 +68,8 @@ jobs:
|
||||
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-csprng Release
|
||||
needs: [verify_tag, package]
|
||||
name: make_release_tfhe_csprng/publish-release
|
||||
needs: [verify-tag, package]
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
|
||||
18
.github/workflows/make_release_tfhe_fft.yml
vendored
18
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Publish new release of tfhe-fft
|
||||
name: Publish tfhe-fft release
|
||||
name: make_release_tfhe_fft
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -19,15 +19,18 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
verify-tag:
|
||||
name: make_release_tfhe_fft/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_tfhe_fft/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify_tag
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
@@ -49,6 +52,7 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_fft/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
@@ -64,9 +68,9 @@ jobs:
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-fft Release
|
||||
name: make_release_tfhe_fft/publish-release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
needs: [verify-tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
|
||||
18
.github/workflows/make_release_tfhe_ntt.yml
vendored
18
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Publish new release of tfhe-ntt
|
||||
name: Publish tfhe-ntt release
|
||||
name: make_release_tfhe_ntt
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -19,15 +19,18 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
verify-tag:
|
||||
name: make_release_tfhe_ntt/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
name: make_release_tfhe_ntt/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify_tag
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
@@ -49,6 +52,7 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_ntt/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
@@ -64,9 +68,9 @@ jobs:
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-ntt Release
|
||||
name: make_release_tfhe_ntt/publish-release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
needs: [verify-tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Publish tfhe-versionable release
|
||||
name: make_release_tfhe_versionable
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -13,14 +13,16 @@ env:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
verify-tag:
|
||||
name: make_release_tfhe_versionable/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package-derive:
|
||||
name: Package tfhe-versionable-derive Release
|
||||
name: make_release_tfhe_versionable/package-derive
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
@@ -43,6 +45,7 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance-derive:
|
||||
name: make_release_tfhe_versionable/provenance-derive
|
||||
needs: [package-derive]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
@@ -57,8 +60,8 @@ jobs:
|
||||
base64-subjects: ${{ needs.package-derive.outputs.hash }}
|
||||
|
||||
publish_release-derive:
|
||||
name: Publish tfhe-versionable-derive Release
|
||||
needs: [ verify_tag, package-derive ] # for comparing hashes
|
||||
name: make_release_tfhe_versionable/publish_release_derive
|
||||
needs: [ verify-tag, package-derive ] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
@@ -102,7 +105,7 @@ jobs:
|
||||
SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
package:
|
||||
name: Package tfhe-versionable Release
|
||||
name: make_release_tfhe_versionable/package
|
||||
needs: publish_release-derive
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -126,6 +129,7 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_tfhe_versionable/provenance
|
||||
needs: package
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
@@ -140,7 +144,7 @@ jobs:
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-versionable Release
|
||||
name: make_release_tfhe_versionable/publish-release
|
||||
needs: package # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
68
.github/workflows/make_release_zk_pok.yml
vendored
68
.github/workflows/make_release_zk_pok.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Publish tfhe-zk-pok release
|
||||
name: make_release_zk_pok
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -15,33 +15,45 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
permissions: { }
|
||||
|
||||
jobs:
|
||||
verify-tag:
|
||||
name: make_release_zk_pok/verify-tag
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_commit_actor.yml
|
||||
secrets:
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-zk-pok
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
name: make_release_zk_pok/package
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify-tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-zk-pok
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_zk_pok/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
needs: [ package ]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
@@ -54,15 +66,9 @@ jobs:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-zk-pok Release
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
name: make_release_zk_pok/publish-release
|
||||
needs: [ verify-tag, package ] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
|
||||
5
.github/workflows/parameters_check.yml
vendored
5
.github/workflows/parameters_check.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Perform a security check on all the cryptographic parameters set
|
||||
name: Parameters curves security check
|
||||
name: parameters_check
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -16,6 +16,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
params-curves-security-check:
|
||||
name: parameters_check/params-curves-security-check
|
||||
runs-on: large_ubuntu_16-22.04
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
@@ -29,7 +30,7 @@ jobs:
|
||||
with:
|
||||
repository: malb/lattice-estimator
|
||||
path: lattice_estimator
|
||||
ref: '52f4b7a99ae7b5dfd088c5c295070bd38ff0d1e0'
|
||||
ref: 'e35f45b7976a90a79c3c6625a45bbc344c1abc67'
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Install Sage
|
||||
|
||||
4
.github/workflows/placeholder_workflow.yml
vendored
4
.github/workflows/placeholder_workflow.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Placeholder workflow file allowing running it without having to merge to main first
|
||||
name: Placeholder Workflow
|
||||
name: placeholder_workflow
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -8,7 +8,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
placeholder:
|
||||
name: Placeholder
|
||||
name: placeholder_workflow/placeholder
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
|
||||
74
.github/workflows/sync_on_push.yml
vendored
74
.github/workflows/sync_on_push.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Sync repos
|
||||
name: Sync repos
|
||||
name: sync_on_push
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -7,30 +7,62 @@ on:
|
||||
- 'main'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions: {}
|
||||
permissions: { }
|
||||
|
||||
jobs:
|
||||
sync-repo:
|
||||
name: sync_on_push/sync-repo
|
||||
if: ${{ github.repository == 'zama-ai/tfhe-rs' }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: git-sync
|
||||
uses: valtech-sd/git-sync@e734cfe9485a92e720eac5af8a4555dde5fecf88
|
||||
with:
|
||||
source_repo: "zama-ai/tfhe-rs"
|
||||
source_branch: "main"
|
||||
destination_repo: "https://${{ secrets.BOT_USERNAME }}:${{ secrets.FHE_ACTIONS_TOKEN }}@github.com/${{ secrets.SYNC_DEST_REPO }}"
|
||||
destination_branch: "main"
|
||||
- name: git-sync tags
|
||||
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
|
||||
with:
|
||||
source_repo: "zama-ai/tfhe-rs"
|
||||
source_branch: "refs/tags/*"
|
||||
destination_repo: "https://${{ secrets.BOT_USERNAME }}:${{ secrets.FHE_ACTIONS_TOKEN }}@github.com/${{ secrets.SYNC_DEST_REPO }}"
|
||||
destination_branch: "refs/tags/*"
|
||||
env:
|
||||
SOURCE_REPO: "zama-ai/tfhe-rs"
|
||||
SOURCE_BRANCH: "main"
|
||||
DESTINATION_BRANCH: "main"
|
||||
USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
TOKEN: ${{ secrets.SYNC_REPO_TOKEN }}
|
||||
DEST_REPO: ${{ secrets.SYNC_DEST_REPO }}
|
||||
run: |
|
||||
echo ">>> Cloning source repo..."
|
||||
git lfs install
|
||||
git clone "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs --origin source && cd ./tfhe-rs
|
||||
git remote add destination "https://${USERNAME}:${TOKEN}@github.com/${DEST_REPO}.git"
|
||||
|
||||
echo ">>> Fetching all branches references down locally so subsequent commands can see them..."
|
||||
git fetch source '+refs/heads/*:refs/heads/*' --update-head-ok
|
||||
|
||||
echo ">>> Print out all branches"
|
||||
git --no-pager branch -a -vv
|
||||
|
||||
echo ">>> Fetching all LFS items from source..."
|
||||
git lfs fetch --all source "${SOURCE_BRANCH}"
|
||||
|
||||
echo ">>> Pushing git changes..."
|
||||
git push destination "${SOURCE_BRANCH}:${DESTINATION_BRANCH}" -f
|
||||
|
||||
echo ">>> Pushing all LFS items..."
|
||||
git lfs push --all destination "${DESTINATION_BRANCH}"
|
||||
|
||||
- name: git-sync-tags
|
||||
env:
|
||||
SOURCE_REPO: "zama-ai/tfhe-rs"
|
||||
SOURCE_BRANCH: "refs/tags/*"
|
||||
DESTINATION_BRANCH: "refs/tags/*"
|
||||
USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
TOKEN: ${{ secrets.SYNC_REPO_TOKEN }}
|
||||
DEST_REPO: ${{ secrets.SYNC_DEST_REPO }}
|
||||
run: |
|
||||
echo ">>> Cloning source repo..."
|
||||
git lfs install
|
||||
git clone "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs-tag --origin source && cd ./tfhe-rs-tag
|
||||
git remote add destination "https://${USERNAME}:${TOKEN}@github.com/${DEST_REPO}.git"
|
||||
|
||||
echo ">>> Fetching all branches references down locally so subsequent commands can see them..."
|
||||
git fetch source '+refs/heads/*:refs/heads/*' --update-head-ok
|
||||
|
||||
echo ">>> Print out all branches"
|
||||
git --no-pager branch -a -vv
|
||||
|
||||
echo ">>> Pushing git changes..."
|
||||
git push destination "${SOURCE_BRANCH}:${DESTINATION_BRANCH}" -f
|
||||
|
||||
6
.github/workflows/unverified_prs.yml
vendored
6
.github/workflows/unverified_prs.yml
vendored
@@ -1,4 +1,5 @@
|
||||
name: 'Close unverified PRs'
|
||||
# Close unverified PRs'
|
||||
name: unverified_prs
|
||||
on:
|
||||
schedule:
|
||||
- cron: '30 1 * * *'
|
||||
@@ -7,12 +8,13 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
name: unverified_prs/stale
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: read
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
|
||||
- uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
|
||||
with:
|
||||
stale-pr-message: 'This PR is unverified and has been open for 2 days, it will now be closed. If you want to contribute please sign the CLA as indicated by the bot.'
|
||||
days-before-stale: 2
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# Verify a tagged commit
|
||||
name: Verify tagged commit
|
||||
# Verify a commit actor
|
||||
name: verify_commit_actor
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
secrets:
|
||||
RELEASE_TEAM:
|
||||
ALLOWED_TEAM:
|
||||
required: true
|
||||
READ_ORG_TOKEN:
|
||||
required: true
|
||||
@@ -12,9 +12,9 @@ on:
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
checks:
|
||||
check-actor:
|
||||
name: verify_commit_actor/check-actor
|
||||
runs-on: ubuntu-latest
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
steps:
|
||||
# Check triggering actor membership
|
||||
- name: Actor verification
|
||||
@@ -23,7 +23,7 @@ jobs:
|
||||
with:
|
||||
username: ${{ github.triggering_actor }}
|
||||
org: ${{ github.repository_owner }}
|
||||
team: ${{ secrets.RELEASE_TEAM }}
|
||||
team: ${{ secrets.ALLOWED_TEAM }}
|
||||
github_token: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
- name: Actor authorized
|
||||
@@ -32,7 +32,7 @@ pulp = { version = "0.21", default-features = false }
|
||||
rand = "0.8"
|
||||
rayon = "1.11"
|
||||
serde = { version = "1.0", default-features = false }
|
||||
wasm-bindgen = "0.2.100"
|
||||
wasm-bindgen = "0.2.101"
|
||||
getrandom = "0.2.8"
|
||||
|
||||
[profile.bench]
|
||||
|
||||
48
Makefile
48
Makefile
@@ -21,11 +21,11 @@ BENCH_OP_FLAVOR?=DEFAULT
|
||||
BENCH_TYPE?=latency
|
||||
BENCH_PARAM_TYPE?=classical
|
||||
BENCH_PARAMS_SET?=default
|
||||
BENCH_CUSTOM_COMMAND:=
|
||||
NODE_VERSION=22.6
|
||||
BACKWARD_COMPAT_DATA_DIR=utils/tfhe-backward-compat-data
|
||||
WASM_PACK_VERSION="0.13.1"
|
||||
# We are kind of hacking the cut here, the version cannot contain a quote '"'
|
||||
WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
|
||||
WASM_BINDGEN_VERSION:=$(shell cargo tree --target wasm32-unknown-unknown -e all --prefix none | grep "wasm-bindgen v" | head -n 1 | cut -d 'v' -f2)
|
||||
WEB_RUNNER_DIR=web-test-runner
|
||||
WEB_SERVER_DIR=tfhe/web_wasm_parallel_tests
|
||||
# This is done to avoid forgetting it, we still precise the RUSTFLAGS in the commands to be able to
|
||||
@@ -114,10 +114,6 @@ install_cargo_nextest: install_rs_build_toolchain
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-nextest --locked || \
|
||||
( echo "Unable to install cargo nextest, unknown error." && exit 1 )
|
||||
|
||||
# The installation should use the ^ symbol if the specified version in the root Cargo.toml is of the
|
||||
# form "0.2.96" then we get ^0.2.96 e.g., as we don't lock those dependencies
|
||||
# this allows to get the matching CLI
|
||||
# If a version range is specified no need to add the leading ^
|
||||
.PHONY: install_wasm_bindgen_cli # Install wasm-bindgen-cli to get access to the test runner
|
||||
install_wasm_bindgen_cli: install_rs_build_toolchain
|
||||
cargo +$(RS_BUILD_TOOLCHAIN) install --locked wasm-bindgen-cli --version "$(WASM_BINDGEN_VERSION)"
|
||||
@@ -160,9 +156,13 @@ install_tarpaulin: install_rs_build_toolchain
|
||||
( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_cargo_dylint # Install custom tfhe-rs lints
|
||||
install_cargo_dylint:
|
||||
install_cargo_dylint: install_rs_build_toolchain
|
||||
cargo install --locked cargo-dylint dylint-link
|
||||
|
||||
.PHONY: install_cargo_audit # Check dependencies
|
||||
install_cargo_audit: install_rs_build_toolchain
|
||||
cargo install --locked cargo-audit
|
||||
|
||||
.PHONY: install_typos_checker # Install typos checker
|
||||
install_typos_checker: install_rs_build_toolchain
|
||||
@typos --version > /dev/null 2>&1 || \
|
||||
@@ -545,6 +545,10 @@ tfhe_lints: install_cargo_dylint
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe-zk-pok --no-deps -- \
|
||||
--features=experimental
|
||||
|
||||
.PHONY: audit_dependencies # Run cargo audit to check vulnerable dependencies
|
||||
audit_dependencies: install_rs_build_toolchain install_cargo_audit
|
||||
cargo audit
|
||||
|
||||
|
||||
.PHONY: build_core # Build core_crypto without experimental features
|
||||
build_core: install_rs_build_toolchain install_rs_check_toolchain
|
||||
@@ -693,7 +697,7 @@ test_integer_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p tfhe -- integer::gpu::server_key:: --test-threads=2
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p tfhe -- integer::gpu::server_key::
|
||||
--features=integer,gpu -p tfhe -- integer::gpu::server_key:: --test-threads=4
|
||||
|
||||
.PHONY: test_integer_gpu_debug # Run the tests of the integer module with Debug flags for CUDA
|
||||
test_integer_gpu_debug: install_rs_build_toolchain
|
||||
@@ -990,11 +994,34 @@ test_high_level_api: install_rs_build_toolchain
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p tfhe \
|
||||
-- high_level_api::
|
||||
|
||||
test_high_level_api_gpu_one: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test \
|
||||
--features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
-- --nocapture high_level_api::array::tests::booleans::test_gpu_only_bitand
|
||||
|
||||
#
|
||||
test_high_level_api_gpu_mul: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test \
|
||||
--features=integer,internal-keycache,gpu-debug --profile release \
|
||||
-p tfhe \
|
||||
-- --nocapture integer::gpu::server_key::radix::tests_unsigned::test_mul:: \
|
||||
--test-threads=6
|
||||
|
||||
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
test_list_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest list --cargo-profile $(CARGO_PROFILE) \
|
||||
--features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
-E "test(/.*gpu.*/)"
|
||||
|
||||
.PHONY: build_one_hl_api_test_gpu
|
||||
build_one_hl_api_test_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
--features=integer,gpu-debug -vv -p tfhe -- "$${TEST}" --test-threads=1 --nocapture
|
||||
|
||||
test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
|
||||
ifeq ($(HPU_CONFIG), v80)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
@@ -1556,6 +1583,11 @@ bench_hlapi_noise_squash_gpu: install_rs_check_toolchain
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
|
||||
|
||||
|
||||
.PHONY: bench_custom # Run benchmarks with a user-defined command
|
||||
bench_custom: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench -p tfhe-benchmark $(BENCH_CUSTOM_COMMAND)
|
||||
|
||||
#
|
||||
# Utility tools
|
||||
#
|
||||
|
||||
@@ -86,6 +86,7 @@ if(CMAKE_BUILD_TYPE_LOWERCASE STREQUAL "debug")
|
||||
message("Compiling in Debug mode")
|
||||
add_definitions(-DDEBUG)
|
||||
set(OPTIMIZATION_FLAGS "${OPTIMIZATION_FLAGS} -O0 -G -g")
|
||||
set(USE_NVTOOLS 1)
|
||||
else()
|
||||
# Release mode
|
||||
message("Compiling in Release mode")
|
||||
|
||||
@@ -4,10 +4,19 @@
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cuda_runtime.h>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#define CUDA_STREAM_POOL
|
||||
|
||||
enum CudaStreamType
|
||||
{
|
||||
KEY = 0,
|
||||
ALLOC = 1,
|
||||
TEMP_HELPER = 2,
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
|
||||
#define check_cuda_error(ans) \
|
||||
@@ -76,13 +85,27 @@ void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
uint32_t cuda_is_available();
|
||||
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
|
||||
void *cuda_ext_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
uint64_t &size_tracker,
|
||||
bool allocate_gpu_memory);
|
||||
void *cuda_intern_malloc_with_size_tracking_async(uint64_t size,
|
||||
cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
uint64_t &size_tracker,
|
||||
bool allocate_gpu_memory,
|
||||
const char *file, int line);
|
||||
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
|
||||
#define cuda_malloc_with_size_tracking_async( \
|
||||
size, stream, gpu_index, size_tracker, allocate_gpu_memory) \
|
||||
cuda_intern_malloc_with_size_tracking_async( \
|
||||
size, stream, gpu_index, size_tracker, allocate_gpu_memory, __FILE__, \
|
||||
__LINE__)
|
||||
|
||||
void *cuda_int_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index, const char *file, int line);
|
||||
#define cuda_malloc_async(size, stream, gpu_index) \
|
||||
cuda_int_malloc_async(size, stream, gpu_index, __FILE__, __LINE__)
|
||||
void *cuda_ext_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index);
|
||||
|
||||
bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
|
||||
uint64_t cuda_device_total_memory(uint32_t gpu_index);
|
||||
@@ -96,18 +119,28 @@ void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
|
||||
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_ext_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
|
||||
void *dest, void const *src, uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index, bool gpu_memory_allocated);
|
||||
|
||||
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
void cuda_ext_memcpy_async_gpu_to_gpu(void *dest, void const *src,
|
||||
uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index);
|
||||
void cuda_ext_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
void cuda_ext_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
|
||||
uint64_t size, cudaStream_t stream,
|
||||
@@ -116,6 +149,8 @@ void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
|
||||
|
||||
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
void cuda_ext_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
int cuda_get_number_of_gpus();
|
||||
|
||||
@@ -123,13 +158,26 @@ int cuda_get_number_of_sms();
|
||||
|
||||
void cuda_synchronize_device(uint32_t gpu_index);
|
||||
|
||||
void cuda_drop(void *ptr, uint32_t gpu_index);
|
||||
void cuda_int_drop(void *ptr, uint32_t gpu_index, const char *file, int line);
|
||||
#define cuda_drop(ptr, gpu_index) \
|
||||
cuda_int_drop(ptr, gpu_index, __FILE__, __LINE__)
|
||||
|
||||
void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated);
|
||||
void cuda_ext_drop(void *ptr, uint32_t gpu_index);
|
||||
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
|
||||
void cuda_int_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated,
|
||||
const char *file, int line);
|
||||
|
||||
#define cuda_drop_with_size_tracking_async(ptr, stream, gpu_index, \
|
||||
gpu_memory_allocated) \
|
||||
cuda_int_drop_with_size_tracking_async( \
|
||||
ptr, stream, gpu_index, gpu_memory_allocated, __FILE__, __LINE__)
|
||||
|
||||
void cuda_int_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index,
|
||||
const char *file, int line);
|
||||
#define cuda_drop_async(ptr, stream, gpu_index) \
|
||||
cuda_int_drop_async(ptr, stream, gpu_index, __FILE__, __LINE__)
|
||||
}
|
||||
|
||||
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
|
||||
@@ -141,4 +189,5 @@ bool cuda_check_support_thread_block_clusters();
|
||||
template <typename Torus>
|
||||
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *d_array, Torus value, Torus n);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "integer/integer.h"
|
||||
|
||||
extern std::mutex m;
|
||||
extern bool p2p_enabled;
|
||||
extern const int THRESHOLD_MULTI_GPU;
|
||||
@@ -37,10 +39,149 @@ get_variant_element(const std::variant<std::vector<Torus>, Torus> &variant,
|
||||
}
|
||||
}
|
||||
|
||||
int get_active_gpu_count(int num_inputs, int gpu_count);
|
||||
uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count);
|
||||
|
||||
int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);
|
||||
|
||||
int get_gpu_offset(int total_num_inputs, int gpu_index, int gpu_count);
|
||||
|
||||
// A Set of GPU Streams and associated GPUs
|
||||
// Can be constructed from the FFI struct CudaStreamsFFI which
|
||||
// is only used to pass the streams/gpus at the rust/C interface
|
||||
// This class should only be constructed from the FFI struct,
|
||||
// through class methods or through the copy constructor. The class
|
||||
// can also be constructed as an empty set
|
||||
struct CudaStreams {
|
||||
private:
|
||||
cudaStream_t const *_streams;
|
||||
uint32_t const *_gpu_indexes;
|
||||
uint32_t _gpu_count;
|
||||
bool _owns_streams;
|
||||
|
||||
// Prevent the construction of a CudaStreams class from user-code
|
||||
CudaStreams(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count)
|
||||
: _streams(streams), _gpu_indexes(gpu_indexes), _gpu_count(gpu_count),
|
||||
_owns_streams(false) {}
|
||||
|
||||
public:
|
||||
// Construct an empty set. Invalid use of an empty set should raise an error
|
||||
// right away through asserts or because of a nullptr dereference
|
||||
CudaStreams()
|
||||
: _streams(nullptr), _gpu_indexes(nullptr), _gpu_count((uint32_t)-1),
|
||||
_owns_streams(false) {}
|
||||
|
||||
// Returns a subset of this set as an active subset. An active subset is one
|
||||
// that is temporarily used to perform some computation
|
||||
CudaStreams active_gpu_subset(int num_radix_blocks) {
|
||||
return CudaStreams(_streams, _gpu_indexes,
|
||||
get_active_gpu_count(num_radix_blocks, _gpu_count));
|
||||
}
|
||||
|
||||
// Returns a subset containing only the first gpu of this set. It
|
||||
// is used to create subset of streams for mono-GPU functions
|
||||
CudaStreams subset_first_gpu() const {
|
||||
return CudaStreams(_streams, _gpu_indexes, 1);
|
||||
}
|
||||
|
||||
// Synchronize all the streams in the set
|
||||
void synchronize() const {
|
||||
for (uint32_t i = 0; i < _gpu_count; i++) {
|
||||
cuda_synchronize_stream(_streams[i], _gpu_indexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
cudaStream_t stream(uint32_t idx) const {
|
||||
PANIC_IF_FALSE(idx < _gpu_count, "Invalid GPU index");
|
||||
return _streams[idx];
|
||||
}
|
||||
uint32_t gpu_index(uint32_t idx) const {
|
||||
PANIC_IF_FALSE(idx < _gpu_count, "Invalid GPU index");
|
||||
return _gpu_indexes[idx];
|
||||
}
|
||||
uint32_t count() const { return _gpu_count; }
|
||||
|
||||
// Construct from the rust FFI stream set. Streams are created in rust
|
||||
// using the bindings.
|
||||
CudaStreams(CudaStreamsFFI &ffi)
|
||||
: _streams((cudaStream_t *)ffi.streams), _gpu_indexes(ffi.gpu_indexes),
|
||||
_gpu_count(ffi.gpu_count), _owns_streams(false) {}
|
||||
|
||||
// Create a new set of streams on the same gpus as those of the current stream
|
||||
// set Can be used to parallelize computation by issuing kernels on multiple
|
||||
// streams on the same GPU
|
||||
void create_on_same_gpus(const CudaStreams &other) {
|
||||
PANIC_IF_FALSE(_streams == nullptr,
|
||||
"Assign clone to non-empty cudastreams");
|
||||
|
||||
cudaStream_t *new_streams = new cudaStream_t[other._gpu_count];
|
||||
|
||||
uint32_t *gpu_indexes_clone = new uint32_t[_gpu_count];
|
||||
for (uint32_t i = 0; i < other._gpu_count; ++i) {
|
||||
new_streams[i] = cuda_create_stream(other._gpu_indexes[i]);
|
||||
gpu_indexes_clone[i] = other._gpu_indexes[i];
|
||||
}
|
||||
|
||||
this->_streams = new_streams;
|
||||
this->_gpu_indexes = gpu_indexes_clone;
|
||||
this->_gpu_count = other._gpu_count;
|
||||
|
||||
// Flag this instance as owning streams so that we can destroy
|
||||
// the streams when they aren't needed anymore
|
||||
this->_owns_streams = true;
|
||||
}
|
||||
|
||||
// Copy constructor, setting the own flag to false
|
||||
// Only the initial instance of CudaStreams created with
|
||||
// assign_clone owns streams, all copies of it do not own the
|
||||
// streams
|
||||
CudaStreams(const CudaStreams &src)
|
||||
: _streams(src._streams), _gpu_indexes(src._gpu_indexes),
|
||||
_gpu_count(src._gpu_count), _owns_streams(false) {}
|
||||
|
||||
CudaStreams &operator=(CudaStreams const &other) {
|
||||
PANIC_IF_FALSE(this->_streams == nullptr ||
|
||||
this->_streams == other._streams,
|
||||
"Assigning an already initialized CudaStreams");
|
||||
this->_streams = other._streams;
|
||||
this->_gpu_indexes = other._gpu_indexes;
|
||||
this->_gpu_count = other._gpu_count;
|
||||
|
||||
// Only the initial instance of CudaStreams created with
|
||||
// assign_clone owns streams, all copies of it do not own the
|
||||
// streams
|
||||
this->_owns_streams = false;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Destroy the streams if they are created by assign_clone.
|
||||
// We require the developer to call `destroy` on all instances
|
||||
// of cloned streams.
|
||||
void release() {
|
||||
// If this instance doesn't own streams, there's nothing to do
|
||||
// as the streams were created on the Rust side.
|
||||
if (_owns_streams) {
|
||||
for (uint32_t i = 0; i < _gpu_count; ++i) {
|
||||
cuda_destroy_stream(_streams[i], _gpu_indexes[i]);
|
||||
}
|
||||
delete[] _streams;
|
||||
_streams = nullptr;
|
||||
delete[] _gpu_indexes;
|
||||
_gpu_indexes = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// The destructor checks that streams created with assign_clone
|
||||
// were destroyed manually with `destroy`.
|
||||
~CudaStreams() {
|
||||
// Ensure streams are destroyed
|
||||
PANIC_IF_FALSE(
|
||||
!_owns_streams || _streams == nullptr,
|
||||
"Destroy (this=%p) was not called on a CudaStreams object that "
|
||||
"is a clone "
|
||||
"of another one, %p",
|
||||
this, this->_streams);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#define CUDA_INTEGER_COMPRESSION_H
|
||||
|
||||
#include "../../pbs/pbs_enums.h"
|
||||
#include "../integer.h"
|
||||
|
||||
typedef struct {
|
||||
void *ptr;
|
||||
@@ -25,77 +26,65 @@ typedef struct {
|
||||
|
||||
extern "C" {
|
||||
uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t compression_glwe_dimension,
|
||||
uint32_t compression_polynomial_size, uint32_t lwe_dimension,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t lwe_per_glwe, bool allocate_gpu_memory);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
|
||||
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t lwe_per_glwe, bool allocate_gpu_memory);
|
||||
|
||||
uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t encryption_glwe_dimension,
|
||||
uint32_t encryption_polynomial_size, uint32_t compression_glwe_dimension,
|
||||
uint32_t compression_polynomial_size, uint32_t lwe_dimension,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
|
||||
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
|
||||
uint32_t lwe_dimension, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t num_blocks_to_decompress, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_compress_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaPackedGlweCiphertextListFFI *glwe_array_out,
|
||||
CudaStreamsFFI streams, CudaPackedGlweCiphertextListFFI *glwe_array_out,
|
||||
CudaLweCiphertextListFFI const *lwe_array_in, void *const *fp_ksk,
|
||||
int8_t *mem_ptr);
|
||||
|
||||
void cuda_integer_decompress_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaLweCiphertextListFFI *lwe_array_out,
|
||||
CudaStreamsFFI streams, CudaLweCiphertextListFFI *lwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_in,
|
||||
uint32_t const *indexes_array, void *const *bsks, int8_t *mem_ptr);
|
||||
|
||||
void cleanup_cuda_integer_compress_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
void cleanup_cuda_integer_compress_radix_ciphertext_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cleanup_cuda_integer_decompress_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
void cleanup_cuda_integer_decompress_radix_ciphertext_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_compress_radix_ciphertext_128(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t compression_glwe_dimension,
|
||||
uint32_t compression_polynomial_size, uint32_t lwe_dimension,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t lwe_per_glwe, bool allocate_gpu_memory);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
|
||||
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, uint32_t lwe_per_glwe, bool allocate_gpu_memory);
|
||||
|
||||
uint64_t scratch_cuda_integer_decompress_radix_ciphertext_128(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t compression_glwe_dimension,
|
||||
uint32_t compression_polynomial_size, uint32_t lwe_dimension,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
bool allocate_gpu_memory);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr,
|
||||
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
|
||||
uint32_t lwe_dimension, uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_compress_radix_ciphertext_128(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaPackedGlweCiphertextListFFI *glwe_array_out,
|
||||
CudaStreamsFFI streams, CudaPackedGlweCiphertextListFFI *glwe_array_out,
|
||||
CudaLweCiphertextListFFI const *lwe_array_in, void *const *fp_ksk,
|
||||
int8_t *mem_ptr);
|
||||
|
||||
void cuda_integer_decompress_radix_ciphertext_128(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaLweCiphertextListFFI *lwe_array_out,
|
||||
CudaStreamsFFI streams, CudaLweCiphertextListFFI *lwe_array_out,
|
||||
CudaPackedGlweCiphertextListFFI const *glwe_in,
|
||||
uint32_t const *indexes_array, int8_t *mem_ptr);
|
||||
|
||||
void cleanup_cuda_integer_compress_radix_ciphertext_128(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -12,8 +12,7 @@ template <typename Torus> struct int_compression {
|
||||
bool gpu_memory_allocated;
|
||||
uint32_t lwe_per_glwe;
|
||||
|
||||
int_compression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params compression_params,
|
||||
int_compression(CudaStreams streams, int_radix_params compression_params,
|
||||
uint32_t num_radix_blocks, uint32_t lwe_per_glwe,
|
||||
bool allocate_gpu_memory, uint64_t &size_tracker) {
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
@@ -25,26 +24,29 @@ template <typename Torus> struct int_compression {
|
||||
tmp_lwe = static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
|
||||
num_radix_blocks * (compression_params.small_lwe_dimension + 1) *
|
||||
sizeof(Torus),
|
||||
streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory));
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory));
|
||||
tmp_glwe_array_out =
|
||||
static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
|
||||
lwe_per_glwe * glwe_accumulator_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0], size_tracker, allocate_gpu_memory));
|
||||
lwe_per_glwe * glwe_accumulator_size * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory));
|
||||
|
||||
size_tracker += scratch_packing_keyswitch_lwe_list_to_glwe<Torus>(
|
||||
streams[0], gpu_indexes[0], &fp_ks_buffer,
|
||||
streams.stream(0), streams.gpu_index(0), &fp_ks_buffer,
|
||||
compression_params.small_lwe_dimension,
|
||||
compression_params.glwe_dimension, compression_params.polynomial_size,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
}
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
cuda_drop_with_size_tracking_async(tmp_lwe, streams[0], gpu_indexes[0],
|
||||
void release(CudaStreams streams) {
|
||||
cuda_drop_with_size_tracking_async(
|
||||
tmp_lwe, streams.stream(0), streams.gpu_index(0), gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_glwe_array_out, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_glwe_array_out, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cleanup_packing_keyswitch_lwe_list_to_glwe(
|
||||
streams[0], gpu_indexes[0], &fp_ks_buffer, gpu_memory_allocated);
|
||||
streams.stream(0), streams.gpu_index(0), &fp_ks_buffer,
|
||||
gpu_memory_allocated);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -60,8 +62,7 @@ template <typename Torus> struct int_decompression {
|
||||
int_radix_lut<Torus> *decompression_rescale_lut;
|
||||
bool gpu_memory_allocated;
|
||||
|
||||
int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params encryption_params,
|
||||
int_decompression(CudaStreams streams, int_radix_params encryption_params,
|
||||
int_radix_params compression_params,
|
||||
uint32_t num_blocks_to_decompress, bool allocate_gpu_memory,
|
||||
uint64_t &size_tracker) {
|
||||
@@ -78,19 +79,21 @@ template <typename Torus> struct int_decompression {
|
||||
|
||||
tmp_extracted_glwe = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_to_decompress * glwe_accumulator_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
tmp_indexes_array = (uint32_t *)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_to_decompress * sizeof(uint32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
num_blocks_to_decompress * sizeof(uint32_t), streams.stream(0),
|
||||
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
|
||||
tmp_extracted_lwe = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_to_decompress * lwe_accumulator_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
// rescale is only needed on 64-bit decompression
|
||||
if constexpr (std::is_same_v<Torus, uint64_t>) {
|
||||
decompression_rescale_lut = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, encryption_params, 1,
|
||||
num_blocks_to_decompress, allocate_gpu_memory, size_tracker);
|
||||
streams, encryption_params, 1, num_blocks_to_decompress,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
// Rescale is done using an identity LUT
|
||||
// Here we do not divide by message_modulus
|
||||
@@ -98,8 +101,8 @@ template <typename Torus> struct int_decompression {
|
||||
// space, we want to keep the original 2-bit value in the 4-bit space,
|
||||
// so we apply the identity and the encoding will rescale it for us.
|
||||
decompression_rescale_lut = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, encryption_params, 1,
|
||||
num_blocks_to_decompress, allocate_gpu_memory, size_tracker);
|
||||
streams, encryption_params, 1, num_blocks_to_decompress,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
auto decompression_rescale_f = [](Torus x) -> Torus { return x; };
|
||||
|
||||
auto effective_compression_message_modulus =
|
||||
@@ -107,7 +110,8 @@ template <typename Torus> struct int_decompression {
|
||||
auto effective_compression_carry_modulus = 1;
|
||||
|
||||
generate_device_accumulator_with_encoding<Torus>(
|
||||
streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
decompression_rescale_lut->get_lut(0, 0),
|
||||
decompression_rescale_lut->get_degree(0),
|
||||
decompression_rescale_lut->get_max_degree(0),
|
||||
encryption_params.glwe_dimension, encryption_params.polynomial_size,
|
||||
@@ -115,22 +119,22 @@ template <typename Torus> struct int_decompression {
|
||||
effective_compression_carry_modulus,
|
||||
encryption_params.message_modulus, encryption_params.carry_modulus,
|
||||
decompression_rescale_f, gpu_memory_allocated);
|
||||
auto active_gpu_count =
|
||||
get_active_gpu_count(num_blocks_to_decompress, gpu_count);
|
||||
decompression_rescale_lut->broadcast_lut(streams, gpu_indexes,
|
||||
active_gpu_count);
|
||||
auto active_streams = streams.active_gpu_subset(num_blocks_to_decompress);
|
||||
decompression_rescale_lut->broadcast_lut(active_streams);
|
||||
}
|
||||
}
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
cuda_drop_with_size_tracking_async(tmp_extracted_glwe, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_extracted_lwe, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_indexes_array, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
void release(CudaStreams streams) {
|
||||
cuda_drop_with_size_tracking_async(tmp_extracted_glwe, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_extracted_lwe, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_indexes_array, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
if constexpr (std::is_same_v<Torus, uint64_t>) {
|
||||
decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
|
||||
decompression_rescale_lut->release(streams);
|
||||
delete decompression_rescale_lut;
|
||||
decompression_rescale_lut = nullptr;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -47,12 +47,11 @@ uint64_t scratch_cuda_multi_bit_programmable_bootstrap_128_vector_64(
|
||||
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_128(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_output_indexes, void const *lut_vector,
|
||||
void const *lut_vector_indexes, void const *lwe_array_in,
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
void const *lwe_array_in, void const *lwe_input_indexes,
|
||||
void const *bootstrapping_key, int8_t *mem_ptr, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap_128(void *stream,
|
||||
const uint32_t gpu_index,
|
||||
|
||||
@@ -6,36 +6,26 @@
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" {
|
||||
|
||||
void cuda_lwe_expand_64(void *const stream, uint32_t gpu_index,
|
||||
void *lwe_array_out, const void *lwe_compact_array_in,
|
||||
uint32_t lwe_dimension, uint32_t num_lwe,
|
||||
const uint32_t *lwe_compact_input_indexes,
|
||||
const uint32_t *output_body_id_per_compact_list);
|
||||
|
||||
uint64_t scratch_cuda_expand_without_verification_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension,
|
||||
uint32_t computing_ks_level, uint32_t computing_ks_base_log,
|
||||
uint32_t casting_input_dimension, uint32_t casting_output_dimension,
|
||||
uint32_t casting_ks_level, uint32_t casting_ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array,
|
||||
uint32_t num_compact_lists, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, KS_TYPE casting_key_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t computing_ks_level,
|
||||
uint32_t computing_ks_base_log, uint32_t casting_input_dimension,
|
||||
uint32_t casting_output_dimension, uint32_t casting_ks_level,
|
||||
uint32_t casting_ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list,
|
||||
const bool *is_boolean_array, uint32_t num_compact_lists,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
KS_TYPE casting_key_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_expand_without_verification_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, const void *lwe_flattened_compact_array_in,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *computing_ksks,
|
||||
void *const *casting_keys,
|
||||
CudaStreamsFFI streams, void *lwe_array_out,
|
||||
const void *lwe_flattened_compact_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *computing_ksks, void *const *casting_keys,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_expand_without_verification_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
void cleanup_expand_without_verification_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
}
|
||||
#endif // ZK_H
|
||||
|
||||
@@ -5,6 +5,96 @@
|
||||
#include "integer/integer.cuh"
|
||||
#include <cstdint>
|
||||
|
||||
////////////////////////////////////
|
||||
// Helper structures used in expand
|
||||
template <typename Torus> struct lwe_mask {
|
||||
Torus *mask;
|
||||
|
||||
lwe_mask(Torus *mask) : mask{mask} {}
|
||||
};
|
||||
|
||||
template <typename Torus> struct compact_lwe_body {
|
||||
Torus *body;
|
||||
uint64_t monomial_degree;
|
||||
|
||||
/* Body id is the index of the body in the compact ciphertext list.
|
||||
* It's used to compute the rotation.
|
||||
*/
|
||||
compact_lwe_body(Torus *body, const uint64_t body_id)
|
||||
: body{body}, monomial_degree{body_id} {}
|
||||
};
|
||||
|
||||
template <typename Torus> struct compact_lwe_list {
|
||||
Torus *ptr;
|
||||
uint32_t lwe_dimension;
|
||||
uint32_t total_num_lwes;
|
||||
|
||||
compact_lwe_list(Torus *ptr, uint32_t lwe_dimension, uint32_t total_num_lwes)
|
||||
: ptr{ptr}, lwe_dimension{lwe_dimension}, total_num_lwes{total_num_lwes} {
|
||||
}
|
||||
|
||||
lwe_mask<Torus> get_mask() { return lwe_mask(ptr); }
|
||||
|
||||
// Returns the index-th body
|
||||
compact_lwe_body<Torus> get_body(uint32_t index) {
|
||||
if (index >= total_num_lwes) {
|
||||
PANIC("index out of range in compact_lwe_list::get_body");
|
||||
}
|
||||
|
||||
return compact_lwe_body(&ptr[lwe_dimension + index], uint64_t(index));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus> struct flattened_compact_lwe_lists {
|
||||
Torus *d_ptr;
|
||||
Torus **d_ptr_to_compact_list;
|
||||
const uint32_t *h_num_lwes_per_compact_list;
|
||||
uint32_t num_compact_lists;
|
||||
uint32_t lwe_dimension;
|
||||
uint32_t total_num_lwes;
|
||||
|
||||
flattened_compact_lwe_lists(Torus *d_ptr,
|
||||
const uint32_t *h_num_lwes_per_compact_list,
|
||||
uint32_t num_compact_lists,
|
||||
uint32_t lwe_dimension)
|
||||
: d_ptr(d_ptr), h_num_lwes_per_compact_list(h_num_lwes_per_compact_list),
|
||||
num_compact_lists(num_compact_lists), lwe_dimension(lwe_dimension) {
|
||||
d_ptr_to_compact_list =
|
||||
static_cast<Torus **>(malloc(num_compact_lists * sizeof(Torus **)));
|
||||
total_num_lwes = 0;
|
||||
auto curr_list = d_ptr;
|
||||
for (auto i = 0; i < num_compact_lists; ++i) {
|
||||
total_num_lwes += h_num_lwes_per_compact_list[i];
|
||||
d_ptr_to_compact_list[i] = curr_list;
|
||||
curr_list += lwe_dimension + h_num_lwes_per_compact_list[i];
|
||||
}
|
||||
}
|
||||
|
||||
compact_lwe_list<Torus> get_device_compact_list(uint32_t compact_list_index) {
|
||||
if (compact_list_index >= num_compact_lists) {
|
||||
PANIC("index out of range in flattened_compact_lwe_lists::get");
|
||||
}
|
||||
|
||||
return compact_lwe_list(d_ptr_to_compact_list[compact_list_index],
|
||||
lwe_dimension,
|
||||
h_num_lwes_per_compact_list[compact_list_index]);
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* A expand_job tells the expand kernel exactly which input mask and body to use
|
||||
* and what rotation to apply
|
||||
*/
|
||||
template <typename Torus> struct expand_job {
|
||||
lwe_mask<Torus> mask_to_use;
|
||||
compact_lwe_body<Torus> body_to_use;
|
||||
|
||||
expand_job(lwe_mask<Torus> mask_to_use, compact_lwe_body<Torus> body_to_use)
|
||||
: mask_to_use{mask_to_use}, body_to_use{body_to_use} {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
|
||||
template <typename Torus> struct zk_expand_mem {
|
||||
int_radix_params computing_params;
|
||||
int_radix_params casting_params;
|
||||
@@ -17,13 +107,13 @@ template <typename Torus> struct zk_expand_mem {
|
||||
Torus *tmp_expanded_lwes;
|
||||
Torus *tmp_ksed_small_to_big_expanded_lwes;
|
||||
|
||||
uint32_t *d_lwe_compact_input_indexes;
|
||||
|
||||
uint32_t *d_body_id_per_compact_list;
|
||||
bool gpu_memory_allocated;
|
||||
|
||||
zk_expand_mem(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params computing_params,
|
||||
uint32_t *num_lwes_per_compact_list;
|
||||
expand_job<Torus> *d_expand_jobs;
|
||||
expand_job<Torus> *h_expand_jobs;
|
||||
|
||||
zk_expand_mem(CudaStreams streams, int_radix_params computing_params,
|
||||
int_radix_params casting_params, KS_TYPE casting_key_type,
|
||||
const uint32_t *num_lwes_per_compact_list,
|
||||
const bool *is_boolean_array, uint32_t num_compact_lists,
|
||||
@@ -33,9 +123,17 @@ template <typename Torus> struct zk_expand_mem {
|
||||
casting_key_type(casting_key_type) {
|
||||
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
|
||||
// We copy num_lwes_per_compact_list so we get protection against
|
||||
// num_lwes_per_compact_list being freed while this buffer is still in use
|
||||
this->num_lwes_per_compact_list =
|
||||
(uint32_t *)malloc(num_compact_lists * sizeof(uint32_t));
|
||||
memcpy(this->num_lwes_per_compact_list, num_lwes_per_compact_list,
|
||||
num_compact_lists * sizeof(uint32_t));
|
||||
|
||||
num_lwes = 0;
|
||||
for (int i = 0; i < num_compact_lists; i++) {
|
||||
num_lwes += num_lwes_per_compact_list[i];
|
||||
num_lwes += this->num_lwes_per_compact_list[i];
|
||||
}
|
||||
|
||||
if (computing_params.carry_modulus != computing_params.message_modulus) {
|
||||
@@ -73,11 +171,10 @@ template <typename Torus> struct zk_expand_mem {
|
||||
params = computing_params;
|
||||
}
|
||||
message_and_carry_extract_luts = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, 4, 2 * num_lwes,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
streams, params, 4, 2 * num_lwes, allocate_gpu_memory, size_tracker);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
message_and_carry_extract_luts->get_lut(0, 0),
|
||||
message_and_carry_extract_luts->get_degree(0),
|
||||
message_and_carry_extract_luts->get_max_degree(0),
|
||||
@@ -85,7 +182,7 @@ template <typename Torus> struct zk_expand_mem {
|
||||
params.carry_modulus, message_extract_lut_f, gpu_memory_allocated);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
message_and_carry_extract_luts->get_lut(0, 1),
|
||||
message_and_carry_extract_luts->get_degree(1),
|
||||
message_and_carry_extract_luts->get_max_degree(1),
|
||||
@@ -93,7 +190,7 @@ template <typename Torus> struct zk_expand_mem {
|
||||
params.carry_modulus, carry_extract_lut_f, gpu_memory_allocated);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
message_and_carry_extract_luts->get_lut(0, 2),
|
||||
message_and_carry_extract_luts->get_degree(2),
|
||||
message_and_carry_extract_luts->get_max_degree(2),
|
||||
@@ -102,7 +199,7 @@ template <typename Torus> struct zk_expand_mem {
|
||||
gpu_memory_allocated);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
message_and_carry_extract_luts->get_lut(0, 3),
|
||||
message_and_carry_extract_luts->get_degree(3),
|
||||
message_and_carry_extract_luts->get_max_degree(3),
|
||||
@@ -110,9 +207,12 @@ template <typename Torus> struct zk_expand_mem {
|
||||
params.carry_modulus, carry_extract_and_sanitize_bool_lut_f,
|
||||
gpu_memory_allocated);
|
||||
|
||||
// Hint for future readers: if message_modulus == 4 then
|
||||
// packed_messages_per_lwe becomes 2
|
||||
auto num_packed_msgs = log2_int(params.message_modulus);
|
||||
// We are always packing two LWEs. We just need to be sure we have enough
|
||||
// space in the carry part to store a message of the same size as is in the
|
||||
// message part.
|
||||
if (params.carry_modulus < params.message_modulus)
|
||||
PANIC("Carry modulus must be at least as large as message modulus");
|
||||
auto num_packed_msgs = 2;
|
||||
|
||||
// Adjust indexes to permute the output and access the correct LUT
|
||||
auto h_indexes_in = static_cast<Torus *>(
|
||||
@@ -121,49 +221,14 @@ template <typename Torus> struct zk_expand_mem {
|
||||
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
|
||||
auto h_lut_indexes = static_cast<Torus *>(
|
||||
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
|
||||
auto h_body_id_per_compact_list =
|
||||
static_cast<uint32_t *>(malloc(num_lwes * sizeof(uint32_t)));
|
||||
auto h_lwe_compact_input_indexes =
|
||||
static_cast<uint32_t *>(malloc(num_lwes * sizeof(uint32_t)));
|
||||
|
||||
d_body_id_per_compact_list =
|
||||
static_cast<uint32_t *>(cuda_malloc_with_size_tracking_async(
|
||||
num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory));
|
||||
d_lwe_compact_input_indexes =
|
||||
static_cast<uint32_t *>(cuda_malloc_with_size_tracking_async(
|
||||
num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory));
|
||||
d_expand_jobs =
|
||||
static_cast<expand_job<Torus> *>(cuda_malloc_with_size_tracking_async(
|
||||
num_lwes * sizeof(expand_job<Torus>), streams.stream(0),
|
||||
streams.gpu_index(0), size_tracker, allocate_gpu_memory));
|
||||
|
||||
auto compact_list_id = 0;
|
||||
auto idx = 0;
|
||||
auto count = 0;
|
||||
// During flattening, all num_lwes LWEs from all compact lists are stored
|
||||
// sequentially on a Torus array. h_lwe_compact_input_indexes stores the
|
||||
// index of the first LWE related to the compact list that contains the i-th
|
||||
// LWE
|
||||
for (int i = 0; i < num_lwes; i++) {
|
||||
h_lwe_compact_input_indexes[i] = idx;
|
||||
count++;
|
||||
if (count == num_lwes_per_compact_list[compact_list_id]) {
|
||||
compact_list_id++;
|
||||
idx += casting_params.big_lwe_dimension + count;
|
||||
count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the index of the i-th LWE (within each compact list) related to
|
||||
// the k-th compact list.
|
||||
auto offset = 0;
|
||||
for (int k = 0; k < num_compact_lists; k++) {
|
||||
auto num_lwes_in_kth_compact_list = num_lwes_per_compact_list[k];
|
||||
uint32_t body_count = 0;
|
||||
for (int i = 0; i < num_lwes_in_kth_compact_list; i++) {
|
||||
h_body_id_per_compact_list[i + offset] = body_count;
|
||||
body_count++;
|
||||
}
|
||||
offset += num_lwes_in_kth_compact_list;
|
||||
}
|
||||
h_expand_jobs = static_cast<expand_job<Torus> *>(
|
||||
malloc(num_lwes * sizeof(expand_job<Torus>)));
|
||||
|
||||
/*
|
||||
* Each LWE contains encrypted data in both carry and message spaces
|
||||
@@ -198,9 +263,9 @@ template <typename Torus> struct zk_expand_mem {
|
||||
* num_packed_msgs to use the sanitization LUT (which ensures output is
|
||||
* exactly 0 or 1).
|
||||
*/
|
||||
offset = 0;
|
||||
auto offset = 0;
|
||||
for (int k = 0; k < num_compact_lists; k++) {
|
||||
auto num_lwes_in_kth = num_lwes_per_compact_list[k];
|
||||
auto num_lwes_in_kth = this->num_lwes_per_compact_list[k];
|
||||
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
|
||||
auto lwe_index = i + num_packed_msgs * offset;
|
||||
auto lwe_index_in_list = i % num_lwes_in_kth;
|
||||
@@ -217,61 +282,53 @@ template <typename Torus> struct zk_expand_mem {
|
||||
}
|
||||
|
||||
message_and_carry_extract_luts->set_lwe_indexes(
|
||||
streams[0], gpu_indexes[0], h_indexes_in, h_indexes_out);
|
||||
streams.stream(0), streams.gpu_index(0), h_indexes_in, h_indexes_out);
|
||||
auto lut_indexes = message_and_carry_extract_luts->get_lut_indexes(0, 0);
|
||||
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
d_lwe_compact_input_indexes, h_lwe_compact_input_indexes,
|
||||
num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
|
||||
allocate_gpu_memory);
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
lut_indexes, h_lut_indexes, num_packed_msgs * num_lwes * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0], allocate_gpu_memory);
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
d_body_id_per_compact_list, h_body_id_per_compact_list,
|
||||
num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
|
||||
allocate_gpu_memory);
|
||||
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
|
||||
|
||||
auto active_gpu_count = get_active_gpu_count(2 * num_lwes, gpu_count);
|
||||
message_and_carry_extract_luts->broadcast_lut(streams, gpu_indexes,
|
||||
active_gpu_count);
|
||||
auto active_streams = streams.active_gpu_subset(2 * num_lwes);
|
||||
message_and_carry_extract_luts->broadcast_lut(active_streams);
|
||||
|
||||
message_and_carry_extract_luts->allocate_lwe_vector_for_non_trivial_indexes(
|
||||
streams, gpu_indexes, active_gpu_count, 2 * num_lwes, size_tracker,
|
||||
allocate_gpu_memory);
|
||||
active_streams, 2 * num_lwes, size_tracker, allocate_gpu_memory);
|
||||
// The expanded LWEs will always be on the casting key format
|
||||
tmp_expanded_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
num_lwes * (casting_params.big_lwe_dimension + 1) * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
tmp_ksed_small_to_big_expanded_lwes =
|
||||
(Torus *)cuda_malloc_with_size_tracking_async(
|
||||
num_lwes * (casting_params.big_lwe_dimension + 1) * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
free(h_indexes_in);
|
||||
free(h_indexes_out);
|
||||
free(h_lut_indexes);
|
||||
free(h_body_id_per_compact_list);
|
||||
free(h_lwe_compact_input_indexes);
|
||||
}
|
||||
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
void release(CudaStreams streams) {
|
||||
|
||||
message_and_carry_extract_luts->release(streams, gpu_indexes, gpu_count);
|
||||
message_and_carry_extract_luts->release(streams);
|
||||
delete message_and_carry_extract_luts;
|
||||
|
||||
cuda_drop_with_size_tracking_async(d_body_id_per_compact_list, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_lwe_compact_input_indexes, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_ksed_small_to_big_expanded_lwes,
|
||||
streams[0], gpu_indexes[0],
|
||||
cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_ksed_small_to_big_expanded_lwes,
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_expand_jobs, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
free(num_lwes_per_compact_list);
|
||||
free(h_expand_jobs);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -49,17 +49,16 @@ __global__ void device_batch_fft_ggsw_vector(double2 *dest, T *src,
|
||||
* global memory
|
||||
*/
|
||||
template <typename T, typename ST, class params>
|
||||
void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, double2 *dest, T *src,
|
||||
void batch_fft_ggsw_vector(CudaStreams streams, double2 *dest, T *src,
|
||||
int8_t *d_mem, uint32_t r, uint32_t glwe_dim,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t max_shared_memory) {
|
||||
PANIC_IF_FALSE(gpu_count == 1,
|
||||
PANIC_IF_FALSE(streams.count() == 1,
|
||||
"GPU error (batch_fft_ggsw_vector): multi-GPU execution on %d "
|
||||
"gpus is not supported yet.",
|
||||
gpu_count);
|
||||
streams.count());
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cuda_set_device(streams.gpu_index(0));
|
||||
|
||||
int shared_memory_size = sizeof(double) * polynomial_size;
|
||||
|
||||
@@ -68,11 +67,11 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
|
||||
if (max_shared_memory < shared_memory_size) {
|
||||
device_batch_fft_ggsw_vector<T, ST, params, NOSM>
|
||||
<<<gridSize, blockSize, 0, streams[0]>>>(dest, src, d_mem);
|
||||
<<<gridSize, blockSize, 0, streams.stream(0)>>>(dest, src, d_mem);
|
||||
} else {
|
||||
device_batch_fft_ggsw_vector<T, ST, params, FULLSM>
|
||||
<<<gridSize, blockSize, shared_memory_size, streams[0]>>>(dest, src,
|
||||
d_mem);
|
||||
<<<gridSize, blockSize, shared_memory_size, streams.stream(0)>>>(
|
||||
dest, src, d_mem);
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
@@ -142,8 +142,7 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void execute_keyswitch_async(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void execute_keyswitch_async(CudaStreams streams,
|
||||
const LweArrayVariant<Torus> &lwe_array_out,
|
||||
const LweArrayVariant<Torus> &lwe_output_indexes,
|
||||
const LweArrayVariant<Torus> &lwe_array_in,
|
||||
@@ -154,8 +153,9 @@ void execute_keyswitch_async(cudaStream_t const *streams,
|
||||
|
||||
/// If the number of radix blocks is lower than the number of GPUs, not all
|
||||
/// GPUs will be active and there will be 1 input per GPU
|
||||
for (uint i = 0; i < gpu_count; i++) {
|
||||
int num_samples_on_gpu = get_num_inputs_on_gpu(num_samples, i, gpu_count);
|
||||
for (uint i = 0; i < streams.count(); i++) {
|
||||
int num_samples_on_gpu =
|
||||
get_num_inputs_on_gpu(num_samples, i, streams.count());
|
||||
|
||||
Torus *current_lwe_array_out = get_variant_element(lwe_array_out, i);
|
||||
Torus *current_lwe_output_indexes =
|
||||
@@ -166,7 +166,7 @@ void execute_keyswitch_async(cudaStream_t const *streams,
|
||||
|
||||
// Compute Keyswitch
|
||||
host_keyswitch_lwe_ciphertext_vector<Torus>(
|
||||
streams[i], gpu_indexes[i], current_lwe_array_out,
|
||||
streams.stream(i), streams.gpu_index(i), current_lwe_array_out,
|
||||
current_lwe_output_indexes, current_lwe_array_in,
|
||||
current_lwe_input_indexes, ksks[i], lwe_dimension_in, lwe_dimension_out,
|
||||
base_log, level_count, num_samples_on_gpu);
|
||||
|
||||
@@ -1,15 +1,42 @@
|
||||
#include "device.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
#include <deque>
|
||||
#include <unordered_map>
|
||||
#include <mutex>
|
||||
|
||||
#ifdef USE_NVTOOLS
|
||||
#include <cuda_profiler_api.h>
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_STREAM_POOL
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#endif
|
||||
|
||||
#include <bits/this_thread_sleep.h>
|
||||
|
||||
#define USE_MEMORY_MANAGER
|
||||
// #define DEBUG_MEMORY_MANAGER
|
||||
#define MAX_CACHE_SIZE (1 << 30)
|
||||
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
#include <list>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#endif
|
||||
|
||||
uint32_t cuda_get_device() {
|
||||
int device;
|
||||
check_cuda_error(cudaGetDevice(&device));
|
||||
return static_cast<uint32_t>(device);
|
||||
}
|
||||
std::mutex pool_mutex;
|
||||
bool mem_pools_enabled = false;
|
||||
|
||||
std::atomic<bool> mem_pools_enabled = false;
|
||||
|
||||
// We use memory pools to reduce some overhead of memory allocations due
|
||||
// to our scratch/release pattern. This function is the simplest way of using
|
||||
@@ -26,13 +53,13 @@ bool mem_pools_enabled = false;
|
||||
// We tested more complex configurations of mempools, but they did not yield
|
||||
// better results.
|
||||
void cuda_setup_mempool(uint32_t caller_gpu_index) {
|
||||
if (!mem_pools_enabled) {
|
||||
pool_mutex.lock();
|
||||
if (mem_pools_enabled)
|
||||
return; // If mem pools are already enabled, we don't need to do anything
|
||||
|
||||
// We do it only once for all GPUs
|
||||
mem_pools_enabled = true;
|
||||
bool pools_not_initialized = false;
|
||||
bool pools_initialized = true;
|
||||
|
||||
// if pools_not_initialized is found, mem_pools_enabled is set to pools_initialized
|
||||
// and the if body runs
|
||||
if (mem_pools_enabled.compare_exchange_strong(pools_not_initialized, pools_initialized)) {
|
||||
uint32_t num_gpus = cuda_get_number_of_gpus();
|
||||
for (uint32_t gpu_index = 0; gpu_index < num_gpus; gpu_index++) {
|
||||
cuda_set_device(gpu_index);
|
||||
@@ -75,7 +102,6 @@ void cuda_setup_mempool(uint32_t caller_gpu_index) {
|
||||
}
|
||||
// We return to the original gpu_index
|
||||
cuda_set_device(caller_gpu_index);
|
||||
pool_mutex.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,8 +109,335 @@ void cuda_set_device(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
// Mempools are initialized only once in all the GPUS available
|
||||
cuda_setup_mempool(gpu_index);
|
||||
#ifdef USE_NVTOOLS
|
||||
check_cuda_error(cudaProfilerStart());
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
|
||||
enum CudaMemBlockUsageType { CUDA_ALLOC = 0, MEMSET, MEMCPY_SRC, MEMCPY_DEST, FREE };
|
||||
|
||||
enum CudaAllocType { SYNC = 0, ASYNC };
|
||||
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
struct CudaMemBlockUsage {
|
||||
std::string location;
|
||||
uint64_t timestamp;
|
||||
CudaMemBlockUsageType type;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct CudaMemBlock {
|
||||
int8_t *ptr;
|
||||
uint64_t size;
|
||||
cudaStream_t stream;
|
||||
uint32_t gpu_index;
|
||||
size_t thread_id;
|
||||
CudaAllocType alloc_type;
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
std::vector<CudaMemBlockUsage> usages;
|
||||
#endif
|
||||
};
|
||||
|
||||
class CudaMemoryManager {
|
||||
std::list<CudaMemBlock> cuda_allocs; // fresh allocs
|
||||
std::list<CudaMemBlock> cuda_freed; // freed for good
|
||||
|
||||
std::unordered_map<cudaStream_t,
|
||||
std::unordered_map<uint64_t, std::deque<CudaMemBlock>>>
|
||||
cache; // freed and re-used
|
||||
uint64_t cache_size = 0, peak_cache_size = 0;
|
||||
|
||||
std::mutex allocs_mutex;
|
||||
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
std::string make_location(const char *file, int line) {
|
||||
std::stringstream sstr;
|
||||
sstr << file << ":" << line;
|
||||
return sstr.str();
|
||||
}
|
||||
uint64_t make_timestamp() {
|
||||
const std::chrono::time_point<std::chrono::system_clock> now =
|
||||
std::chrono::system_clock::now();
|
||||
|
||||
auto us = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now.time_since_epoch())
|
||||
.count() %
|
||||
1000000;
|
||||
return us;
|
||||
}
|
||||
|
||||
void check_range_is_valid(CudaMemBlockUsageType usage_type, int8_t *dest,
|
||||
uint64_t size, cudaStream_t stream,
|
||||
const char *file, int line) {
|
||||
CudaMemBlockUsage usage = {make_location(file, line), make_timestamp(),
|
||||
usage_type};
|
||||
|
||||
const char *info = NULL;
|
||||
switch (usage_type) {
|
||||
case MEMSET:
|
||||
info = "memset";
|
||||
break;
|
||||
case MEMCPY_SRC:
|
||||
info = "memcpy source";
|
||||
break;
|
||||
case MEMCPY_DEST:
|
||||
info = "memcpy dest";
|
||||
break;
|
||||
default:
|
||||
info = "unknown";
|
||||
}
|
||||
|
||||
auto device_id = cuda_get_device();
|
||||
|
||||
bool found = false;
|
||||
for (auto it = cuda_allocs.begin(); it != cuda_allocs.end(); it++) {
|
||||
if (it->ptr == dest && it->gpu_index == device_id) {
|
||||
printf("%s with size tracking: found ptr %p\n", info, dest);
|
||||
if (size > it->size) {
|
||||
PANIC("%s OF %lu bytes TOO BIG TO %p OF SIZE %ld\n", info, size, dest,
|
||||
it->size);
|
||||
}
|
||||
it->usages.push_back(usage);
|
||||
found = true;
|
||||
} else {
|
||||
if (dest > it->ptr && dest < it->ptr + it->size &&
|
||||
it->gpu_index == device_id) {
|
||||
printf("%s with size tracking: indirect ptr %p in buffer %p\n", info,
|
||||
dest, it->ptr);
|
||||
if (dest + size > it->ptr + it->size) {
|
||||
auto remain_bytes = it->ptr + it->size - dest;
|
||||
PANIC("%s OF %lu bytes TOO BIG TO %p WHICH HAS ROOM ONLY FOR %d\n",
|
||||
info, size, dest, remain_bytes);
|
||||
}
|
||||
it->usages.push_back(usage);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
PANIC("Cuda %s to %p of size %lu, unknown pointer", info, dest, size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
void alloc(void **ptr, uint64_t size, CudaAllocType alloc_type,
|
||||
uint32_t gpu_index, cudaStream_t stream, const char *file,
|
||||
int line) {
|
||||
std::lock_guard<std::mutex> guard(allocs_mutex);
|
||||
|
||||
auto cache_of_stream = cache.find(stream);
|
||||
if (cache_of_stream != cache.end()) {
|
||||
auto cache_of_size = cache_of_stream->second.find(size);
|
||||
if (cache_of_size != cache_of_stream->second.end() &&
|
||||
!cache_of_size->second.empty()) {
|
||||
auto cached_alloc = cache_of_size->second.front();
|
||||
cache_of_size->second.pop_front();
|
||||
|
||||
// move to active allocs
|
||||
cuda_allocs.push_back(cached_alloc);
|
||||
*ptr = cached_alloc.ptr;
|
||||
|
||||
if (cache_size < size) {
|
||||
PANIC("INVALID CACHE USE!!");
|
||||
}
|
||||
|
||||
cache_size -= size;
|
||||
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
printf("Cuda Allocation serviced from cache: %p of size %lu on gpu %d "
|
||||
"in %s\n",
|
||||
ptr, size, gpu_index, "");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
if (alloc_type == SYNC) {
|
||||
check_cuda_error(cudaMalloc(ptr, size));
|
||||
} else if (alloc_type == ASYNC) {
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
#elif (CUDART_VERSION >= 11020)
|
||||
int support_async_alloc;
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&support_async_alloc, cudaDevAttrMemoryPoolsSupported, gpu_index));
|
||||
|
||||
if (support_async_alloc) {
|
||||
check_cuda_error(cudaMallocAsync(ptr, size, stream));
|
||||
} else {
|
||||
check_cuda_error(cudaMalloc(ptr, size));
|
||||
}
|
||||
#else
|
||||
check_cuda_error(cudaMalloc((void **)&ptr, size));
|
||||
#endif
|
||||
} else {
|
||||
PANIC("Invalid allocation mode");
|
||||
}
|
||||
|
||||
if (*ptr == nullptr) {
|
||||
if (size > 0) {
|
||||
PANIC("Allocation failed for %lu bytes, allocator returned %p", size,
|
||||
ptr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto thread_id = std::hash<std::thread::id>{}(std::this_thread::get_id());
|
||||
CudaMemBlock block = {(int8_t *)*ptr, size, stream,
|
||||
gpu_index, thread_id, alloc_type};
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
CudaMemBlockUsage usage = {make_location(file, line), make_timestamp(),
|
||||
CUDA_ALLOC};
|
||||
block.usages.push_back(usage);
|
||||
|
||||
printf("Cuda Allocated %p of size %lu on gpu %d in %s\n", ptr, size,
|
||||
gpu_index, usage.location.c_str());
|
||||
#endif
|
||||
|
||||
cuda_allocs.push_back(block);
|
||||
}
|
||||
void memset(int8_t *dest, uint64_t size, cudaStream_t stream,
|
||||
const char *file, int line) {
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
std::lock_guard<std::mutex> guard(allocs_mutex);
|
||||
|
||||
check_range_is_valid(MEMSET, dest, size, stream, file, line);
|
||||
#endif
|
||||
}
|
||||
|
||||
void memcpy(int8_t *dest, int8_t *src, uint64_t size, cudaStream_t stream,
|
||||
const char *file, int line) {
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
std::lock_guard<std::mutex> guard(allocs_mutex);
|
||||
|
||||
check_range_is_valid(MEMCPY_SRC, src, size, stream, file, line);
|
||||
check_range_is_valid(MEMCPY_DEST, src, size, stream, file, line);
|
||||
#endif
|
||||
}
|
||||
|
||||
void free(void *ptr, CudaAllocType alloc_type, uint32_t gpu_index,
|
||||
cudaStream_t stream, const char *file, int line) {
|
||||
if (ptr == nullptr)
|
||||
return;
|
||||
|
||||
std::lock_guard<std::mutex> guard(allocs_mutex);
|
||||
|
||||
bool found = false;
|
||||
bool must_free = false;
|
||||
|
||||
for (auto it = cuda_allocs.begin(); it != cuda_allocs.end(); it++) {
|
||||
if (it->ptr == ptr && it->gpu_index == gpu_index) {
|
||||
found = true;
|
||||
|
||||
if (cache_size + it->size < (MAX_CACHE_SIZE)) {
|
||||
cache[stream][it->size].push_back(*it);
|
||||
cache_size += it->size;
|
||||
if (peak_cache_size < cache_size) {
|
||||
peak_cache_size = cache_size;
|
||||
}
|
||||
} else {
|
||||
cuda_freed.push_back(*it);
|
||||
must_free = true;
|
||||
}
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
printf("cuda dropped buffer %p of size %lu on gpu %d\n", ptr, it->size,
|
||||
gpu_index);
|
||||
#endif
|
||||
cuda_allocs.erase(it++);
|
||||
}
|
||||
}
|
||||
|
||||
if (must_free) {
|
||||
cuda_set_device(gpu_index);
|
||||
if (alloc_type == SYNC) {
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
} else if (alloc_type == ASYNC) {
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
#elif (CUDART_VERSION >= 11020)
|
||||
int support_async_alloc;
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&support_async_alloc, cudaDevAttrMemoryPoolsSupported, gpu_index));
|
||||
|
||||
if (support_async_alloc) {
|
||||
check_cuda_error(cudaFreeAsync(ptr, stream));
|
||||
} else {
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
}
|
||||
#else
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
if (!found) {
|
||||
for (auto it = cuda_freed.begin(); it != cuda_freed.end(); it++) {
|
||||
if (it->ptr == ptr && it->gpu_index == gpu_index) {
|
||||
found = true;
|
||||
printf("Drop in %s: %d\n", file, line);
|
||||
printf("Alloc in %s\n", it->usages[0].location.c_str());
|
||||
PANIC("cuda drop already dropped buffer %p of size %lu on gpu %d\n",
|
||||
ptr, it->size, gpu_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
PANIC("cuda drop unknown buffer %p\n", ptr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
~CudaMemoryManager() {
|
||||
#ifdef DEBUG_MEMORY_MANAGER
|
||||
printf("%lu ALLOCATIONS AT PROGRAM EXIT\n", cuda_allocs.size());
|
||||
|
||||
for (auto &cuda_alloc : cuda_allocs) {
|
||||
printf("%p of size %lu allocated at %s\n", cuda_alloc.ptr,
|
||||
cuda_alloc.size, cuda_alloc.usages[0].location.c_str());
|
||||
}
|
||||
|
||||
printf("\n\n\n %llu PEAK CACHE SIZE\n", peak_cache_size);
|
||||
|
||||
for (auto &cache_for_size : cache) {
|
||||
for (auto &cuda_alloc : cache_for_size.second) {
|
||||
printf("%p of size %lu cached at %s\n", cuda_alloc.ptr, cuda_alloc.size,
|
||||
cuda_alloc.usages[0].location.c_str());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
class CudaMultiGPUMemoryManager {
|
||||
std::unordered_map<uint32_t, CudaMemoryManager> gMemManagers;
|
||||
std::mutex gMemManagersMutex; // for creation of the mem managers
|
||||
std::atomic<uint32_t> gMemManagerExists = 0;
|
||||
|
||||
public:
|
||||
CudaMemoryManager &get(uint32_t gpu_index) {
|
||||
if (gMemManagerExists.load() & (1 << gpu_index)) {
|
||||
return gMemManagers[gpu_index];
|
||||
} else {
|
||||
std::lock_guard<std::mutex> guard(gMemManagersMutex);
|
||||
uint32_t exist_flags = gMemManagerExists.load();
|
||||
if (!(exist_flags & (1 << gpu_index))) {
|
||||
gMemManagers[gpu_index]; // create it
|
||||
gMemManagerExists.store(exist_flags | (1 << gpu_index));
|
||||
}
|
||||
return gMemManagers[gpu_index];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
CudaMultiGPUMemoryManager gCudaMemoryManager;
|
||||
#endif
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaEvent_t event;
|
||||
@@ -109,18 +462,90 @@ void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaEventDestroy(event));
|
||||
}
|
||||
|
||||
#ifdef CUDA_STREAM_POOL
|
||||
struct CudaBoundStream
|
||||
{
|
||||
cudaStream_t stream;
|
||||
uint32_t gpu_index;
|
||||
};
|
||||
|
||||
class CudaStreamPool
|
||||
{
|
||||
std::vector<CudaBoundStream> poolCompute;
|
||||
std::vector<CudaBoundStream> poolTransfer;
|
||||
|
||||
std::mutex mutex_pools;
|
||||
|
||||
size_t nextStream = 0;
|
||||
|
||||
const size_t MAX_STREAMS = 8;
|
||||
|
||||
public:
|
||||
cudaStream_t create_stream(uint32_t gpu_index)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_pools);
|
||||
if (poolCompute.empty())
|
||||
{
|
||||
poolCompute.reserve(MAX_STREAMS);
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
for (size_t i = 0; i < MAX_STREAMS; i++)
|
||||
{
|
||||
cudaStream_t stream;
|
||||
check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
poolCompute.push_back(CudaBoundStream{stream, gpu_index});
|
||||
}
|
||||
}
|
||||
|
||||
PANIC_IF_FALSE(gpu_index == poolCompute[nextStream].gpu_index, "Bad gpu in stream pool");
|
||||
cudaStream_t res = poolCompute[nextStream].stream;
|
||||
nextStream = (nextStream + 1) % poolCompute.size();
|
||||
return res;
|
||||
}
|
||||
|
||||
void destroy_stream(cudaStream_t stream, uint32_t gpu_index)
|
||||
{
|
||||
//do nothing
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class CudaMultiStreamPool {
|
||||
std::unordered_map<uint32_t, CudaStreamPool> per_gpu_pools;
|
||||
std::mutex pools_mutex; // for creation of the mem managers
|
||||
|
||||
public:
|
||||
CudaStreamPool &get(uint32_t gpu_index) {
|
||||
std::lock_guard<std::mutex> guard(pools_mutex);
|
||||
return per_gpu_pools[gpu_index]; // creates it if it does not exist
|
||||
}
|
||||
};
|
||||
|
||||
CudaMultiStreamPool gCudaStreamPool;
|
||||
#endif
|
||||
|
||||
|
||||
/// Unsafe function to create a CUDA stream, must check first that GPU exists
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
#ifdef CUDA_STREAM_POOL
|
||||
cuda_set_device(gpu_index); // this will initialize the mempool
|
||||
return gCudaStreamPool.get(gpu_index).create_stream(gpu_index);
|
||||
#else
|
||||
cuda_set_device(gpu_index);
|
||||
cudaStream_t stream;
|
||||
check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
return stream;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Unsafe function to destroy CUDA stream, must check first the GPU exists
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
#ifdef CUDA_STREAM_POOL
|
||||
gCudaStreamPool.get(gpu_index).destroy_stream(stream, gpu_index);
|
||||
#else
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaStreamDestroy(stream));
|
||||
#endif
|
||||
}
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
@@ -128,13 +553,6 @@ void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
void synchronize_streams(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count) {
|
||||
for (uint i = 0; i < gpu_count; i++) {
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if a CUDA device is available at runtime
|
||||
uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
|
||||
@@ -142,24 +560,38 @@ uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
/// or if there's not enough memory. A safe wrapper around it must call
|
||||
/// cuda_check_valid_malloc() first
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
void *ptr = nullptr;
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
gCudaMemoryManager.get(gpu_index).alloc(&ptr, size, SYNC, gpu_index, 0,
|
||||
"rust_code", 0);
|
||||
#else
|
||||
cuda_set_device(gpu_index);
|
||||
void *ptr;
|
||||
check_cuda_error(cudaMalloc((void **)&ptr, size));
|
||||
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void *cuda_ext_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
return cuda_malloc(size, gpu_index);
|
||||
}
|
||||
|
||||
/// Allocates a size-byte array at the device memory. Tries to do it
|
||||
/// asynchronously.
|
||||
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
uint64_t &size_tracker,
|
||||
bool allocate_gpu_memory) {
|
||||
void *cuda_intern_malloc_with_size_tracking_async(uint64_t size,
|
||||
cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
uint64_t &size_tracker,
|
||||
bool allocate_gpu_memory,
|
||||
const char *file, int line) {
|
||||
size_tracker += size;
|
||||
void *ptr = nullptr;
|
||||
if (!allocate_gpu_memory)
|
||||
return ptr;
|
||||
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
gCudaMemoryManager.get(gpu_index).alloc(&ptr, size, ASYNC, gpu_index, stream,
|
||||
file, line);
|
||||
#else
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
#ifndef CUDART_VERSION
|
||||
@@ -177,16 +609,23 @@ void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
|
||||
#else
|
||||
check_cuda_error(cudaMalloc((void **)&ptr, size));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/// Allocates a size-byte array at the device memory. Tries to do it
|
||||
/// asynchronously.
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
void *cuda_int_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index, const char *file, int line) {
|
||||
uint64_t size_tracker = 0;
|
||||
return cuda_malloc_with_size_tracking_async(size, stream, gpu_index,
|
||||
size_tracker, true);
|
||||
return cuda_intern_malloc_with_size_tracking_async(
|
||||
size, stream, gpu_index, size_tracker, true, file, line);
|
||||
}
|
||||
|
||||
void *cuda_ext_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
return cuda_malloc_async(size, stream, gpu_index);
|
||||
}
|
||||
|
||||
/// Check that allocation is valid
|
||||
@@ -261,6 +700,11 @@ void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
gpu_index, true);
|
||||
}
|
||||
|
||||
void cuda_ext_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
|
||||
/// Copy memory within a GPU asynchronously
|
||||
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
|
||||
void *dest, void const *src, uint64_t size, cudaStream_t stream,
|
||||
@@ -293,6 +737,12 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
gpu_index, true);
|
||||
}
|
||||
|
||||
void cuda_ext_memcpy_async_gpu_to_gpu(void *dest, void const *src,
|
||||
uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
cuda_memcpy_async_gpu_to_gpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
|
||||
/// Copy memory within a GPU
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index) {
|
||||
@@ -317,6 +767,11 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_ext_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index) {
|
||||
cuda_memcpy_gpu_to_gpu(dest, src, size, gpu_index);
|
||||
}
|
||||
|
||||
/// Synchronizes device
|
||||
void cuda_synchronize_device(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
@@ -329,6 +784,7 @@ void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
|
||||
bool gpu_memory_allocated) {
|
||||
if (size == 0 || !gpu_memory_allocated)
|
||||
return;
|
||||
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
@@ -336,6 +792,7 @@ void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
|
||||
gCudaMemoryManager.get(gpu_index).memset((int8_t *)dest, size, stream, "", 0);
|
||||
}
|
||||
|
||||
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
@@ -344,6 +801,11 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
true);
|
||||
}
|
||||
|
||||
void cuda_ext_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_memset_async(dest, val, size, stream, gpu_index);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void cuda_set_value_kernel(Torus *array, Torus value, Torus n) {
|
||||
int index = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
@@ -395,6 +857,11 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
|
||||
}
|
||||
|
||||
void cuda_ext_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
|
||||
/// Return number of GPUs available
|
||||
int cuda_get_number_of_gpus() {
|
||||
int num_gpus;
|
||||
@@ -410,19 +877,31 @@ int cuda_get_number_of_sms() {
|
||||
}
|
||||
|
||||
/// Drop a cuda array
|
||||
void cuda_drop(void *ptr, uint32_t gpu_index) {
|
||||
void cuda_int_drop(void *ptr, uint32_t gpu_index, const char *file, int line) {
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
gCudaMemoryManager.get(gpu_index).free(ptr, SYNC, gpu_index, 0, file, line);
|
||||
#else
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
#endif
|
||||
}
|
||||
|
||||
void cuda_ext_drop(void *ptr, uint32_t gpu_index) { cuda_drop(ptr, gpu_index); }
|
||||
|
||||
/// Drop a cuda array asynchronously, if the data was allocated & it's supported
|
||||
/// on the device
|
||||
void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated) {
|
||||
void cuda_int_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated,
|
||||
const char *file, int line) {
|
||||
|
||||
if (!gpu_memory_allocated)
|
||||
return;
|
||||
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
gCudaMemoryManager.get(gpu_index).free(ptr, ASYNC, gpu_index, stream, file,
|
||||
line);
|
||||
#else
|
||||
cuda_set_device(gpu_index);
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
@@ -439,11 +918,14 @@ void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
|
||||
#else
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Drop a cuda array asynchronously, if supported on the device
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_drop_with_size_tracking_async(ptr, stream, gpu_index, true);
|
||||
void cuda_int_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index,
|
||||
const char *file, int line) {
|
||||
cuda_int_drop_with_size_tracking_async(ptr, stream, gpu_index, true, file,
|
||||
line);
|
||||
}
|
||||
|
||||
/// Get the maximum size for the shared memory per streaming multiprocessors
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
#include "integer/abs.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, bool is_signed, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, bool is_signed,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
@@ -16,31 +15,27 @@ uint64_t scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_abs_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_abs_buffer<uint64_t> **)mem_ptr, is_signed, num_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
CudaStreams(streams), (int_abs_buffer<uint64_t> **)mem_ptr, is_signed,
|
||||
num_blocks, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *ct, int8_t *mem_ptr,
|
||||
bool is_signed, void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
|
||||
gpu_count, ct, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem, is_signed);
|
||||
host_integer_abs_kb<uint64_t>(CudaStreams(streams), ct, bsks,
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key,
|
||||
mem, is_signed);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
void cleanup_cuda_integer_abs_inplace(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_abs_buffer<uint64_t> *mem_ptr =
|
||||
(int_abs_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
@@ -18,14 +18,12 @@
|
||||
|
||||
template <typename Torus>
|
||||
__host__ uint64_t scratch_cuda_integer_abs_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_abs_buffer<Torus> **mem_ptr, bool is_signed,
|
||||
CudaStreams streams, int_abs_buffer<Torus> **mem_ptr, bool is_signed,
|
||||
uint32_t num_blocks, int_radix_params params, bool allocate_gpu_memory) {
|
||||
|
||||
uint64_t size_tracker = 0;
|
||||
if (is_signed) {
|
||||
*mem_ptr = new int_abs_buffer<Torus>(streams, gpu_indexes, gpu_count,
|
||||
params, num_blocks,
|
||||
*mem_ptr = new int_abs_buffer<Torus>(streams, params, num_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
}
|
||||
return size_tracker;
|
||||
@@ -33,8 +31,7 @@ __host__ uint64_t scratch_cuda_integer_abs_kb(
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_abs_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *ct, void *const *bsks,
|
||||
CudaStreams streams, CudaRadixCiphertextFFI *ct, void *const *bsks,
|
||||
uint64_t *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
|
||||
@@ -47,24 +44,24 @@ __host__ void host_integer_abs_kb(
|
||||
(31 - __builtin_clz(mem_ptr->params.message_modulus)) *
|
||||
ct->num_radix_blocks;
|
||||
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], mask, ct);
|
||||
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
mask, ct);
|
||||
|
||||
host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
|
||||
streams, mask, num_bits_in_ciphertext - 1,
|
||||
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
host_addition<Torus>(streams.stream(0), streams.gpu_index(0), ct, mask, ct,
|
||||
ct->num_radix_blocks, mem_ptr->params.message_modulus,
|
||||
mem_ptr->params.carry_modulus);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, ms_noise_reduction_key, requested_flag, uses_carry);
|
||||
streams, ct, nullptr, nullptr, mem_ptr->scp_mem, bsks, ksks,
|
||||
ms_noise_reduction_key, requested_flag, uses_carry);
|
||||
|
||||
host_integer_radix_bitop_kb<Torus>(streams, gpu_indexes, gpu_count, ct, mask,
|
||||
ct, mem_ptr->bitxor_mem, bsks, ksks,
|
||||
ms_noise_reduction_key);
|
||||
host_integer_radix_bitop_kb<Torus>(streams, ct, mask, ct, mem_ptr->bitxor_mem,
|
||||
bsks, ksks, ms_noise_reduction_key);
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_ABS_CUH
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
#include "integer/bitwise_ops.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_radix_bitop_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
BITOP_TYPE op_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -16,32 +15,28 @@ uint64_t scratch_cuda_integer_radix_bitop_kb_64(
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_radix_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_bitop_buffer<uint64_t> **)mem_ptr, lwe_ciphertext_count, params,
|
||||
op_type, allocate_gpu_memory);
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
host_integer_radix_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key);
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2,
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_integer_bitop(CudaStreamsFFI streams, int8_t **mem_ptr_void) {
|
||||
|
||||
int_bitop_buffer<uint64_t> *mem_ptr =
|
||||
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
@@ -13,8 +13,7 @@
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_bitop_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks,
|
||||
@@ -46,8 +45,8 @@ __host__ void host_integer_radix_bitop_kb(
|
||||
}
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, ms_noise_reduction_key, lut, lwe_array_out->num_radix_blocks,
|
||||
streams, lwe_array_out, lwe_array_1, lwe_array_2, bsks, ksks,
|
||||
ms_noise_reduction_key, lut, lwe_array_out->num_radix_blocks,
|
||||
lut->params.message_modulus);
|
||||
|
||||
memcpy(lwe_array_out->degrees, degrees,
|
||||
@@ -56,14 +55,12 @@ __host__ void host_integer_radix_bitop_kb(
|
||||
|
||||
template <typename Torus>
|
||||
__host__ uint64_t scratch_cuda_integer_radix_bitop_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_bitop_buffer<Torus> **mem_ptr,
|
||||
CudaStreams streams, int_bitop_buffer<Torus> **mem_ptr,
|
||||
uint32_t num_radix_blocks, int_radix_params params, BITOP_TYPE op,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
uint64_t size_tracker = 0;
|
||||
*mem_ptr = new int_bitop_buffer<Torus>(streams, gpu_indexes, gpu_count, op,
|
||||
params, num_radix_blocks,
|
||||
*mem_ptr = new int_bitop_buffer<Torus>(streams, op, params, num_radix_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
return size_tracker;
|
||||
}
|
||||
|
||||
@@ -2,28 +2,26 @@
|
||||
|
||||
void extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams, uint32_t const *gpu_indexes) {
|
||||
CudaStreamsFFI streams) {
|
||||
host_extend_radix_with_trivial_zero_blocks_msb<uint64_t>(
|
||||
output, input, (cudaStream_t *)streams, gpu_indexes);
|
||||
output, input, CudaStreams(streams));
|
||||
}
|
||||
|
||||
void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
CudaStreamsFFI streams) {
|
||||
|
||||
host_trim_radix_blocks_lsb<uint64_t>(output, input, (cudaStream_t *)streams,
|
||||
gpu_indexes);
|
||||
host_trim_radix_blocks_lsb<uint64_t>(output, input, CudaStreams(streams));
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_extend_radix_with_sign_msb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t num_additional_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks,
|
||||
uint32_t num_additional_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -32,34 +30,31 @@ uint64_t scratch_cuda_extend_radix_with_sign_msb_64(
|
||||
noise_reduction_type);
|
||||
|
||||
return scratch_extend_radix_with_sign_msb<uint64_t>(
|
||||
(cudaStream_t *)streams, gpu_indexes, gpu_count,
|
||||
CudaStreams(streams),
|
||||
(int_extend_radix_with_sign_msb_buffer<uint64_t> **)mem_ptr, params,
|
||||
num_blocks, num_additional_blocks, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_extend_radix_with_sign_msb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
int8_t *mem_ptr, uint32_t num_additional_blocks, void *const *bsks,
|
||||
void *const *ksks,
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input, int8_t *mem_ptr,
|
||||
uint32_t num_additional_blocks, void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
PUSH_RANGE("cast")
|
||||
host_extend_radix_with_sign_msb<uint64_t>(
|
||||
(cudaStream_t *)streams, gpu_indexes, gpu_count, output, input,
|
||||
CudaStreams(streams), output, input,
|
||||
(int_extend_radix_with_sign_msb_buffer<uint64_t> *)mem_ptr,
|
||||
num_additional_blocks, bsks, (uint64_t **)ksks, ms_noise_reduction_key);
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
void cleanup_cuda_extend_radix_with_sign_msb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
void cleanup_cuda_extend_radix_with_sign_msb_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
PUSH_RANGE("clean cast")
|
||||
int_extend_radix_with_sign_msb_buffer<uint64_t> *mem_ptr =
|
||||
(int_extend_radix_with_sign_msb_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
POP_RANGE()
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
|
||||
@@ -8,19 +8,18 @@
|
||||
template <typename Torus>
|
||||
__host__ void host_extend_radix_with_trivial_zero_blocks_msb(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes) {
|
||||
CudaStreams streams) {
|
||||
PUSH_RANGE("extend only")
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
0, input->num_radix_blocks, input, 0,
|
||||
input->num_radix_blocks);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), output, 0,
|
||||
input->num_radix_blocks, input, 0, input->num_radix_blocks);
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_trim_radix_blocks_lsb(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
CudaStreams streams) {
|
||||
|
||||
const uint32_t input_start_lwe_index =
|
||||
input->num_radix_blocks - output->num_radix_blocks;
|
||||
@@ -31,30 +30,29 @@ __host__ void host_trim_radix_blocks_lsb(CudaRadixCiphertextFFI *output,
|
||||
input->num_radix_blocks, output->num_radix_blocks);
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], output, 0, output->num_radix_blocks, input,
|
||||
input_start_lwe_index, input->num_radix_blocks);
|
||||
streams.stream(0), streams.gpu_index(0), output, 0,
|
||||
output->num_radix_blocks, input, input_start_lwe_index,
|
||||
input->num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ uint64_t scratch_extend_radix_with_sign_msb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_extend_radix_with_sign_msb_buffer<Torus> **mem_ptr,
|
||||
CudaStreams streams, int_extend_radix_with_sign_msb_buffer<Torus> **mem_ptr,
|
||||
const int_radix_params params, uint32_t num_radix_blocks,
|
||||
uint32_t num_additional_blocks, const bool allocate_gpu_memory) {
|
||||
PUSH_RANGE("scratch cast/extend")
|
||||
uint64_t size_tracker = 0;
|
||||
|
||||
*mem_ptr = new int_extend_radix_with_sign_msb_buffer<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_radix_blocks,
|
||||
num_additional_blocks, allocate_gpu_memory, size_tracker);
|
||||
streams, params, num_radix_blocks, num_additional_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
POP_RANGE()
|
||||
return size_tracker;
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_extend_radix_with_sign_msb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *output,
|
||||
CudaStreams streams, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
int_extend_radix_with_sign_msb_buffer<Torus> *mem_ptr,
|
||||
uint32_t num_additional_blocks, void *const *bsks, Torus *const *ksks,
|
||||
@@ -62,8 +60,8 @@ __host__ void host_extend_radix_with_sign_msb(
|
||||
|
||||
if (num_additional_blocks == 0) {
|
||||
PUSH_RANGE("cast/extend no addblocks")
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
input);
|
||||
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
output, input);
|
||||
POP_RANGE()
|
||||
return;
|
||||
}
|
||||
@@ -72,24 +70,24 @@ __host__ void host_extend_radix_with_sign_msb(
|
||||
|
||||
PANIC_IF_FALSE(input_blocks > 0, "Cuda error: input blocks cannot be zero");
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
0, input_blocks, input, 0,
|
||||
input_blocks);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), output, 0, input_blocks, input,
|
||||
0, input_blocks);
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
mem_ptr->last_block, 0, 1, input,
|
||||
input_blocks - 1, input_blocks);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), mem_ptr->last_block, 0, 1, input,
|
||||
input_blocks - 1, input_blocks);
|
||||
|
||||
host_apply_univariate_lut_kb(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->padding_block,
|
||||
mem_ptr->last_block, mem_ptr->lut, ksks, ms_noise_reduction_key, bsks);
|
||||
host_apply_univariate_lut_kb(streams, mem_ptr->padding_block,
|
||||
mem_ptr->last_block, mem_ptr->lut, ksks,
|
||||
ms_noise_reduction_key, bsks);
|
||||
|
||||
for (uint32_t i = 0; i < num_additional_blocks; ++i) {
|
||||
uint32_t dst_block_idx = input_blocks + i;
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
dst_block_idx, dst_block_idx + 1,
|
||||
mem_ptr->padding_block, 0, 1);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), output, dst_block_idx,
|
||||
dst_block_idx + 1, mem_ptr->padding_block, 0, 1);
|
||||
}
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "integer/cmux.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_radix_cmux_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PUSH_RANGE("scratch cmux")
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -18,16 +18,14 @@ uint64_t scratch_cuda_integer_radix_cmux_kb_64(
|
||||
[](uint64_t x) -> uint64_t { return x == 1; };
|
||||
|
||||
uint64_t ret = scratch_cuda_integer_radix_cmux_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_cmux_buffer<uint64_t> **)mem_ptr, predicate_lut_f,
|
||||
lwe_ciphertext_count, params, allocate_gpu_memory);
|
||||
CudaStreams(streams), (int_cmux_buffer<uint64_t> **)mem_ptr,
|
||||
predicate_lut_f, lwe_ciphertext_count, params, allocate_gpu_memory);
|
||||
POP_RANGE()
|
||||
return ret;
|
||||
}
|
||||
|
||||
void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
|
||||
@@ -35,21 +33,18 @@ void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
PUSH_RANGE("cmux")
|
||||
host_integer_radix_cmux_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_condition, lwe_array_true, lwe_array_false,
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key);
|
||||
CudaStreams(streams), lwe_array_out, lwe_condition, lwe_array_true,
|
||||
lwe_array_false, (int_cmux_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key);
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
void cleanup_cuda_integer_radix_cmux(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
PUSH_RANGE("cleanup cmux")
|
||||
int_cmux_buffer<uint64_t> *mem_ptr =
|
||||
(int_cmux_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
POP_RANGE()
|
||||
|
||||
@@ -6,8 +6,7 @@
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
zero_out_if(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
zero_out_if(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
int_zero_out_if_buffer<Torus> *mem_ptr,
|
||||
@@ -27,26 +26,25 @@ zero_out_if(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
"Cuda error: input and output radix ciphertexts must have the same "
|
||||
"lwe dimension");
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cuda_set_device(streams.gpu_index(0));
|
||||
auto params = mem_ptr->params;
|
||||
|
||||
// We can't use integer_radix_apply_bivariate_lookup_table_kb since the
|
||||
// second operand is not an array
|
||||
auto tmp_lwe_array_input = mem_ptr->tmp;
|
||||
host_pack_bivariate_blocks_with_single_block<Torus>(
|
||||
streams, gpu_indexes, gpu_count, tmp_lwe_array_input,
|
||||
predicate->lwe_indexes_in, lwe_array_input, lwe_condition,
|
||||
predicate->lwe_indexes_in, params.message_modulus, num_radix_blocks);
|
||||
streams, tmp_lwe_array_input, predicate->lwe_indexes_in, lwe_array_input,
|
||||
lwe_condition, predicate->lwe_indexes_in, params.message_modulus,
|
||||
num_radix_blocks);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
|
||||
ksks, ms_noise_reduction_key, predicate, num_radix_blocks);
|
||||
streams, lwe_array_out, tmp_lwe_array_input, bsks, ksks,
|
||||
ms_noise_reduction_key, predicate, num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_cmux_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false,
|
||||
@@ -62,18 +60,19 @@ __host__ void host_integer_radix_cmux_kb(
|
||||
auto params = mem_ptr->params;
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], mem_ptr->buffer_in, 0, num_radix_blocks,
|
||||
lwe_array_true, 0, num_radix_blocks);
|
||||
streams.stream(0), streams.gpu_index(0), mem_ptr->buffer_in, 0,
|
||||
num_radix_blocks, lwe_array_true, 0, num_radix_blocks);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], mem_ptr->buffer_in, num_radix_blocks,
|
||||
2 * num_radix_blocks, lwe_array_false, 0, num_radix_blocks);
|
||||
streams.stream(0), streams.gpu_index(0), mem_ptr->buffer_in,
|
||||
num_radix_blocks, 2 * num_radix_blocks, lwe_array_false, 0,
|
||||
num_radix_blocks);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
mem_ptr->condition_array, i, i + 1,
|
||||
lwe_condition, 0, 1);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), mem_ptr->condition_array, i,
|
||||
i + 1, lwe_condition, 0, 1);
|
||||
}
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
|
||||
streams, mem_ptr->buffer_out, mem_ptr->buffer_in,
|
||||
mem_ptr->condition_array, bsks, ksks, ms_noise_reduction_key,
|
||||
mem_ptr->predicate_lut, 2 * num_radix_blocks, params.message_modulus);
|
||||
|
||||
@@ -87,25 +86,24 @@ __host__ void host_integer_radix_cmux_kb(
|
||||
as_radix_ciphertext_slice<Torus>(&mem_false, mem_ptr->buffer_out,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], &mem_true, &mem_true,
|
||||
&mem_false, num_radix_blocks, params.message_modulus,
|
||||
params.carry_modulus);
|
||||
host_addition<Torus>(streams.stream(0), streams.gpu_index(0), &mem_true,
|
||||
&mem_true, &mem_false, num_radix_blocks,
|
||||
params.message_modulus, params.carry_modulus);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, &mem_true, bsks, ksks,
|
||||
ms_noise_reduction_key, mem_ptr->message_extract_lut, num_radix_blocks);
|
||||
streams, lwe_array_out, &mem_true, bsks, ksks, ms_noise_reduction_key,
|
||||
mem_ptr->message_extract_lut, num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ uint64_t scratch_cuda_integer_radix_cmux_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_cmux_buffer<Torus> **mem_ptr,
|
||||
CudaStreams streams, int_cmux_buffer<Torus> **mem_ptr,
|
||||
std::function<Torus(Torus)> predicate_lut_f, uint32_t num_radix_blocks,
|
||||
int_radix_params params, bool allocate_gpu_memory) {
|
||||
uint64_t size_tracker = 0;
|
||||
*mem_ptr = new int_cmux_buffer<Torus>(
|
||||
streams, gpu_indexes, gpu_count, predicate_lut_f, params,
|
||||
num_radix_blocks, allocate_gpu_memory, size_tracker);
|
||||
*mem_ptr = new int_cmux_buffer<Torus>(streams, predicate_lut_f, params,
|
||||
num_radix_blocks, allocate_gpu_memory,
|
||||
size_tracker);
|
||||
return size_tracker;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
#include "integer/comparison.cuh"
|
||||
|
||||
uint64_t scratch_cuda_integer_radix_comparison_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
COMPARISON_TYPE op_type, bool is_signed, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, COMPARISON_TYPE op_type, bool is_signed,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
PUSH_RANGE("scratch comparison")
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -20,9 +19,8 @@ uint64_t scratch_cuda_integer_radix_comparison_kb_64(
|
||||
case EQ:
|
||||
case NE:
|
||||
size_tracker += scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params,
|
||||
op_type, false, allocate_gpu_memory);
|
||||
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,
|
||||
num_radix_blocks, params, op_type, false, allocate_gpu_memory);
|
||||
break;
|
||||
case GT:
|
||||
case GE:
|
||||
@@ -31,9 +29,8 @@ uint64_t scratch_cuda_integer_radix_comparison_kb_64(
|
||||
case MAX:
|
||||
case MIN:
|
||||
size_tracker += scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params,
|
||||
op_type, is_signed, allocate_gpu_memory);
|
||||
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,
|
||||
num_radix_blocks, params, op_type, is_signed, allocate_gpu_memory);
|
||||
break;
|
||||
}
|
||||
POP_RANGE()
|
||||
@@ -41,8 +38,7 @@ uint64_t scratch_cuda_integer_radix_comparison_kb_64(
|
||||
}
|
||||
|
||||
void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
@@ -60,9 +56,8 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
case EQ:
|
||||
case NE:
|
||||
host_integer_radix_equality_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, buffer, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, num_radix_blocks);
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2, buffer,
|
||||
bsks, (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
|
||||
break;
|
||||
case GT:
|
||||
case GE:
|
||||
@@ -72,18 +67,17 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, buffer, buffer->diff_buffer->operator_f, bsks,
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2, buffer,
|
||||
buffer->diff_buffer->operator_f, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, num_radix_blocks);
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
|
||||
host_integer_radix_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, buffer, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, num_radix_blocks);
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2, buffer,
|
||||
bsks, (uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error: integer operation not supported")
|
||||
@@ -91,27 +85,25 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_comparison(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
void cleanup_cuda_integer_comparison(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
PUSH_RANGE("cleanup comparison")
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -119,14 +111,12 @@ uint64_t scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
|
||||
false, allocate_gpu_memory);
|
||||
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,
|
||||
num_radix_blocks, params, EQ, false, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
@@ -136,30 +126,28 @@ void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_are_all_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_in, buffer, bsks, (uint64_t **)(ksks), ms_noise_reduction_key,
|
||||
num_radix_blocks);
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_in, buffer, bsks,
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -167,14 +155,12 @@ uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
|
||||
false, allocate_gpu_memory);
|
||||
CudaStreams(streams), (int_comparison_buffer<uint64_t> **)mem_ptr,
|
||||
num_radix_blocks, params, EQ, false, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
@@ -184,18 +170,16 @@ void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_in, buffer, bsks, (uint64_t **)(ksks), ms_noise_reduction_key,
|
||||
num_radix_blocks);
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_in, buffer, bsks,
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr_void) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user