mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
3 Commits
al/fix_lut
...
dt/bench/f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cae938a75b | ||
|
|
bae1d1cf77 | ||
|
|
a3bc1a9d9e |
@@ -11,47 +11,15 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
|
||||
jobs:
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (backward-compat-tests)
|
||||
needs: check-user-permission
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
@@ -71,7 +39,7 @@ jobs:
|
||||
name: Backward compatibility tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -79,8 +47,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -123,7 +90,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (backward-compat-tests)
|
||||
@@ -147,4 +114,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
22
.github/workflows/aws_tfhe_fast_tests.yml
vendored
22
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
|
||||
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
@@ -62,13 +62,13 @@ jobs:
|
||||
user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.user_docs_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
ci_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ci_any_changed }}
|
||||
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
@@ -76,6 +76,7 @@ jobs:
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -120,9 +121,13 @@ jobs:
|
||||
- '!tfhe/src/c_api/**'
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- README.md
|
||||
ci:
|
||||
- .github/**
|
||||
- ci/**
|
||||
|
||||
- name: Aggregate file changes
|
||||
id: aggregated-changes
|
||||
# CI files are not included in this aggregator.
|
||||
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
|
||||
@@ -137,20 +142,13 @@ jobs:
|
||||
run: |
|
||||
echo "any_changed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
(github.event_name == 'pull_request_target' && needs.should-run.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_triggering_actor.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
60
.github/workflows/aws_tfhe_integer_tests.yml
vendored
60
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -10,31 +10,16 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
NO_BIG_PARAMS: FALSE
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [labeled]
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
@@ -43,11 +28,12 @@ jobs:
|
||||
should-run:
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'pull_request_target' && contains(github.event.label.name, 'approved')) ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
steps.changed-files.outputs.integer_any_changed }}
|
||||
@@ -56,14 +42,14 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
integer:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -75,30 +61,13 @@ jobs:
|
||||
- tfhe/src/integer/**
|
||||
- .github/workflows/aws_tfhe_integer_tests.yml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
needs: should-run
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -119,7 +88,7 @@ jobs:
|
||||
name: Unsigned integer tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -127,8 +96,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -136,7 +104,7 @@ jobs:
|
||||
toolchain: stable
|
||||
|
||||
- name: Should skip big parameters set
|
||||
if: github.event_name == 'pull_request_target'
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "NO_BIG_PARAMS=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
@@ -162,7 +130,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (unsigned-integer-tests)
|
||||
@@ -186,4 +154,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -10,31 +10,16 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
NO_BIG_PARAMS: FALSE
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [labeled]
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
@@ -44,11 +29,11 @@ jobs:
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
((github.event_name == 'pull_request_target' || github.event_name == 'pull_request_target') && contains(github.event.label.name, 'approved')) ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
steps.changed-files.outputs.integer_any_changed }}
|
||||
@@ -57,14 +42,14 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
integer:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -76,30 +61,13 @@ jobs:
|
||||
- tfhe/src/integer/**
|
||||
- .github/workflows/aws_tfhe_signed_integer_tests.yml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
needs: should-run
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -120,7 +88,7 @@ jobs:
|
||||
name: Signed integer tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -128,8 +96,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -137,7 +104,7 @@ jobs:
|
||||
toolchain: stable
|
||||
|
||||
- name: Should skip big parameters set
|
||||
if: github.event_name == 'pull_request_target'
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "NO_BIG_PARAMS=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
@@ -167,7 +134,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (signed-integer-tests)
|
||||
@@ -191,4 +158,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
58
.github/workflows/aws_tfhe_tests.yml
vendored
58
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -10,28 +10,13 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
@@ -42,7 +27,7 @@ jobs:
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
|
||||
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
@@ -78,14 +63,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -147,28 +131,11 @@ jobs:
|
||||
run: |
|
||||
echo "any_changed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cpu-tests)
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.any_file_changed == 'true')
|
||||
needs: [ should-run, check-user-permission ]
|
||||
needs: should-run
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
@@ -186,11 +153,11 @@ jobs:
|
||||
|
||||
cpu-tests:
|
||||
name: CPU tests
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -198,8 +165,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -274,7 +240,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
@@ -298,4 +264,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
42
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
42
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -10,49 +10,16 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
|
||||
jobs:
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (wasm-tests)
|
||||
needs: check-user-permission
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -73,7 +40,7 @@ jobs:
|
||||
name: WASM tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -81,8 +48,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -143,7 +109,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (wasm-tests)
|
||||
@@ -167,4 +133,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
6
.github/workflows/benchmark_boolean.yml
vendored
6
.github/workflows/benchmark_boolean.yml
vendored
@@ -51,8 +51,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -104,8 +103,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
6
.github/workflows/benchmark_core_crypto.yml
vendored
6
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -50,8 +50,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -95,8 +94,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
6
.github/workflows/benchmark_erc20.yml
vendored
6
.github/workflows/benchmark_erc20.yml
vendored
@@ -52,8 +52,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -73,8 +72,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
|
||||
58
.github/workflows/benchmark_gpu_4090.yml
vendored
58
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -11,53 +11,20 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
FAST_BENCH: TRUE
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [labeled]
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
|
||||
jobs:
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
cuda-integer-benchmarks:
|
||||
name: Cuda integer benchmarks (RTX 4090)
|
||||
needs: check-user-permission
|
||||
if: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs' ||
|
||||
contains(github.event.label.name, '4090_bench') }}
|
||||
@@ -66,14 +33,16 @@ jobs:
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -94,8 +63,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run integer benchmarks
|
||||
run: |
|
||||
@@ -131,7 +99,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
cuda-core-crypto-benchmarks:
|
||||
name: Cuda core crypto benchmarks (RTX 4090)
|
||||
@@ -148,8 +116,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -169,8 +136,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run core crypto benchmarks
|
||||
run: |
|
||||
@@ -216,11 +182,11 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
remove_github_label:
|
||||
name: Remove 4090 bench label
|
||||
if: ${{ always() && github.event_name == 'pull_request_target' }}
|
||||
if: ${{ always() && github.event_name == 'pull_request' }}
|
||||
needs: [cuda-integer-benchmarks, cuda-core-crypto-benchmarks]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
@@ -53,8 +53,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -109,8 +108,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
@@ -14,7 +14,7 @@ on:
|
||||
type: string
|
||||
required: true
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN:
|
||||
FHE_ACTIONS_TOKEN:
|
||||
required: true
|
||||
SLAB_ACTION_TOKEN:
|
||||
required: true
|
||||
@@ -80,8 +80,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -135,8 +134,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
@@ -26,7 +26,7 @@ on:
|
||||
type: boolean
|
||||
default: false
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN:
|
||||
FHE_ACTIONS_TOKEN:
|
||||
required: true
|
||||
SLAB_ACTION_TOKEN:
|
||||
required: true
|
||||
@@ -150,8 +150,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -211,8 +210,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
6
.github/workflows/benchmark_integer.yml
vendored
6
.github/workflows/benchmark_integer.yml
vendored
@@ -119,8 +119,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -140,8 +139,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
|
||||
6
.github/workflows/benchmark_shortint.yml
vendored
6
.github/workflows/benchmark_shortint.yml
vendored
@@ -82,8 +82,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -103,8 +102,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
|
||||
@@ -119,8 +119,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -140,8 +139,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
|
||||
3
.github/workflows/benchmark_tfhe_fft.yml
vendored
3
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -94,8 +94,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
3
.github/workflows/benchmark_tfhe_ntt.yml
vendored
3
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -94,8 +94,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
10
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
10
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -38,6 +38,7 @@ jobs:
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
- tfhe-zk-pok/**
|
||||
@@ -79,8 +80,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -100,8 +100,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
@@ -132,8 +131,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
12
.github/workflows/benchmark_wasm_client.yml
vendored
12
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
|
||||
steps:
|
||||
@@ -36,13 +36,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
wasm_bench:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -88,8 +88,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -177,8 +176,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
13
.github/workflows/benchmark_zk_pke.yml
vendored
13
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -43,13 +43,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -130,8 +130,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -151,8 +150,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
@@ -189,8 +187,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
39
.github/workflows/check_actor_permissions.yml
vendored
39
.github/workflows/check_actor_permissions.yml
vendored
@@ -1,39 +0,0 @@
|
||||
# Check if an actor is a collaborator and has write access
|
||||
name: Check Actor Permissions
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
username:
|
||||
type: string
|
||||
default: ${{ github.triggering_actor }}
|
||||
outputs:
|
||||
is_authorized:
|
||||
value: ${{ jobs.check-actor-permission.outputs.actor_authorized }}
|
||||
secrets:
|
||||
TOKEN:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
check-actor-permission:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
actor_authorized: ${{ steps.check-access.outputs.require-result }}
|
||||
steps:
|
||||
- name: Get User Permission
|
||||
id: check-access
|
||||
uses: actions-cool/check-user-permission@7b90a27f92f3961b368376107661682c441f6103 # v2.3.0
|
||||
with:
|
||||
require: write
|
||||
username: ${{ inputs.username }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
|
||||
- name: Check User Permission
|
||||
if: ${{ !(inputs.username == 'dependabot[bot]' || inputs.username == 'cla-bot[bot]') &&
|
||||
steps.check-access.outputs.require-result == 'false' }}
|
||||
run: |
|
||||
echo "${{ inputs.username }} does not have permissions on this repo."
|
||||
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
|
||||
echo "Job originally triggered by ${{ github.actor }}"
|
||||
exit 1
|
||||
40
.github/workflows/check_ci_files_change.yml
vendored
40
.github/workflows/check_ci_files_change.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# Check if there is any change in CI files since last commit
|
||||
name: Check changes in CI files
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
checkout_ref:
|
||||
type: string
|
||||
required: true
|
||||
outputs:
|
||||
ci_file_changed:
|
||||
value: ${{ jobs.check-changes.outputs.ci_file_changed }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
check-changes:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
outputs:
|
||||
ci_file_changed: ${{ steps.changed-files.outputs.ci_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ inputs.checkout_ref }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
files_yaml: |
|
||||
ci:
|
||||
- .github/**
|
||||
- ci/**
|
||||
1
.github/workflows/check_commit.yml
vendored
1
.github/workflows/check_commit.yml
vendored
@@ -2,7 +2,6 @@
|
||||
name: Check commit and PR compliance
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
check-commit-pr:
|
||||
name: Check commit and PR
|
||||
|
||||
32
.github/workflows/check_external_pr.yml
vendored
32
.github/workflows/check_external_pr.yml
vendored
@@ -1,32 +0,0 @@
|
||||
# Check if a pull request fulfill pre-conditions to be accepted
|
||||
name: Check PR from fork
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
|
||||
jobs:
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
check-user-permission:
|
||||
name: Check event user permissions
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
with:
|
||||
username: ${{ github.event.pull_request.user.login }}
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
write-comment:
|
||||
name: Write PR comment
|
||||
if: ${{ always() && needs.check-user-permission.outputs.is_authorized == 'false' }}
|
||||
needs: check-user-permission
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Write warning
|
||||
uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b
|
||||
with:
|
||||
message: |
|
||||
CI files have changed. Only Zama organization members are authorized to modify these files.
|
||||
29
.github/workflows/check_triggering_actor.yml
vendored
Normal file
29
.github/workflows/check_triggering_actor.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
# Check if triggering actor is a collaborator and has write access
|
||||
name: Check Triggering Actor
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
secrets:
|
||||
TOKEN:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
check-actor-permission:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get User Permission
|
||||
id: check-access
|
||||
uses: actions-cool/check-user-permission@7b90a27f92f3961b368376107661682c441f6103 # v2.3.0
|
||||
with:
|
||||
require: write
|
||||
username: ${{ github.triggering_actor }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
|
||||
- name: Check User Permission
|
||||
if: steps.check-access.outputs.require-result == 'false'
|
||||
run: |
|
||||
echo "${{ github.triggering_actor }} does not have permissions on this repo."
|
||||
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
|
||||
echo "Job originally triggered by ${{ github.actor }}"
|
||||
exit 1
|
||||
4
.github/workflows/ci_lint.yml
vendored
4
.github/workflows/ci_lint.yml
vendored
@@ -14,9 +14,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Get actionlint
|
||||
run: |
|
||||
@@ -34,4 +31,3 @@ jobs:
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
./
|
||||
|
||||
4
.github/workflows/code_coverage.yml
vendored
4
.github/workflows/code_coverage.yml
vendored
@@ -83,7 +83,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
42
.github/workflows/csprng_randomness_tests.yml
vendored
42
.github/workflows/csprng_randomness_tests.yml
vendored
@@ -10,49 +10,16 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
|
||||
jobs:
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (csprng-randomness-tests)
|
||||
needs: check-user-permission
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -73,7 +40,7 @@ jobs:
|
||||
name: CSPRNG randomness tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -81,8 +48,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -99,7 +65,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (csprng-randomness-tests)
|
||||
@@ -123,4 +89,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
4
.github/workflows/data_pr_close.yml
vendored
4
.github/workflows/data_pr_close.yml
vendored
@@ -8,8 +8,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
PR_BRANCH: ${{ github.head_ref || github.ref_name }}
|
||||
CLOSE_TYPE: ${{ github.event.pull_request.merged && 'merge' || 'close' }}
|
||||
|
||||
@@ -17,8 +15,6 @@ env:
|
||||
on:
|
||||
pull_request:
|
||||
types: [ closed ]
|
||||
pull_request_target:
|
||||
types: [ closed ]
|
||||
|
||||
# The same pattern is used for jobs that use the github api:
|
||||
# - save the result of the API call in the env var "GH_API_RES". Since the var is multiline
|
||||
|
||||
44
.github/workflows/gpu_4090_tests.yml
vendored
44
.github/workflows/gpu_4090_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an RTX 4090 machine
|
||||
name: Cuda - 4090 full tests
|
||||
name: TFHE Cuda Backend - 4090 full tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,57 +11,24 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
jobs:
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests (RTX 4090)
|
||||
needs: check-user-permission
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
contains(github.event.label.name, '4090_test') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
|
||||
@@ -70,8 +37,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -103,7 +69,7 @@ jobs:
|
||||
make test_high_level_api_gpu
|
||||
|
||||
- uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
|
||||
if: ${{ always() && github.event_name == 'pull_request_target' }}
|
||||
if: ${{ always() && github.event_name == 'pull_request' }}
|
||||
with:
|
||||
labels: 4090_test
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -114,4 +80,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
62
.github/workflows/gpu_fast_h100_tests.yml
vendored
62
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: Cuda - Fast tests on H100
|
||||
name: TFHE Cuda Backend - Fast tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,34 +11,19 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -46,14 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -72,27 +56,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -113,10 +80,10 @@ jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -132,8 +99,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -179,7 +145,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -203,4 +169,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
59
.github/workflows/gpu_fast_tests.yml
vendored
59
.github/workflows/gpu_fast_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - Fast tests
|
||||
name: TFHE Cuda Backend - Fast tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,32 +11,18 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -44,14 +30,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -70,27 +55,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -110,10 +78,10 @@ jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -129,8 +97,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -176,7 +143,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -200,4 +167,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
9
.github/workflows/gpu_full_h100_tests.yml
vendored
9
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: Cuda - Full tests on H100
|
||||
name: TFHE Cuda Backend - Full tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,6 +11,7 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -65,7 +66,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -109,7 +110,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -132,4 +133,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
60
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
60
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - Full tests multi-GPU
|
||||
name: TFHE Cuda Backend - Full tests multi-GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,34 +11,19 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -46,14 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -72,27 +56,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests-multi-gpu)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -113,10 +80,10 @@ jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA multi-GPU tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -132,8 +99,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -182,7 +148,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests-multi-gpu)
|
||||
@@ -206,4 +172,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
43
.github/workflows/gpu_pcc.yml
vendored
43
.github/workflows/gpu_pcc.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Perfom tfhe-cuda-backend post-commit checks on an AWS instance
|
||||
name: Cuda - Post-commit Checks
|
||||
name: TFHE Cuda Backend - Post-commit Checks
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,45 +11,13 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
|
||||
jobs:
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-pcc)
|
||||
needs: check-user-permission
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
@@ -69,7 +37,7 @@ jobs:
|
||||
name: CUDA post-commit checks
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -88,8 +56,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
@@ -133,7 +100,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-pcc)
|
||||
@@ -157,4 +124,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
|
||||
name: Cuda - Signed integer tests with classical PBS
|
||||
name: TFHE Cuda Backend - Signed integer tests with classical PBS
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,34 +11,19 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -46,14 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -72,27 +56,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-signed-classic-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -113,10 +80,10 @@ jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA signed integer tests with classical PBS
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -130,10 +97,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -165,7 +128,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-signed-classic-tests)
|
||||
@@ -189,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Signed integer GPU tests on an H100 VM on hyperstack
|
||||
name: Cuda - Signed integer tests on H100
|
||||
name: TFHE Cuda Backend - Signed integer tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,34 +11,19 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -46,14 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -72,27 +56,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -113,10 +80,10 @@ jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 signed integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -130,10 +97,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -165,7 +128,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -189,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
60
.github/workflows/gpu_signed_integer_tests.yml
vendored
60
.github/workflows/gpu_signed_integer_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend signed integer on an AWS instance
|
||||
name: Cuda - Signed integer tests
|
||||
name: TFHE Cuda Backend - Signed integer tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,28 +11,17 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
@@ -41,7 +30,7 @@ jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -49,14 +38,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -75,27 +63,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-signed-integer-tests)
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ should-run, check-user-permission ]
|
||||
needs: should-run
|
||||
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
@@ -116,10 +87,10 @@ jobs:
|
||||
cuda-signed-integer-tests:
|
||||
name: CUDA signed integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -135,8 +106,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -176,7 +146,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -200,4 +170,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
|
||||
name: Cuda - Unsigned integer tests with classical PBS
|
||||
name: TFHE Cuda Backend - Unsigned integer tests with classical PBS
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,34 +11,19 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -46,14 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -72,27 +56,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-unsigned-classic-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -113,10 +80,10 @@ jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA unsigned integer tests with classical PBS
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -130,10 +97,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -165,7 +128,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-unsigned-classic-tests)
|
||||
@@ -189,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Test unsigned integers on an H100 VM on hyperstack
|
||||
name: Cuda - Unsigned integer tests on H100
|
||||
name: TFHE Cuda Backend - Unsigned integer tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,34 +11,19 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -46,14 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -72,27 +56,10 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
@@ -113,10 +80,10 @@ jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 unsigned integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -130,10 +97,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -165,7 +128,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -189,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
61
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
61
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
|
||||
name: Cuda - Unsigned integer tests
|
||||
name: TFHE Cuda Backend - Unsigned integer tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,29 +11,16 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
@@ -42,7 +29,7 @@ jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -50,14 +37,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -76,26 +62,9 @@ jobs:
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-unsigned-integer-tests)
|
||||
needs: [ should-run, check-user-permission ]
|
||||
needs: should-run
|
||||
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
@@ -117,10 +86,10 @@ jobs:
|
||||
cuda-unsigned-integer-tests:
|
||||
name: CUDA unsigned integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -134,10 +103,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
@@ -177,7 +142,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -201,4 +166,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
2
.github/workflows/integer_long_run_tests.yml
vendored
2
.github/workflows/integer_long_run_tests.yml
vendored
@@ -51,7 +51,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
|
||||
47
.github/workflows/m1_tests.yml
vendored
47
.github/workflows/m1_tests.yml
vendored
@@ -2,20 +2,8 @@ name: Tests on M1 CPU
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
types: [ labeled ]
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
types: [labeled]
|
||||
# Have a nightly build for M1 tests
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
@@ -33,35 +21,14 @@ env:
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check-ci-files:
|
||||
uses: ./.github/workflows/check_ci_files_change.yml
|
||||
with:
|
||||
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: check-ci-files
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_actor_permissions.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
cargo-builds-m1:
|
||||
needs: check-user-permission
|
||||
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
contains(github.event.label.name, 'm1_test') }}
|
||||
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
|
||||
runs-on: ["self-hosted", "m1mac"]
|
||||
# 12 hours, default is 6 hours, hopefully this is more than enough
|
||||
timeout-minutes: 720
|
||||
@@ -70,8 +37,6 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -213,7 +178,7 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
steps:
|
||||
- uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
|
||||
if: ${{ github.event_name == 'pull_request_target' }}
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
with:
|
||||
labels: m1_test
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -226,8 +191,6 @@ jobs:
|
||||
SLACK_COLOR: ${{ needs.cargo-builds-m1.result }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
|
||||
6
.github/workflows/make_release.yml
vendored
6
.github/workflows/make_release.yml
vendored
@@ -46,8 +46,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
@@ -83,8 +82,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
|
||||
3
.github/workflows/make_release_cuda.yml
vendored
3
.github/workflows/make_release_cuda.yml
vendored
@@ -61,8 +61,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-csprng
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-tfhe-csprng
|
||||
path: target/package/*.crate
|
||||
|
||||
2
.github/workflows/make_release_tfhe_fft.yml
vendored
2
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-fft
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
|
||||
2
.github/workflows/make_release_tfhe_ntt.yml
vendored
2
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-ntt
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable-derive
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-tfhe-versionable-derive
|
||||
path: target/package/*.crate
|
||||
@@ -61,8 +61,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
@@ -105,13 +103,13 @@ jobs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-tfhe-versionable
|
||||
path: target/package/*.crate
|
||||
@@ -139,7 +137,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Download artifact
|
||||
|
||||
5
.github/workflows/make_release_zk_pok.yml
vendored
5
.github/workflows/make_release_zk_pok.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-zk-pok
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package/*.crate
|
||||
@@ -61,8 +61,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
|
||||
3
.github/workflows/sync_on_push.yml
vendored
3
.github/workflows/sync_on_push.yml
vendored
@@ -16,8 +16,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: git-sync
|
||||
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
|
||||
with:
|
||||
|
||||
30
Makefile
30
Makefile
@@ -20,7 +20,7 @@ BENCH_OP_FLAVOR?=DEFAULT
|
||||
BENCH_TYPE?=latency
|
||||
NODE_VERSION=22.6
|
||||
BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
|
||||
BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
|
||||
BACKWARD_COMPAT_DATA_BRANCH?=v0.5
|
||||
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
|
||||
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
|
||||
TFHE_SPEC:=tfhe
|
||||
@@ -282,14 +282,14 @@ check_typos: install_typos_checker
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
|
||||
check_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
@@ -394,10 +394,10 @@ clippy_trivium: install_rs_check_toolchain
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
|
||||
@@ -1056,35 +1056,35 @@ bench_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer # Run benchmarks for signed integer
|
||||
bench_signed_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
|
||||
bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression_gpu
|
||||
bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,gpu,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1092,7 +1092,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
|
||||
bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1100,7 +1100,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
|
||||
bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1108,7 +1108,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
|
||||
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1116,14 +1116,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) -- ::unsigned
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-pke-bench \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512,pbs-stats \
|
||||
-p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
|
||||
@@ -27,8 +27,6 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
|
||||
std::abort(); \
|
||||
}
|
||||
|
||||
void cuda_set_device(uint32_t gpu_index);
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index);
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
|
||||
@@ -112,8 +112,6 @@ template <typename Torus> struct int_decompression {
|
||||
|
||||
generate_device_accumulator_with_encoding<Torus>(
|
||||
streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
|
||||
decompression_rescale_lut->get_degree(0),
|
||||
decompression_rescale_lut->get_max_degree(0),
|
||||
encryption_params.glwe_dimension, encryption_params.polynomial_size,
|
||||
effective_compression_message_modulus,
|
||||
effective_compression_carry_modulus,
|
||||
|
||||
@@ -38,15 +38,6 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
|
||||
enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };
|
||||
|
||||
extern "C" {
|
||||
|
||||
typedef struct {
|
||||
void *ptr;
|
||||
uint64_t *degrees;
|
||||
uint64_t *noise_levels;
|
||||
uint32_t num_radix_blocks;
|
||||
uint32_t lwe_dimension;
|
||||
} CudaRadixCiphertextFFI;
|
||||
|
||||
void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
@@ -54,7 +45,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory);
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
@@ -62,12 +53,13 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
void cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks);
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory);
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
void const *input_radix_lwe,
|
||||
int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -81,15 +73,13 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_1,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
|
||||
uint32_t shift);
|
||||
void *output_radix_lwe, void const *input_radix_lwe_1,
|
||||
void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks, uint32_t shift);
|
||||
|
||||
void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -98,10 +88,9 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
||||
|
||||
void cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_luts,
|
||||
uint32_t lut_stride);
|
||||
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks,
|
||||
uint32_t num_luts, uint32_t lut_stride);
|
||||
|
||||
void scratch_cuda_full_propagation_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -143,14 +132,15 @@ void cleanup_cuda_integer_mult(void *const *streams,
|
||||
|
||||
void cuda_negate_integer_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
|
||||
void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus);
|
||||
|
||||
void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
|
||||
uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus);
|
||||
void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus);
|
||||
|
||||
void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -199,8 +189,8 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(
|
||||
|
||||
void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
|
||||
void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -243,17 +233,15 @@ void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
|
||||
void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t lwe_ciphertext_count);
|
||||
|
||||
void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
|
||||
void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op);
|
||||
|
||||
void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -270,11 +258,9 @@ void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
|
||||
void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
|
||||
void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count);
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -319,16 +305,15 @@ void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
|
||||
const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -426,13 +411,12 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks);
|
||||
void *output_radix_lwe, void *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks, uint32_t shift);
|
||||
|
||||
void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -440,8 +424,9 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
|
||||
void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array);
|
||||
uint32_t gpu_count, void *lwe_array,
|
||||
uint32_t num_blocks,
|
||||
uint32_t lwe_size);
|
||||
|
||||
void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -454,8 +439,8 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
|
||||
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +0,0 @@
|
||||
#ifndef CUDA_RADIX_CIPHERTEXT_H
|
||||
#define CUDA_RADIX_CIPHERTEXT_H
|
||||
|
||||
void release_radix_ciphertext(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *data);
|
||||
|
||||
#endif
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef CUDA_LINALG_H_
|
||||
#define CUDA_LINALG_H_
|
||||
|
||||
#include "integer/integer.h"
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" {
|
||||
@@ -14,14 +13,17 @@ void cuda_negate_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2);
|
||||
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2);
|
||||
void cuda_add_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
template <typename Torus>
|
||||
bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size, int max_shared_memory);
|
||||
uint32_t polynomial_size);
|
||||
|
||||
template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
|
||||
uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, int max_shared_memory);
|
||||
uint32_t level_count);
|
||||
|
||||
#if CUDA_ARCH >= 900
|
||||
template <typename Torus>
|
||||
@@ -114,8 +114,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
|
||||
PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
this->pbs_variant = pbs_variant;
|
||||
this->lwe_chunk_size = lwe_chunk_size;
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
|
||||
@@ -61,7 +61,7 @@ get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
|
||||
|
||||
template <typename Torus>
|
||||
bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
|
||||
uint32_t polynomial_size, int max_shared_memory);
|
||||
uint32_t polynomial_size);
|
||||
|
||||
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;
|
||||
|
||||
@@ -77,10 +77,10 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
|
||||
bool allocate_gpu_memory) {
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
this->pbs_variant = pbs_variant;
|
||||
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
|
||||
if (allocate_gpu_memory) {
|
||||
switch (pbs_variant) {
|
||||
@@ -157,7 +157,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
|
||||
bool supports_dsm =
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
|
||||
Torus>(polynomial_size, max_shared_memory);
|
||||
Torus>(polynomial_size);
|
||||
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(
|
||||
@@ -218,7 +218,8 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
template <typename Torus>
|
||||
uint64_t get_buffer_size_programmable_bootstrap_cg(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
int max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
|
||||
uint64_t partial_sm =
|
||||
@@ -244,8 +245,7 @@ template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count,
|
||||
uint32_t num_samples,
|
||||
int max_shared_memory);
|
||||
uint32_t num_samples);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
|
||||
@@ -8,7 +8,7 @@ extern "C" {
|
||||
|
||||
bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t num_samples, int max_shared_memory);
|
||||
uint32_t num_samples);
|
||||
|
||||
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
|
||||
void *stream, uint32_t gpu_index, void *dest, void const *src,
|
||||
|
||||
@@ -11,7 +11,7 @@ void cuda_convert_lwe_ciphertext_vector_to_gpu(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *dest,
|
||||
T *src, uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
|
||||
cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
@@ -21,7 +21,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *dest,
|
||||
T *src, uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
|
||||
cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
@@ -55,7 +55,7 @@ __host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus const *glwe_array_in,
|
||||
uint32_t const *nth_array, uint32_t num_nths,
|
||||
uint32_t glwe_dimension) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
dim3 grid(num_nths);
|
||||
dim3 thds(params::degree / params::opt);
|
||||
|
||||
@@ -261,7 +261,7 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
|
||||
|
||||
// Optimization of packing keyswitch when packing many LWEs
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
@@ -57,7 +57,7 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
if (gpu_count != 1)
|
||||
PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
|
||||
"supported yet.")
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
int shared_memory_size = sizeof(double) * polynomial_size;
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
|
||||
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
constexpr int num_threads_y = 32;
|
||||
int num_blocks, num_threads_x;
|
||||
@@ -160,7 +160,7 @@ __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
|
||||
cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ template <typename Torus>
|
||||
__host__ void host_modulus_switch_inplace(cudaStream_t stream,
|
||||
uint32_t gpu_index, Torus *array,
|
||||
int size, uint32_t log_modulus) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
int num_threads = 0, num_blocks = 0;
|
||||
getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);
|
||||
|
||||
@@ -2,12 +2,8 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
void cuda_set_device(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
}
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cudaEvent_t event;
|
||||
check_cuda_error(cudaEventCreate(&event));
|
||||
return event;
|
||||
@@ -15,24 +11,24 @@ cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventRecord(event, stream));
|
||||
}
|
||||
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
|
||||
}
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventDestroy(event));
|
||||
}
|
||||
|
||||
/// Unsafe function to create a CUDA stream, must check first that GPU exists
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cudaStream_t stream;
|
||||
check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
return stream;
|
||||
@@ -40,22 +36,15 @@ cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
|
||||
/// Unsafe function to destroy CUDA stream, must check first the GPU exists
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamDestroy(stream));
|
||||
}
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
void synchronize_streams(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count) {
|
||||
for (uint i = 0; i < gpu_count; i++) {
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if a CUDA device is available at runtime
|
||||
uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
|
||||
@@ -63,7 +52,7 @@ uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
/// or if there's not enough memory. A safe wrapper around it must call
|
||||
/// cuda_check_valid_malloc() first
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
void *ptr;
|
||||
check_cuda_error(cudaMalloc((void **)&ptr, size));
|
||||
|
||||
@@ -74,7 +63,7 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
/// asynchronously.
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
void *ptr;
|
||||
|
||||
#ifndef CUDART_VERSION
|
||||
@@ -97,7 +86,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
|
||||
/// Check that allocation is valid
|
||||
void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
size_t total_mem, free_mem;
|
||||
check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
|
||||
if (size > free_mem) {
|
||||
@@ -145,7 +134,7 @@ void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
|
||||
PANIC("Cuda error: invalid device pointer in async copy to GPU.")
|
||||
}
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
@@ -165,7 +154,7 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
if (attr_src.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
if (attr_src.device == attr_dest.device) {
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
|
||||
@@ -190,7 +179,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
if (attr_src.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
if (attr_src.device == attr_dest.device) {
|
||||
check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice));
|
||||
} else {
|
||||
@@ -201,7 +190,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
|
||||
/// Synchronizes device
|
||||
void cuda_synchronize_device(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
@@ -214,7 +203,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda memset.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
|
||||
}
|
||||
|
||||
@@ -234,7 +223,7 @@ void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
if (attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda set value.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
int block_size = 256;
|
||||
int num_blocks = (n + block_size - 1) / block_size;
|
||||
|
||||
@@ -264,7 +253,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
|
||||
}
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
|
||||
}
|
||||
@@ -278,14 +267,14 @@ int cuda_get_number_of_gpus() {
|
||||
|
||||
/// Drop a cuda array
|
||||
void cuda_drop(void *ptr, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
}
|
||||
|
||||
/// Drop a cuda array asynchronously, if supported on the device
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
#elif (CUDART_VERSION >= 11020)
|
||||
|
||||
@@ -22,14 +22,15 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
|
||||
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
|
||||
gpu_count, ct, bsks, (uint64_t **)(ksks), mem,
|
||||
is_signed);
|
||||
gpu_count, static_cast<uint64_t *>(ct), bsks,
|
||||
(uint64_t **)(ksks), mem, is_signed,
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
|
||||
@@ -2,12 +2,15 @@
|
||||
#define TFHE_RS_ABS_CUH
|
||||
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer/bitwise_ops.cuh"
|
||||
#include "integer/comparison.cuh"
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
#include "integer/negation.cuh"
|
||||
#include "integer/scalar_shifts.cuh"
|
||||
#include "radix_ciphertext.cuh"
|
||||
#include "linear_algebra.h"
|
||||
#include "pbs/programmable_bootstrap.h"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <fstream>
|
||||
@@ -29,15 +32,16 @@ __host__ void scratch_cuda_integer_abs_kb(
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_abs_kb_async(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *ct, void *const *bsks, uint64_t *const *ksks,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed, uint32_t num_blocks) {
|
||||
__host__ void
|
||||
host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *ct, void *const *bsks,
|
||||
uint64_t *const *ksks, int_abs_buffer<uint64_t> *mem_ptr,
|
||||
bool is_signed, uint32_t num_blocks) {
|
||||
if (!is_signed)
|
||||
return;
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
auto mask = (Torus *)(mem_ptr->mask->ptr);
|
||||
auto mask = mem_ptr->mask;
|
||||
|
||||
auto big_lwe_dimension = radix_params.big_lwe_dimension;
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
@@ -48,56 +52,20 @@ __host__ void legacy_host_integer_abs_kb_async(
|
||||
cuda_memcpy_async_gpu_to_gpu(mask, ct, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
|
||||
host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
|
||||
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, num_blocks);
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
radix_params.big_lwe_dimension, num_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
radix_params.big_lwe_dimension, num_blocks);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
|
||||
// legacy bitop
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, mask, ct, bsks, ksks, num_blocks,
|
||||
mem_ptr->bitxor_mem->lut, mem_ptr->bitxor_mem->params.message_modulus);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *ct,
|
||||
void *const *bsks, uint64_t *const *ksks,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
|
||||
if (!is_signed)
|
||||
return;
|
||||
|
||||
auto mask = mem_ptr->mask;
|
||||
|
||||
uint32_t num_bits_in_ciphertext =
|
||||
(31 - __builtin_clz(mem_ptr->params.message_modulus)) *
|
||||
ct->num_radix_blocks;
|
||||
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], mask, ct);
|
||||
|
||||
host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, (Torus *)(mask->ptr),
|
||||
num_bits_in_ciphertext - 1, mem_ptr->arithmetic_scalar_shift_mem, bsks,
|
||||
ksks, ct->num_radix_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
ct->num_radix_blocks);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
|
||||
nullptr, nullptr, mem_ptr->scp_mem, bsks,
|
||||
ksks, requested_flag, uses_carry);
|
||||
|
||||
host_integer_radix_bitop_kb<Torus>(streams, gpu_indexes, gpu_count, ct, mask,
|
||||
ct, mem_ptr->bitxor_mem, bsks, ksks);
|
||||
host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
|
||||
mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_ABS_CUH
|
||||
|
||||
@@ -22,15 +22,17 @@ void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
|
||||
void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t lwe_ciphertext_count) {
|
||||
|
||||
host_integer_radix_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_1),
|
||||
static_cast<const uint64_t *>(lwe_array_2),
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
@@ -41,50 +43,3 @@ void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void update_degrees_after_bitand(uint64_t *output_degrees,
|
||||
uint64_t *lwe_array_1_degrees,
|
||||
uint64_t *lwe_array_2_degrees,
|
||||
uint32_t num_radix_blocks) {
|
||||
for (uint i = 0; i < num_radix_blocks; i++) {
|
||||
output_degrees[i] =
|
||||
std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void update_degrees_after_bitor(uint64_t *output_degrees,
|
||||
uint64_t *lwe_array_1_degrees,
|
||||
uint64_t *lwe_array_2_degrees,
|
||||
uint32_t num_radix_blocks) {
|
||||
for (uint i = 0; i < num_radix_blocks; i++) {
|
||||
auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max | j > result) {
|
||||
result = max | j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
void update_degrees_after_bitxor(uint64_t *output_degrees,
|
||||
uint64_t *lwe_array_1_degrees,
|
||||
uint64_t *lwe_array_2_degrees,
|
||||
uint32_t num_radix_blocks) {
|
||||
for (uint i = 0; i < num_radix_blocks; i++) {
|
||||
auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
// Try every possibility to find the worst case
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max ^ j > result) {
|
||||
result = max ^ j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,34 +14,15 @@
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_bitop_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks) {
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_1,
|
||||
Torus const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto lut = mem_ptr->lut;
|
||||
uint64_t degrees[lwe_array_1->num_radix_blocks];
|
||||
if (mem_ptr->op == BITOP_TYPE::BITAND) {
|
||||
update_degrees_after_bitand(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITOP_TYPE::BITOR) {
|
||||
update_degrees_after_bitor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITXOR) {
|
||||
update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
}
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, lut, lwe_array_out->num_radix_blocks,
|
||||
lut->params.message_modulus);
|
||||
|
||||
memcpy(lwe_array_out->degrees, degrees,
|
||||
lwe_array_out->num_radix_blocks * sizeof(uint64_t));
|
||||
bsks, ksks, num_radix_blocks, lut, lut->params.message_modulus);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -25,16 +25,19 @@ void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
|
||||
void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
|
||||
void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count) {
|
||||
|
||||
host_integer_radix_cmux_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_condition, lwe_array_true, lwe_array_false,
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_condition),
|
||||
static_cast<const uint64_t *>(lwe_array_true),
|
||||
static_cast<const uint64_t *>(lwe_array_false),
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
|
||||
lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#define CUDA_INTEGER_CMUX_CUH
|
||||
|
||||
#include "integer.cuh"
|
||||
#include "radix_ciphertext.cuh"
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void zero_out_if(cudaStream_t const *streams,
|
||||
@@ -12,7 +11,7 @@ __host__ void zero_out_if(cudaStream_t const *streams,
|
||||
int_zero_out_if_buffer<Torus> *mem_ptr,
|
||||
int_radix_lut<Torus> *predicate, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
auto params = mem_ptr->params;
|
||||
|
||||
// We can't use integer_radix_apply_bivariate_lookup_table_kb since the
|
||||
@@ -24,13 +23,13 @@ __host__ void zero_out_if(cudaStream_t const *streams,
|
||||
predicate->lwe_indexes_in, params.big_lwe_dimension,
|
||||
params.message_modulus, num_radix_blocks);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
|
||||
ksks, num_radix_blocks, predicate);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_cmux_kb(
|
||||
__host__ void host_integer_radix_cmux_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_condition,
|
||||
Torus const *lwe_array_true, Torus const *lwe_array_false,
|
||||
@@ -40,88 +39,34 @@ __host__ void legacy_host_integer_radix_cmux_kb(
|
||||
auto params = mem_ptr->params;
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
Torus radix_lwe_size = lwe_size * num_radix_blocks;
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in->ptr, lwe_array_true,
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in, lwe_array_true,
|
||||
radix_lwe_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
(Torus *)(mem_ptr->buffer_in->ptr) + radix_lwe_size, lwe_array_false,
|
||||
radix_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in + radix_lwe_size,
|
||||
lwe_array_false, radix_lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
(Torus *)(mem_ptr->condition_array->ptr) + i * lwe_size, lwe_condition,
|
||||
lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
}
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, (Torus *)(mem_ptr->buffer_out->ptr),
|
||||
(Torus *)(mem_ptr->buffer_in->ptr),
|
||||
(Torus *)(mem_ptr->condition_array->ptr), bsks, ksks,
|
||||
2 * num_radix_blocks, mem_ptr->predicate_lut, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
auto mem_true = (Torus *)(mem_ptr->buffer_out->ptr);
|
||||
auto ptr = (Torus *)mem_ptr->buffer_out->ptr;
|
||||
auto mem_false = &ptr[radix_lwe_size];
|
||||
auto added_cts = mem_true;
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
|
||||
mem_false, params.big_lwe_dimension,
|
||||
num_radix_blocks);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
num_radix_blocks, mem_ptr->message_extract_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_cmux_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false,
|
||||
int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {
|
||||
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_true->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_false->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
|
||||
auto num_radix_blocks = lwe_array_out->num_radix_blocks;
|
||||
auto params = mem_ptr->params;
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], mem_ptr->buffer_in, 0, num_radix_blocks,
|
||||
lwe_array_true, 0, num_radix_blocks);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], mem_ptr->buffer_in, num_radix_blocks,
|
||||
2 * num_radix_blocks, lwe_array_false, 0, num_radix_blocks);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
mem_ptr->condition_array, i, i + 1,
|
||||
lwe_condition, 0, 1);
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->condition_array + i * lwe_size,
|
||||
lwe_condition, lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
|
||||
mem_ptr->condition_array, bsks, ksks, mem_ptr->predicate_lut,
|
||||
2 * num_radix_blocks, params.message_modulus);
|
||||
mem_ptr->condition_array, bsks, ksks, 2 * num_radix_blocks,
|
||||
mem_ptr->predicate_lut, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
CudaRadixCiphertextFFI mem_true;
|
||||
CudaRadixCiphertextFFI mem_false;
|
||||
as_radix_ciphertext_slice<Torus>(&mem_true, mem_ptr->buffer_out, 0,
|
||||
num_radix_blocks);
|
||||
as_radix_ciphertext_slice<Torus>(&mem_false, mem_ptr->buffer_out,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], &mem_true, &mem_true,
|
||||
&mem_false, num_radix_blocks);
|
||||
auto mem_true = mem_ptr->buffer_out;
|
||||
auto mem_false = &mem_ptr->buffer_out[radix_lwe_size];
|
||||
auto added_cts = mem_true;
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
|
||||
mem_false, params.big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, &mem_true, bsks, ksks,
|
||||
mem_ptr->message_extract_lut, num_radix_blocks);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
num_radix_blocks, mem_ptr->message_extract_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -38,7 +38,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t lwe_dimension,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = (lwe_dimension + 1);
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
@@ -122,9 +122,7 @@ __host__ void are_all_comparisons_block_true(
|
||||
};
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
|
||||
is_max_value_lut->get_degree(1),
|
||||
is_max_value_lut->get_max_degree(1), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
is_equal_to_num_blocks_lut_f);
|
||||
|
||||
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
|
||||
@@ -148,12 +146,12 @@ __host__ void are_all_comparisons_block_true(
|
||||
// Applies the LUT
|
||||
if (remaining_blocks == 1) {
|
||||
// In the last iteration we copy the output to the final address
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
|
||||
ksks, 1, lut);
|
||||
return;
|
||||
} else {
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
|
||||
num_chunks, lut);
|
||||
}
|
||||
@@ -219,12 +217,12 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
// Applies the LUT
|
||||
if (remaining_blocks == 1) {
|
||||
// In the last iteration we copy the output to the final address
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
|
||||
ksks, 1, lut);
|
||||
return;
|
||||
} else {
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->tmp_lwe_array_out,
|
||||
accumulator, bsks, ksks, num_chunks, lut);
|
||||
}
|
||||
@@ -305,7 +303,7 @@ __host__ void host_compare_with_zero_equality(
|
||||
}
|
||||
}
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, sum, sum, bsks, ksks, num_sum_blocks,
|
||||
zero_comparison);
|
||||
are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
|
||||
@@ -324,7 +322,7 @@ __host__ void host_integer_radix_equality_check_kb(
|
||||
|
||||
// Applies the LUT for the comparison operation
|
||||
auto comparisons = mem_ptr->tmp_block_comparisons;
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, comparisons, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, num_radix_blocks, eq_buffer->operator_lut,
|
||||
eq_buffer->operator_lut->params.message_modulus);
|
||||
@@ -371,7 +369,7 @@ __host__ void compare_radix_blocks_kb(
|
||||
|
||||
// Apply LUT to compare to 0
|
||||
auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_out, bsks, ksks,
|
||||
num_radix_blocks, is_non_zero_lut);
|
||||
|
||||
@@ -422,7 +420,7 @@ __host__ void tree_sign_reduction(
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, big_lwe_dimension,
|
||||
partial_block_count, 4);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
|
||||
partial_block_count >> 1, inner_tree_leaf);
|
||||
|
||||
@@ -462,13 +460,12 @@ __host__ void tree_sign_reduction(
|
||||
f = sign_handler_f;
|
||||
}
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0),
|
||||
last_lut->get_degree(0), last_lut->get_max_degree(0), glwe_dimension,
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
// Last leaf
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, y, bsks, ksks, 1,
|
||||
last_lut);
|
||||
}
|
||||
@@ -514,7 +511,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
|
||||
// Clean noise
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
|
||||
2 * packed_num_radix_blocks, identity_lut);
|
||||
|
||||
@@ -552,11 +549,11 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
Torus *last_right_block_before_sign_block =
|
||||
packed_right + packed_num_radix_blocks * big_lwe_size;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
|
||||
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
|
||||
identity_lut);
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, last_right_block_before_sign_block,
|
||||
lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks,
|
||||
1, identity_lut);
|
||||
@@ -566,7 +563,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
last_left_block_before_sign_block, last_right_block_before_sign_block,
|
||||
mem_ptr, bsks, ksks, 1);
|
||||
// Compare the sign block separately
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
|
||||
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
|
||||
@@ -579,7 +576,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
streams, gpu_indexes, gpu_count, comparisons, lwe_array_left,
|
||||
lwe_array_right, mem_ptr, bsks, ksks, num_radix_blocks - 1);
|
||||
// Compare the sign block separately
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
comparisons + (num_radix_blocks - 1) * big_lwe_size,
|
||||
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
|
||||
@@ -623,7 +620,7 @@ __host__ void host_integer_radix_maxmin_kb(
|
||||
ksks, total_num_radix_blocks);
|
||||
|
||||
// Selector
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
|
||||
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
|
||||
|
||||
@@ -50,7 +50,7 @@ __host__ void host_pack(cudaStream_t stream, uint32_t gpu_index,
|
||||
if (array_in == array_out)
|
||||
PANIC("Cuda error: Input and output must be different");
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
auto compression_params = mem_ptr->compression_params;
|
||||
|
||||
auto log_modulus = mem_ptr->storage_log_modulus;
|
||||
@@ -185,7 +185,7 @@ __host__ void host_extract(cudaStream_t stream, uint32_t gpu_index,
|
||||
if (array_in == glwe_array_out)
|
||||
PANIC("Cuda error: Input and output must be different");
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
auto compression_params = mem_ptr->compression_params;
|
||||
|
||||
|
||||
@@ -285,7 +285,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// Shift the mask so that we will only keep bits we should
|
||||
uint32_t shifted_mask = full_message_mask >> shift_amount;
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, interesting_divisor.last_block(),
|
||||
interesting_divisor.last_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_1[shifted_mask]);
|
||||
@@ -314,7 +314,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// the estimated degree of the output is < msg_modulus
|
||||
shifted_mask = shifted_mask & full_message_mask;
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, divisor_ms_blocks.first_block(),
|
||||
divisor_ms_blocks.first_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_2[shifted_mask]);
|
||||
@@ -347,7 +347,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
interesting_remainder1.len - 1, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
legacy_host_radix_blocks_rotate_left<Torus>(
|
||||
host_radix_blocks_rotate_left<Torus>(
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder1.data,
|
||||
tmp_radix.data, 1, interesting_remainder1.len, big_lwe_size);
|
||||
|
||||
@@ -402,7 +402,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// but in that position, interesting_remainder2 always has a 0
|
||||
auto &merged_interesting_remainder = interesting_remainder1;
|
||||
|
||||
legacy_host_addition<Torus>(
|
||||
host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], merged_interesting_remainder.data,
|
||||
merged_interesting_remainder.data, interesting_remainder2.data,
|
||||
radix_params.big_lwe_dimension, merged_interesting_remainder.len);
|
||||
@@ -481,7 +481,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto create_clean_version_of_merged_remainder =
|
||||
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
cleaned_merged_interesting_remainder.data, bsks, ksks,
|
||||
@@ -507,10 +507,10 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
|
||||
int factor = (i) ? 3 : 2;
|
||||
int factor_lut_id = factor - 2;
|
||||
@@ -521,7 +521,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto conditionally_zero_out_merged_interesting_remainder =
|
||||
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
@@ -534,7 +534,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto conditionally_zero_out_merged_new_remainder =
|
||||
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
new_remainder.data, overflow_sum_radix.data, bsks, ksks,
|
||||
new_remainder.len,
|
||||
@@ -544,7 +544,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto set_quotient_bit = [&](cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, did_not_overflow.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data, bsks, ksks, 1,
|
||||
@@ -552,7 +552,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
mem_ptr->merge_overflow_flags_luts[pos_in_block]
|
||||
->params.message_modulus);
|
||||
|
||||
legacy_host_addition<Torus>(
|
||||
host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], "ient[block_of_bit * big_lwe_size],
|
||||
"ient[block_of_bit * big_lwe_size], did_not_overflow.data,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
@@ -588,17 +588,17 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
|
||||
// Clean the quotient and remainder
|
||||
// as even though they have no carries, they are not at nominal noise level
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], remainder,
|
||||
remainder1.data, remainder2.data,
|
||||
radix_params.big_lwe_dimension, remainder1.len);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], remainder, remainder1.data,
|
||||
remainder2.data, radix_params.big_lwe_dimension,
|
||||
remainder1.len);
|
||||
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem_ptr->sub_streams_1, gpu_indexes, gpu_count, remainder, remainder,
|
||||
bsks, ksks, num_blocks, mem_ptr->message_extract_lut_1);
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem_ptr->sub_streams_2, gpu_indexes, gpu_count, quotient, quotient, bsks,
|
||||
ksks, num_blocks, mem_ptr->message_extract_lut_2);
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
@@ -636,14 +636,12 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
legacy_host_integer_abs_kb_async<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
positive_numerator.data, bsks, ksks, int_mem_ptr->abs_mem_1, true,
|
||||
num_blocks);
|
||||
legacy_host_integer_abs_kb_async<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
positive_divisor.data, bsks, ksks, int_mem_ptr->abs_mem_2, true,
|
||||
num_blocks);
|
||||
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count, positive_numerator.data, bsks, ksks,
|
||||
int_mem_ptr->abs_mem_1, true, num_blocks);
|
||||
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, positive_divisor.data, bsks, ksks,
|
||||
int_mem_ptr->abs_mem_2, true, num_blocks);
|
||||
for (uint j = 0; j < int_mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
@@ -654,7 +652,7 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
positive_numerator.data, positive_divisor.data, bsks, ksks,
|
||||
int_mem_ptr->unsigned_mem, num_blocks);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->sign_bits_are_different,
|
||||
&numerator[big_lwe_size * (num_blocks - 1)],
|
||||
@@ -667,36 +665,36 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
legacy_host_integer_radix_negation(
|
||||
host_integer_radix_negation(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_quotient, quotient, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_quotient, nullptr, nullptr, int_mem_ptr->scp_mem_1,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
|
||||
legacy_host_integer_radix_negation(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_remainder, remainder,
|
||||
radix_params.big_lwe_dimension, num_blocks,
|
||||
radix_params.message_modulus, radix_params.carry_modulus);
|
||||
host_integer_radix_negation(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_remainder,
|
||||
remainder, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus,
|
||||
radix_params.carry_modulus);
|
||||
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_remainder, nullptr, nullptr,
|
||||
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks, requested_flag,
|
||||
uses_carry);
|
||||
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient,
|
||||
int_mem_ptr->sign_bits_are_different, int_mem_ptr->negated_quotient,
|
||||
quotient, int_mem_ptr->cmux_quotient_mem, bsks, ksks, num_blocks);
|
||||
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count, remainder,
|
||||
&numerator[big_lwe_size * (num_blocks - 1)],
|
||||
int_mem_ptr->negated_remainder, remainder,
|
||||
|
||||
@@ -106,27 +106,32 @@ void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
|
||||
const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t requested_flag, uint32_t uses_carry) {
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array, carry_out,
|
||||
carry_in, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), requested_flag, uses_carry);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t requested_flag, uint32_t uses_carry) {
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_add_and_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lhs_array, rhs_array,
|
||||
carry_out, carry_in, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), requested_flag, uses_carry);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs_array),
|
||||
static_cast<const uint64_t *>(rhs_array),
|
||||
static_cast<uint64_t *>(carry_out),
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
@@ -179,7 +184,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -190,7 +195,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
lut_degree, allocate_gpu_memory);
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
@@ -200,7 +205,7 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -211,19 +216,22 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
num_many_lut, lut_degree, allocate_gpu_memory);
|
||||
num_many_lut, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks) {
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
void const *input_radix_lwe,
|
||||
int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks) {
|
||||
|
||||
host_apply_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
input_radix_lwe, (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
|
||||
bsks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks,
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
@@ -236,15 +244,16 @@ void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
|
||||
void cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
host_apply_many_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
input_radix_lwe, (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
|
||||
bsks, num_many_lut, lut_stride);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
|
||||
num_many_lut, lut_stride);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
@@ -254,7 +263,7 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -265,21 +274,22 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
lut_degree, allocate_gpu_memory);
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_1,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
|
||||
uint32_t shift) {
|
||||
void *output_radix_lwe, void const *input_radix_lwe_1,
|
||||
void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks, uint32_t shift) {
|
||||
|
||||
host_apply_bivariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
input_radix_lwe_1, input_radix_lwe_2, (int_radix_lut<uint64_t> *)mem_ptr,
|
||||
(uint64_t **)(ksks), bsks, num_radix_blocks, shift);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe_1),
|
||||
static_cast<const uint64_t *>(input_radix_lwe_2),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
|
||||
shift);
|
||||
}
|
||||
|
||||
void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
||||
@@ -297,7 +307,7 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -308,21 +318,22 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
lut_degree, allocate_gpu_memory);
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_radix_blocks) {
|
||||
void *output_radix_lwe, void *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks, uint32_t shift) {
|
||||
|
||||
int_radix_params params = ((int_radix_lut<uint64_t> *)mem_ptr)->params;
|
||||
|
||||
host_compute_prefix_sum_hillis_steele<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
generates_or_propagates, params, (int_radix_lut<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<uint64_t *>(generates_or_propagates), params,
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
@@ -334,9 +345,11 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
|
||||
void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array) {
|
||||
uint32_t gpu_count, void *lwe_array,
|
||||
uint32_t num_blocks,
|
||||
uint32_t lwe_size) {
|
||||
|
||||
host_radix_blocks_reverse_inplace<uint64_t>((cudaStream_t *)(streams),
|
||||
gpu_indexes, lwe_array);
|
||||
host_radix_blocks_reverse_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes,
|
||||
static_cast<uint64_t *>(lwe_array), num_blocks, lwe_size);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -228,9 +228,9 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
if (num_radix_in_vec == 2) {
|
||||
legacy_host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size], big_lwe_dimension, num_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size],
|
||||
big_lwe_dimension, num_blocks);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -280,13 +280,10 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
|
||||
// generate accumulators
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], message_acc,
|
||||
luts_message_carry->get_degree(0), luts_message_carry->get_max_degree(0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
lut_f_message);
|
||||
streams[0], gpu_indexes[0], message_acc, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_message);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc, luts_message_carry->get_degree(1),
|
||||
luts_message_carry->get_max_degree(1), glwe_dimension, polynomial_size,
|
||||
streams[0], gpu_indexes[0], carry_acc, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_carry);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
@@ -297,7 +294,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
ch_amount++;
|
||||
dim3 add_grid(ch_amount, num_blocks, 1);
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
tree_add_chunks<Torus><<<add_grid, 512, 0, streams[0]>>>(
|
||||
new_blocks, old_blocks, min(r, chunk_size), big_lwe_size, num_blocks);
|
||||
|
||||
@@ -448,9 +445,9 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
luts_message_carry->release(streams, gpu_indexes, gpu_count);
|
||||
delete (luts_message_carry);
|
||||
|
||||
legacy_host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size], big_lwe_dimension, num_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size],
|
||||
big_lwe_dimension, num_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
@@ -541,13 +538,13 @@ __host__ void host_integer_mult_radix_kb(
|
||||
dim3 grid(lsb_vector_block_count, 1, 1);
|
||||
dim3 thds(params::degree / params::opt, 1, 1);
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
all_shifted_lhs_rhs<Torus, params><<<grid, thds, 0, streams[0]>>>(
|
||||
radix_lwe_left, vector_result_lsb, vector_result_msb, radix_lwe_right,
|
||||
vector_lsb_rhs, vector_msb_rhs, num_blocks);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, block_mul_res, block_mul_res,
|
||||
vector_result_sb, bsks, ksks, total_block_count, luts_array,
|
||||
luts_array->params.message_modulus);
|
||||
@@ -556,7 +553,7 @@ __host__ void host_integer_mult_radix_kb(
|
||||
vector_result_msb = &block_mul_res[lsb_vector_block_count *
|
||||
(polynomial_size * glwe_dimension + 1)];
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
fill_radix_from_lsb_msb<Torus, params>
|
||||
<<<num_blocks * num_blocks, params::degree / params::opt, 0,
|
||||
streams[0]>>>(vector_result_sb, vector_result_lsb, vector_result_msb,
|
||||
@@ -587,7 +584,7 @@ __host__ void host_integer_mult_radix_kb(
|
||||
auto scp_mem_ptr = mem_ptr->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
@@ -2,11 +2,13 @@
|
||||
|
||||
void cuda_negate_integer_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
|
||||
void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
|
||||
host_integer_radix_negation<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
|
||||
gpu_count, lwe_array_out, lwe_array_in,
|
||||
message_modulus, carry_modulus);
|
||||
host_integer_radix_negation<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), lwe_dimension,
|
||||
lwe_ciphertext_count, message_modulus, carry_modulus);
|
||||
}
|
||||
|
||||
@@ -54,69 +54,12 @@ device_integer_radix_negation(Torus *output, Torus const *input,
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_radix_negation(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in,
|
||||
uint64_t message_modulus, uint64_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_in->num_radix_blocks)
|
||||
PANIC("Cuda error: lwe_array_in and lwe_array_out num radix blocks must be "
|
||||
"the same")
|
||||
|
||||
if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
|
||||
PANIC("Cuda error: lwe_array_in and lwe_array_out lwe_dimension must be "
|
||||
"the same")
|
||||
|
||||
auto num_radix_blocks = lwe_array_out->num_radix_blocks;
|
||||
auto lwe_dimension = lwe_array_out->lwe_dimension;
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
// Value of the shift we multiply our messages by
|
||||
// If message_modulus and carry_modulus are always powers of 2 we can simplify
|
||||
// this
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_integer_radix_negation<Torus><<<grid, thds, 0, streams[0]>>>(
|
||||
static_cast<Torus *>(lwe_array_out->ptr),
|
||||
static_cast<Torus *>(lwe_array_in->ptr), num_radix_blocks, lwe_dimension,
|
||||
message_modulus, delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
uint8_t zb = 0;
|
||||
for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) {
|
||||
auto input_degree = lwe_array_in->degrees[i];
|
||||
|
||||
if (zb != 0) {
|
||||
input_degree += static_cast<uint64_t>(zb);
|
||||
}
|
||||
Torus z =
|
||||
std::max(static_cast<Torus>(1),
|
||||
static_cast<Torus>(ceil(input_degree / message_modulus))) *
|
||||
message_modulus;
|
||||
|
||||
lwe_array_out->degrees[i] = z - static_cast<uint64_t>(zb);
|
||||
lwe_array_out->noise_levels[i] = lwe_array_in->noise_levels[i];
|
||||
zb = z / message_modulus;
|
||||
}
|
||||
}
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_negation(
|
||||
__host__ void host_integer_radix_negation(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *output, Torus const *input,
|
||||
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
|
||||
uint64_t message_modulus, uint64_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
#include "radix_ciphertext.cuh"
|
||||
|
||||
void release_radix_ciphertext(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *data) {
|
||||
cuda_drop_async(data->ptr, stream, gpu_index);
|
||||
free(data->degrees);
|
||||
free(data->noise_levels);
|
||||
cuda_synchronize_stream(stream, gpu_index);
|
||||
}
|
||||
@@ -1,142 +0,0 @@
|
||||
#ifndef CUDA_INTEGER_RADIX_CIPHERTEXT_CUH
|
||||
#define CUDA_INTEGER_RADIX_CIPHERTEXT_CUH
|
||||
|
||||
#include "device.h"
|
||||
#include "integer/integer.h"
|
||||
|
||||
template <typename Torus>
|
||||
void create_zero_radix_ciphertext_async(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *radix,
|
||||
const uint32_t num_radix_blocks,
|
||||
const uint32_t lwe_dimension) {
|
||||
radix->lwe_dimension = lwe_dimension;
|
||||
radix->num_radix_blocks = num_radix_blocks;
|
||||
uint32_t size = (lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
|
||||
radix->ptr = (void *)cuda_malloc_async(size, stream, gpu_index);
|
||||
cuda_memset_async(radix->ptr, 0, size, stream, gpu_index);
|
||||
|
||||
radix->degrees = (uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
|
||||
radix->noise_levels =
|
||||
(uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
|
||||
if (radix->degrees == NULL || radix->noise_levels == NULL) {
|
||||
PANIC("Cuda error: degrees / noise levels not allocated correctly")
|
||||
}
|
||||
}
|
||||
|
||||
// end_input_lwe_index is exclusive
|
||||
template <typename Torus>
|
||||
void as_radix_ciphertext_slice(CudaRadixCiphertextFFI *output_radix,
|
||||
const CudaRadixCiphertextFFI *input_radix,
|
||||
const uint32_t start_input_lwe_index,
|
||||
const uint32_t end_input_lwe_index) {
|
||||
if (input_radix->num_radix_blocks <
|
||||
end_input_lwe_index - start_input_lwe_index)
|
||||
PANIC("Cuda error: input radix should have more blocks than the specified "
|
||||
"range")
|
||||
if (start_input_lwe_index >= end_input_lwe_index)
|
||||
PANIC("Cuda error: slice range should be non negative")
|
||||
|
||||
auto lwe_size = input_radix->lwe_dimension + 1;
|
||||
output_radix->num_radix_blocks = end_input_lwe_index - start_input_lwe_index;
|
||||
output_radix->lwe_dimension = input_radix->lwe_dimension;
|
||||
Torus *in_ptr = (Torus *)input_radix->ptr;
|
||||
output_radix->ptr = (void *)(in_ptr + start_input_lwe_index * lwe_size);
|
||||
output_radix->degrees = input_radix->degrees + start_input_lwe_index;
|
||||
output_radix->noise_levels =
|
||||
input_radix->noise_levels + start_input_lwe_index;
|
||||
}
|
||||
|
||||
// end_lwe_index are exclusive
|
||||
template <typename Torus>
|
||||
void copy_radix_ciphertext_slice_async(
|
||||
cudaStream_t const stream, uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *output_radix, const uint32_t output_start_lwe_index,
|
||||
const uint32_t output_end_lwe_index,
|
||||
const CudaRadixCiphertextFFI *input_radix,
|
||||
const uint32_t input_start_lwe_index, const uint32_t input_end_lwe_index) {
|
||||
if (output_radix->lwe_dimension != input_radix->lwe_dimension)
|
||||
PANIC("Cuda error: input lwe dimension should be equal to output lwe "
|
||||
"dimension")
|
||||
if (output_end_lwe_index - output_start_lwe_index !=
|
||||
input_end_lwe_index - input_start_lwe_index)
|
||||
PANIC("Cuda error: output and input ranges should have the same size")
|
||||
if (output_end_lwe_index - output_start_lwe_index >
|
||||
output_radix->num_radix_blocks)
|
||||
PANIC("Cuda error: output range should be lower or equal to output num "
|
||||
"blocks")
|
||||
if (input_end_lwe_index - input_start_lwe_index >
|
||||
input_radix->num_radix_blocks)
|
||||
PANIC(
|
||||
"Cuda error: input range should be lower or equal to input num blocks")
|
||||
if (output_end_lwe_index - output_start_lwe_index <= 0)
|
||||
PANIC("Cuda error: output range should be greater than zero")
|
||||
if (input_end_lwe_index - input_start_lwe_index <= 0)
|
||||
PANIC("Cuda error: input range should be greater than zero")
|
||||
if (output_end_lwe_index <= output_start_lwe_index)
|
||||
PANIC("Cuda error: output end index should be greater or equal to output "
|
||||
"start index")
|
||||
if (input_end_lwe_index <= input_start_lwe_index)
|
||||
PANIC("Cuda error: input end index should be greater or equal to input "
|
||||
"start index")
|
||||
if (output_start_lwe_index > output_radix->num_radix_blocks)
|
||||
PANIC("Cuda error: output start index should be smaller than the number "
|
||||
"of blocks")
|
||||
if (input_start_lwe_index > input_radix->num_radix_blocks)
|
||||
PANIC("Cuda error: input start index should be smaller than the number "
|
||||
"of blocks")
|
||||
|
||||
auto lwe_size = input_radix->lwe_dimension + 1;
|
||||
Torus *out_ptr = (Torus *)output_radix->ptr;
|
||||
out_ptr = &out_ptr[output_start_lwe_index * lwe_size];
|
||||
Torus *in_ptr = (Torus *)input_radix->ptr;
|
||||
in_ptr = &in_ptr[input_start_lwe_index * lwe_size];
|
||||
auto num_blocks = input_end_lwe_index - input_start_lwe_index;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(out_ptr, in_ptr,
|
||||
num_blocks * lwe_size * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
for (uint i = 0; i < num_blocks; i++) {
|
||||
output_radix->degrees[i + output_start_lwe_index] =
|
||||
input_radix->degrees[i + input_start_lwe_index];
|
||||
output_radix->noise_levels[i + output_start_lwe_index] =
|
||||
input_radix->noise_levels[i + input_start_lwe_index];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void copy_radix_ciphertext_async(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *output_radix,
|
||||
const CudaRadixCiphertextFFI *input_radix) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
stream, gpu_index, output_radix, 0, output_radix->num_radix_blocks,
|
||||
input_radix, 0, input_radix->num_radix_blocks);
|
||||
}
|
||||
|
||||
// end_lwe_index is exclusive
|
||||
template <typename Torus>
|
||||
void set_zero_radix_ciphertext_slice_async(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *radix,
|
||||
const uint32_t start_lwe_index,
|
||||
const uint32_t end_lwe_index) {
|
||||
if (radix->num_radix_blocks < end_lwe_index - start_lwe_index)
|
||||
PANIC("Cuda error: input radix should have more blocks than the specified "
|
||||
"range")
|
||||
if (start_lwe_index >= end_lwe_index)
|
||||
PANIC("Cuda error: slice range should be non negative")
|
||||
|
||||
auto lwe_size = radix->lwe_dimension + 1;
|
||||
auto num_blocks_to_set = end_lwe_index - start_lwe_index;
|
||||
auto lwe_array_out_block = (Torus *)radix->ptr + start_lwe_index * lwe_size;
|
||||
cuda_memset_async(lwe_array_out_block, 0,
|
||||
num_blocks_to_set * lwe_size * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
memset(&radix->degrees[start_lwe_index], 0,
|
||||
num_blocks_to_set * sizeof(uint64_t));
|
||||
memset(&radix->noise_levels[start_lwe_index], 0,
|
||||
num_blocks_to_set * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -2,11 +2,13 @@
|
||||
|
||||
void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
|
||||
uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
|
||||
host_integer_radix_scalar_addition_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array,
|
||||
static_cast<const uint64_t *>(scalar_input), num_scalars, message_modulus,
|
||||
carry_modulus);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(scalar_input), lwe_dimension,
|
||||
lwe_ciphertext_count, message_modulus, carry_modulus);
|
||||
}
|
||||
|
||||
@@ -24,12 +24,12 @@ __global__ void device_integer_radix_scalar_addition_inplace(
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_scalar_addition_inplace(
|
||||
__host__ void host_integer_radix_scalar_addition_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array, Torus const *scalar_input,
|
||||
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
@@ -49,42 +49,6 @@ __host__ void legacy_host_integer_radix_scalar_addition_inplace(
|
||||
delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_scalar_addition_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array,
|
||||
Torus const *scalar_input, uint32_t num_scalars, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
if (lwe_array->num_radix_blocks < num_scalars)
|
||||
PANIC("Cuda error: num scalars should be smaller or equal to input num "
|
||||
"radix blocks")
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = num_scalars;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
// Value of the shift we multiply our messages by
|
||||
// If message_modulus and carry_modulus are always powers of 2 we can simplify
|
||||
// this
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_integer_radix_scalar_addition_inplace<Torus>
|
||||
<<<grid, thds, 0, streams[0]>>>((Torus *)lwe_array->ptr, scalar_input,
|
||||
num_scalars, lwe_array->lwe_dimension,
|
||||
delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
Torus scalar_input_cpu[num_scalars];
|
||||
cuda_memcpy_async_to_cpu(&scalar_input_cpu, scalar_input,
|
||||
num_scalars * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
for (uint i = 0; i < num_scalars; i++) {
|
||||
lwe_array->degrees[i] = lwe_array->degrees[i] + scalar_input_cpu[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void device_integer_radix_add_scalar_one_inplace(
|
||||
@@ -104,7 +68,7 @@ __host__ void host_integer_radix_add_scalar_one_inplace(
|
||||
uint32_t gpu_count, Torus *lwe_array, uint32_t lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
@@ -144,7 +108,7 @@ __host__ void host_integer_radix_scalar_subtraction_inplace(
|
||||
uint32_t gpu_count, Torus *lwe_array, Torus *scalar_input,
|
||||
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
|
||||
@@ -2,58 +2,15 @@
|
||||
|
||||
void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
|
||||
void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op) {
|
||||
|
||||
host_integer_radix_scalar_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_input, static_cast<const uint64_t *>(clear_blocks),
|
||||
num_clear_blocks, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
}
|
||||
|
||||
void update_degrees_after_scalar_bitand(uint64_t *output_degrees,
|
||||
uint64_t *clear_degrees,
|
||||
uint64_t *input_degrees,
|
||||
uint32_t num_clear_blocks) {
|
||||
for (uint i = 0; i < num_clear_blocks; i++) {
|
||||
output_degrees[i] = std::min(clear_degrees[i], input_degrees[i]);
|
||||
}
|
||||
}
|
||||
void update_degrees_after_scalar_bitor(uint64_t *output_degrees,
|
||||
uint64_t *clear_degrees,
|
||||
uint64_t *input_degrees,
|
||||
uint32_t num_clear_blocks) {
|
||||
for (uint i = 0; i < num_clear_blocks; i++) {
|
||||
auto max = std::max(clear_degrees[i], input_degrees[i]);
|
||||
auto min = std::min(clear_degrees[i], input_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max | j > result) {
|
||||
result = max | j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
}
|
||||
void update_degrees_after_scalar_bitxor(uint64_t *output_degrees,
|
||||
uint64_t *clear_degrees,
|
||||
uint64_t *input_degrees,
|
||||
uint32_t num_clear_blocks) {
|
||||
for (uint i = 0; i < num_clear_blocks; i++) {
|
||||
auto max = std::max(clear_degrees[i], input_degrees[i]);
|
||||
auto min = std::min(clear_degrees[i], input_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
// Try every possibility to find the worst case
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max ^ j > result) {
|
||||
result = max ^ j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_input),
|
||||
static_cast<const uint64_t *>(clear_blocks), num_clear_blocks,
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
lwe_ciphertext_count, op);
|
||||
}
|
||||
|
||||
@@ -7,60 +7,45 @@
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_scalar_bitop_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input, Torus const *clear_blocks,
|
||||
uint32_t num_clear_blocks, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks) {
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_input,
|
||||
Torus const *clear_blocks, uint32_t num_clear_blocks,
|
||||
int_bitop_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
|
||||
uint32_t num_radix_blocks, BITOP_TYPE op) {
|
||||
|
||||
if (output->num_radix_blocks != input->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be equal")
|
||||
if (output->lwe_dimension != input->lwe_dimension)
|
||||
PANIC("Cuda error: input and output num radix blocks must be equal")
|
||||
auto lut = mem_ptr->lut;
|
||||
auto op = mem_ptr->op;
|
||||
auto num_radix_blocks = output->num_radix_blocks;
|
||||
auto params = lut->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
|
||||
uint32_t lwe_size = big_lwe_dimension + 1;
|
||||
|
||||
if (num_clear_blocks == 0) {
|
||||
if (op == SCALAR_BITAND) {
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
output, 0, num_radix_blocks);
|
||||
cuda_memset_async(lwe_array_out, 0,
|
||||
num_radix_blocks * lwe_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
} else {
|
||||
if (input != output)
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
input);
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array_out, lwe_array_input,
|
||||
num_radix_blocks * lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
} else {
|
||||
// We have all possible LUTs pre-computed and we use the decomposed scalar
|
||||
// as index to recover the right one
|
||||
uint64_t degrees[num_clear_blocks];
|
||||
uint64_t clear_degrees[num_clear_blocks];
|
||||
cuda_memcpy_async_to_cpu(&clear_degrees, clear_blocks,
|
||||
num_clear_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
if (mem_ptr->op == BITOP_TYPE::SCALAR_BITAND) {
|
||||
update_degrees_after_scalar_bitand(degrees, clear_degrees, input->degrees,
|
||||
num_clear_blocks);
|
||||
} else if (mem_ptr->op == BITOP_TYPE::SCALAR_BITOR) {
|
||||
update_degrees_after_scalar_bitor(degrees, clear_degrees, input->degrees,
|
||||
num_clear_blocks);
|
||||
} else if (mem_ptr->op == SCALAR_BITXOR) {
|
||||
update_degrees_after_scalar_bitxor(degrees, clear_degrees, input->degrees,
|
||||
num_clear_blocks);
|
||||
}
|
||||
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
|
||||
num_clear_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, output, input, bsks, ksks, lut,
|
||||
num_clear_blocks);
|
||||
memcpy(output->degrees, degrees, num_clear_blocks * sizeof(uint64_t));
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_input, bsks,
|
||||
ksks, num_clear_blocks, lut);
|
||||
|
||||
if (op == SCALAR_BITAND && num_clear_blocks < num_radix_blocks) {
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
output, num_clear_blocks,
|
||||
num_radix_blocks);
|
||||
auto lwe_array_out_block = lwe_array_out + num_clear_blocks * lwe_size;
|
||||
cuda_memset_async(lwe_array_out_block, 0,
|
||||
(num_radix_blocks - num_clear_blocks) * lwe_size *
|
||||
sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ __host__ void scalar_compare_radix_blocks_kb(
|
||||
|
||||
// Apply LUT to compare to 0
|
||||
auto sign_lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, subtracted_blocks, bsks,
|
||||
ksks, num_radix_blocks, sign_lut);
|
||||
|
||||
@@ -110,13 +110,13 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
};
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
|
||||
lut->get_max_degree(0), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, scalar_last_leaf_lut_f);
|
||||
generate_device_accumulator<Torus>(streams[0], gpu_indexes[0],
|
||||
lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus,
|
||||
carry_modulus, scalar_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
mem_ptr->tmp_lwe_array_out, bsks, ksks, 1, lut);
|
||||
|
||||
@@ -195,12 +195,12 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
|
||||
auto lut = diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
|
||||
lut->get_max_degree(0), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, scalar_bivariate_last_leaf_lut_f);
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_lsb_out,
|
||||
lwe_array_msb_out, bsks, ksks, 1, lut, lut->params.message_modulus);
|
||||
|
||||
@@ -331,12 +331,12 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
|
||||
lut->get_max_degree(0), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, scalar_bivariate_last_leaf_lut_f);
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, are_all_msb_zeros,
|
||||
sign_block, bsks, ksks, 1, lut, lut->params.message_modulus);
|
||||
|
||||
@@ -426,13 +426,12 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
auto signed_msb_lut = mem_ptr->signed_msb_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
msb_streams[0], gpu_indexes[0], signed_msb_lut->get_lut(0, 0),
|
||||
signed_msb_lut->get_degree(0), signed_msb_lut->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, lut_f);
|
||||
signed_msb_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
Torus const *sign_block = msb + (num_msb_radix_blocks - 1) * big_lwe_size;
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
msb_streams, gpu_indexes, gpu_count, lwe_array_msb_out, sign_block,
|
||||
are_all_msb_zeros, bsks, ksks, 1, signed_msb_lut,
|
||||
signed_msb_lut->params.message_modulus);
|
||||
@@ -492,7 +491,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
msb_streams[0], gpu_indexes[0], trivial_sign_block, scalar_sign_block,
|
||||
big_lwe_dimension, 1, 1, message_modulus, carry_modulus);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
msb_streams, gpu_indexes, gpu_count, lwe_array_sign_out,
|
||||
encrypted_sign_block, trivial_sign_block, bsks, ksks, 1,
|
||||
mem_ptr->signed_lut, mem_ptr->signed_lut->params.message_modulus);
|
||||
@@ -541,10 +540,10 @@ __host__ void integer_radix_signed_scalar_maxmin_kb(
|
||||
|
||||
// Selector
|
||||
// CMUX for Max or Min
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, sign, lwe_array_left,
|
||||
lwe_array_right, mem_ptr->cmux_buffer, bsks, ksks,
|
||||
total_num_radix_blocks);
|
||||
host_integer_radix_cmux_kb<Torus>(streams, gpu_indexes, gpu_count,
|
||||
lwe_array_out, sign, lwe_array_left,
|
||||
lwe_array_right, mem_ptr->cmux_buffer, bsks,
|
||||
ksks, total_num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -622,7 +621,7 @@ __host__ void host_integer_radix_scalar_maxmin_kb(
|
||||
|
||||
// Selector
|
||||
// CMUX for Max or Min
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
|
||||
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
|
||||
@@ -686,7 +685,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
|
||||
lsb_streams[0], gpu_indexes[0]);
|
||||
scalar_comparison_luts->broadcast_lut(lsb_streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
lsb_streams, gpu_indexes, gpu_count, lwe_array_lsb_out, packed_blocks,
|
||||
bsks, ksks, num_halved_lsb_radix_blocks, scalar_comparison_luts);
|
||||
}
|
||||
|
||||
@@ -47,6 +47,9 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
void *const *bsks, T *const *ksks, uint32_t input_lwe_dimension,
|
||||
uint32_t message_modulus, uint32_t num_radix_blocks, uint32_t num_scalars) {
|
||||
|
||||
if (num_radix_blocks == 0 | num_scalars == 0)
|
||||
return;
|
||||
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
uint32_t lwe_size = input_lwe_dimension + 1;
|
||||
@@ -80,7 +83,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
preshifted_buffer + (i % msg_bits) * num_radix_blocks * lwe_size;
|
||||
T *block_shift_buffer =
|
||||
all_shifted_buffer + j * num_radix_blocks * lwe_size;
|
||||
legacy_host_radix_blocks_rotate_right<T>(
|
||||
host_radix_blocks_rotate_right<T>(
|
||||
streams, gpu_indexes, gpu_count, block_shift_buffer,
|
||||
preshifted_radix_ct, i / msg_bits, num_radix_blocks, lwe_size);
|
||||
// create trivial assign for value = 0
|
||||
@@ -114,7 +117,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
auto scp_mem_ptr = mem->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<T>(
|
||||
host_propagate_single_carry<T>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_radix_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
@@ -122,12 +125,12 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
|
||||
// Small scalar_mul is used in shift/rotate
|
||||
template <typename T>
|
||||
__host__ void host_legacy_integer_small_scalar_mul_radix(
|
||||
__host__ void host_integer_small_scalar_mul_radix(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, T *output_lwe_array, T *input_lwe_array, T scalar,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
@@ -143,42 +146,4 @@ __host__ void host_legacy_integer_small_scalar_mul_radix(
|
||||
input_lwe_ciphertext_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
// Small scalar_mul is used in shift/rotate
|
||||
template <typename T>
|
||||
__host__ void host_integer_small_scalar_mul_radix(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *output_lwe_array,
|
||||
CudaRadixCiphertextFFI *input_lwe_array, T scalar) {
|
||||
|
||||
if (output_lwe_array->num_radix_blocks != input_lwe_array->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
if (output_lwe_array->lwe_dimension != input_lwe_array->lwe_dimension)
|
||||
PANIC("Cuda error: input and output lwe_dimension must be the same")
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
auto lwe_dimension = input_lwe_array->lwe_dimension;
|
||||
auto num_radix_blocks = input_lwe_array->num_radix_blocks;
|
||||
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = num_radix_blocks * lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
device_small_scalar_radix_multiplication<<<grid, thds, 0, streams[0]>>>(
|
||||
(T *)output_lwe_array->ptr, (T *)input_lwe_array->ptr, scalar,
|
||||
lwe_dimension, num_radix_blocks);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
output_lwe_array->noise_levels[i] =
|
||||
input_lwe_array->noise_levels[i] * scalar;
|
||||
output_lwe_array->degrees[i] = input_lwe_array->degrees[i] * scalar;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -57,9 +57,9 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
// one block is responsible to process single lwe ciphertext
|
||||
if (mem->shift_type == LEFT_SHIFT) {
|
||||
// rotate right as the blocks are from LSB to MSB
|
||||
legacy_host_radix_blocks_rotate_right<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
|
||||
num_blocks * big_lwe_size_bytes, streams[0],
|
||||
@@ -71,22 +71,22 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
|
||||
auto receiver_blocks = lwe_array;
|
||||
auto giver_blocks = rotated_buffer;
|
||||
legacy_host_radix_blocks_rotate_right<Torus>(
|
||||
streams, gpu_indexes, gpu_count, giver_blocks, lwe_array, 1, num_blocks,
|
||||
big_lwe_size);
|
||||
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
|
||||
giver_blocks, lwe_array, 1,
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, receiver_blocks,
|
||||
giver_blocks, bsks, ksks, num_blocks, lut_bivariate,
|
||||
lut_bivariate->params.message_modulus);
|
||||
|
||||
} else {
|
||||
// rotate left as the blocks are from LSB to MSB
|
||||
legacy_host_radix_blocks_rotate_left<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
|
||||
num_blocks * big_lwe_size_bytes, streams[0],
|
||||
@@ -98,13 +98,13 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
|
||||
auto receiver_blocks = lwe_array;
|
||||
auto giver_blocks = rotated_buffer;
|
||||
legacy_host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
giver_blocks, lwe_array, 1,
|
||||
num_blocks, big_lwe_size);
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
giver_blocks, lwe_array, 1, num_blocks,
|
||||
big_lwe_size);
|
||||
|
||||
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, receiver_blocks,
|
||||
giver_blocks, bsks, ksks, num_blocks, lut_bivariate,
|
||||
lut_bivariate->params.message_modulus);
|
||||
|
||||
@@ -53,9 +53,9 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
|
||||
if (mem->shift_type == LEFT_SHIFT) {
|
||||
// rotate right as the blocks are from LSB to MSB
|
||||
legacy_host_radix_blocks_rotate_right<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
// create trivial assign for value = 0
|
||||
cuda_memset_async(rotated_buffer, 0, rotations * big_lwe_size_bytes,
|
||||
@@ -75,7 +75,7 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
|
||||
size_t partial_block_count = num_blocks - rotations;
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, partial_current_blocks,
|
||||
partial_current_blocks, partial_previous_blocks, bsks, ksks,
|
||||
partial_block_count, lut_bivariate,
|
||||
@@ -83,9 +83,9 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
|
||||
} else {
|
||||
// right shift
|
||||
legacy_host_radix_blocks_rotate_left<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
// rotate left as the blocks are from LSB to MSB
|
||||
// create trivial assign for value = 0
|
||||
@@ -106,7 +106,7 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
|
||||
size_t partial_block_count = num_blocks - rotations;
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, partial_current_blocks,
|
||||
partial_current_blocks, partial_next_blocks, bsks, ksks,
|
||||
partial_block_count, lut_bivariate,
|
||||
@@ -156,9 +156,9 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
Torus *last_block_copy = &padding_block[big_lwe_size];
|
||||
|
||||
if (mem->shift_type == RIGHT_SHIFT) {
|
||||
legacy_host_radix_blocks_rotate_left<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_buffer, lwe_array, rotations,
|
||||
num_blocks, big_lwe_size);
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
|
||||
num_blocks * big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
@@ -200,7 +200,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
size_t partial_block_count = num_blocks - rotations;
|
||||
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, partial_current_blocks,
|
||||
partial_current_blocks, partial_next_blocks, bsks, ksks,
|
||||
partial_block_count, lut_bivariate,
|
||||
@@ -213,7 +213,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
}
|
||||
auto lut_univariate_padding_block =
|
||||
mem->lut_buffers_univariate[num_bits_in_block - 1];
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->local_streams_1, gpu_indexes, gpu_count, padding_block,
|
||||
last_block_copy, bsks, ksks, 1, lut_univariate_padding_block);
|
||||
// Replace blocks 'pulled' from the left with the correct padding
|
||||
@@ -227,7 +227,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
if (shift_within_block != 0) {
|
||||
auto lut_univariate_shift_last_block =
|
||||
mem->lut_buffers_univariate[shift_within_block - 1];
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->local_streams_2, gpu_indexes, gpu_count, last_block,
|
||||
last_block_copy, bsks, ksks, 1, lut_univariate_shift_last_block);
|
||||
}
|
||||
|
||||
@@ -22,13 +22,15 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(
|
||||
|
||||
void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks) {
|
||||
void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
|
||||
host_integer_radix_shift_and_rotate_kb_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array, lwe_shift,
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(lwe_shift),
|
||||
(int_shift_and_rotate_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
(uint64_t **)(ksks), num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
|
||||
|
||||
@@ -26,37 +26,22 @@ __host__ void scratch_cuda_integer_radix_shift_and_rotate_kb(
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array,
|
||||
CudaRadixCiphertextFFI const *lwe_shift,
|
||||
uint32_t gpu_count, Torus *lwe_array, Torus const *lwe_shift,
|
||||
int_shift_and_rotate_buffer<Torus> *mem, void *const *bsks,
|
||||
Torus *const *ksks) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
|
||||
if (lwe_array->num_radix_blocks != lwe_shift->num_radix_blocks)
|
||||
PANIC("Cuda error: lwe_shift and lwe_array num radix blocks must be "
|
||||
"the same")
|
||||
|
||||
if (lwe_array->lwe_dimension != lwe_shift->lwe_dimension)
|
||||
PANIC("Cuda error: lwe_shift and lwe_array lwe_dimension must be "
|
||||
"the same")
|
||||
|
||||
auto num_radix_blocks = lwe_array->num_radix_blocks;
|
||||
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
uint32_t bits_per_block = log2_int(mem->params.message_modulus);
|
||||
uint32_t total_nb_bits = bits_per_block * num_radix_blocks;
|
||||
if (total_nb_bits == 0)
|
||||
return;
|
||||
|
||||
auto big_lwe_dimension = mem->params.big_lwe_dimension;
|
||||
|
||||
if (lwe_array->lwe_dimension != big_lwe_dimension)
|
||||
PANIC("Cuda error: lwe_shift lwe_dimension must be equal to "
|
||||
"big_lwe_dimension")
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
// Extract all bits
|
||||
auto bits = mem->tmp_bits;
|
||||
extract_n_bits<Torus>(streams, gpu_indexes, gpu_count, bits, lwe_array, bsks,
|
||||
ksks, num_radix_blocks * bits_per_block,
|
||||
ksks, num_radix_blocks, bits_per_block,
|
||||
mem->bit_extract_luts);
|
||||
|
||||
// Extract shift bits
|
||||
@@ -76,14 +61,13 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
// Extracts bits and put them in the bit index 2 (=> bit number 3)
|
||||
// so that it is already aligned to the correct position of the cmux input
|
||||
// and we reduce noise growth
|
||||
extract_n_bits<Torus>(streams, gpu_indexes, gpu_count, shift_bits, lwe_shift,
|
||||
bsks, ksks, max_num_bits_that_tell_shift,
|
||||
extract_n_bits<Torus>(streams, gpu_indexes, gpu_count, shift_bits,
|
||||
(Torus *)lwe_shift, bsks, ksks, 1,
|
||||
max_num_bits_that_tell_shift,
|
||||
mem->bit_extract_luts_with_offset_2);
|
||||
|
||||
// If signed, do an "arithmetic shift" by padding with the sign bit
|
||||
CudaRadixCiphertextFFI last_bit;
|
||||
as_radix_ciphertext_slice<Torus>(&last_bit, bits, (total_nb_bits - 1),
|
||||
total_nb_bits);
|
||||
auto last_bit = bits + (total_nb_bits - 1) * big_lwe_size;
|
||||
|
||||
// Apply op
|
||||
auto rotated_input = mem->tmp_rotated;
|
||||
@@ -92,59 +76,60 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
auto mux_lut = mem->mux_lut;
|
||||
auto mux_inputs = mem->tmp_mux_inputs;
|
||||
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], input_bits_a,
|
||||
bits);
|
||||
cuda_memcpy_async_gpu_to_gpu(input_bits_a, bits,
|
||||
total_nb_bits * big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
for (int d = 0; d < max_num_bits_that_tell_shift; d++) {
|
||||
CudaRadixCiphertextFFI shift_bit;
|
||||
as_radix_ciphertext_slice<Torus>(&shift_bit, shift_bits, d, d + 1);
|
||||
auto shift_bit = shift_bits + d * big_lwe_size;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(input_bits_b, input_bits_a,
|
||||
total_nb_bits * big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], input_bits_b,
|
||||
input_bits_a);
|
||||
auto rotations = 1 << d;
|
||||
switch (mem->shift_type) {
|
||||
case LEFT_SHIFT:
|
||||
// rotate right as the blocks are from LSB to MSB
|
||||
if (input_bits_b->num_radix_blocks != total_nb_bits)
|
||||
PANIC("Cuda error: incorrect number of blocks")
|
||||
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_input, input_bits_b,
|
||||
rotations);
|
||||
host_radix_blocks_rotate_right<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
|
||||
rotations, total_nb_bits, big_lwe_size);
|
||||
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
rotated_input, 0, rotations);
|
||||
if (mem->is_signed && mem->shift_type == RIGHT_SHIFT)
|
||||
for (int i = 0; i < rotations; i++)
|
||||
cuda_memcpy_async_gpu_to_gpu(rotated_input + i * big_lwe_size,
|
||||
last_bit, big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
else
|
||||
cuda_memset_async(rotated_input, 0, rotations * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
break;
|
||||
case RIGHT_SHIFT:
|
||||
// rotate left as the blocks are from LSB to MSB
|
||||
if (input_bits_b->num_radix_blocks != total_nb_bits)
|
||||
PANIC("Cuda error: incorrect number of blocks")
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_input, input_bits_b,
|
||||
rotations);
|
||||
host_radix_blocks_rotate_left<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
|
||||
rotations, total_nb_bits, big_lwe_size);
|
||||
|
||||
if (mem->is_signed)
|
||||
for (int i = 0; i < rotations; i++) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], rotated_input,
|
||||
total_nb_bits - rotations + i, total_nb_bits - rotations + i + 1,
|
||||
&last_bit, 0, 1);
|
||||
}
|
||||
else {
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], rotated_input,
|
||||
total_nb_bits - rotations, total_nb_bits);
|
||||
}
|
||||
for (int i = 0; i < rotations; i++)
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
rotated_input + (total_nb_bits - rotations + i) * big_lwe_size,
|
||||
last_bit, big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
else
|
||||
cuda_memset_async(
|
||||
rotated_input + (total_nb_bits - rotations) * big_lwe_size, 0,
|
||||
rotations * big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
break;
|
||||
case LEFT_ROTATE:
|
||||
// rotate right as the blocks are from LSB to MSB
|
||||
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_input, input_bits_b,
|
||||
rotations);
|
||||
host_radix_blocks_rotate_right<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
|
||||
rotations, total_nb_bits, big_lwe_size);
|
||||
break;
|
||||
case RIGHT_ROTATE:
|
||||
// rotate left as the blocks are from LSB to MSB
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_input, input_bits_b,
|
||||
rotations);
|
||||
host_radix_blocks_rotate_left<Torus>(
|
||||
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
|
||||
rotations, total_nb_bits, big_lwe_size);
|
||||
break;
|
||||
default:
|
||||
PANIC("Unknown operation")
|
||||
@@ -152,52 +137,61 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
|
||||
// host_pack bits into one block so that we have
|
||||
// control_bit|b|a
|
||||
pack_bivariate_blocks<Torus>(
|
||||
streams, gpu_indexes, gpu_count, (Torus *)mux_inputs->ptr,
|
||||
mux_lut->lwe_indexes_out, (Torus *)rotated_input->ptr,
|
||||
(Torus *)input_bits_a->ptr, mux_lut->lwe_indexes_in, big_lwe_dimension,
|
||||
2, total_nb_bits);
|
||||
pack_bivariate_blocks<Torus>(streams, gpu_indexes, gpu_count, mux_inputs,
|
||||
mux_lut->lwe_indexes_out, rotated_input,
|
||||
input_bits_a, mux_lut->lwe_indexes_in,
|
||||
big_lwe_dimension, 2, total_nb_bits);
|
||||
|
||||
// The shift bit is already properly aligned/positioned
|
||||
host_add_the_same_block_to_all_blocks<Torus>(
|
||||
streams[0], gpu_indexes[0], mux_inputs, mux_inputs, &shift_bit);
|
||||
for (int i = 0; i < total_nb_bits; i++)
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0],
|
||||
mux_inputs + i * big_lwe_size,
|
||||
mux_inputs + i * big_lwe_size, shift_bit,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
|
||||
// we have
|
||||
// control_bit|b|a
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, input_bits_a, mux_inputs, bsks, ksks,
|
||||
mux_lut, total_nb_bits);
|
||||
total_nb_bits, mux_lut);
|
||||
}
|
||||
|
||||
// Initializes the output
|
||||
// Copy the last bit for each radix block
|
||||
auto lwe_last_out = lwe_array;
|
||||
last_bit = input_bits_a + (bits_per_block - 1) * big_lwe_size;
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
auto last_bit_index = (bits_per_block - 1) + i * bits_per_block;
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], lwe_array, i, i + 1, input_bits_a,
|
||||
last_bit_index, last_bit_index + 1);
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_last_out, last_bit, big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
lwe_last_out += big_lwe_size;
|
||||
last_bit += bits_per_block * big_lwe_size;
|
||||
}
|
||||
|
||||
// Bitshift and add the other bits
|
||||
lwe_last_out = lwe_array;
|
||||
for (int i = bits_per_block - 2; i >= 0; i--) {
|
||||
host_integer_small_scalar_mul_radix<Torus>(streams, gpu_indexes, gpu_count,
|
||||
lwe_array, lwe_array, 2);
|
||||
|
||||
host_integer_small_scalar_mul_radix<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_last_out, lwe_last_out, 2,
|
||||
big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
auto block = lwe_last_out;
|
||||
auto bit_to_add = input_bits_a + i * big_lwe_size;
|
||||
|
||||
for (int j = 0; j < num_radix_blocks; j++) {
|
||||
CudaRadixCiphertextFFI block;
|
||||
CudaRadixCiphertextFFI bit_to_add;
|
||||
as_radix_ciphertext_slice<Torus>(&block, lwe_array, j, j + 1);
|
||||
as_radix_ciphertext_slice<Torus>(&bit_to_add, input_bits_a,
|
||||
i + j * bits_per_block,
|
||||
i + j * bits_per_block + 1);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], &block, &block,
|
||||
&bit_to_add, 1);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], block, block, bit_to_add,
|
||||
big_lwe_dimension, 1);
|
||||
|
||||
block += big_lwe_size;
|
||||
bit_to_add += bits_per_block * big_lwe_size;
|
||||
}
|
||||
|
||||
// To give back a clean ciphertext
|
||||
auto cleaning_lut = mem->cleaning_lut;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, lwe_array, bsks, ksks,
|
||||
cleaning_lut, num_radix_blocks);
|
||||
streams, gpu_indexes, gpu_count, lwe_last_out, lwe_last_out, bsks, ksks,
|
||||
num_radix_blocks, cleaning_lut);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,16 +1,20 @@
|
||||
#include "integer/integer.h"
|
||||
#include "linearalgebra/addition.cuh"
|
||||
|
||||
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2) {
|
||||
/*
|
||||
* Perform the addition of two u32 input LWE ciphertext vectors.
|
||||
* See the equivalent operation on u64 ciphertexts for more details.
|
||||
*/
|
||||
void cuda_add_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
if (output->num_radix_blocks != input_1->num_radix_blocks ||
|
||||
output->num_radix_blocks != input_2->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
host_addition<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index, output,
|
||||
input_1, input_2, output->num_radix_blocks);
|
||||
host_addition<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint32_t *>(lwe_array_out),
|
||||
static_cast<const uint32_t *>(lwe_array_in_1),
|
||||
static_cast<const uint32_t *>(lwe_array_in_2),
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -18,14 +22,14 @@ void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
* - `v_stream` is a void pointer to the Cuda stream to be used in the kernel
|
||||
* launch
|
||||
* - `gpu_index` is the index of the GPU to be used in the kernel launch
|
||||
* - `output` is an array of size
|
||||
* - `lwe_array_out` is an array of size
|
||||
* `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have
|
||||
* been allocated on the GPU before calling this function, and that will hold
|
||||
* the result of the computation.
|
||||
* - `input_1` is the first LWE ciphertext vector used as input, it
|
||||
* - `lwe_array_in_1` is the first LWE ciphertext vector used as input, it
|
||||
* should have been allocated and initialized before calling this function. It
|
||||
* has the same size as the output array.
|
||||
* - `input_2` is the second LWE ciphertext vector used as input, it
|
||||
* - `lwe_array_in_2` is the second LWE ciphertext vector used as input, it
|
||||
* should have been allocated and initialized before calling this function. It
|
||||
* has the same size as the output array.
|
||||
* - `input_lwe_dimension` is the number of mask elements in the two input and
|
||||
@@ -39,16 +43,17 @@ void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
* vectors are left unchanged. This function is a wrapper to a device function
|
||||
* that performs the operation on the GPU.
|
||||
*/
|
||||
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2) {
|
||||
void cuda_add_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
if (output->num_radix_blocks != input_1->num_radix_blocks ||
|
||||
output->num_radix_blocks != input_2->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
host_addition<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index, output,
|
||||
input_1, input_2, output->num_radix_blocks);
|
||||
host_addition<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in_1),
|
||||
static_cast<const uint64_t *>(lwe_array_in_2),
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
|
||||
#include "device.h"
|
||||
#include "helper_multi_gpu.h"
|
||||
#include "integer/integer.h"
|
||||
#include "linear_algebra.h"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <stdio.h>
|
||||
@@ -51,7 +50,7 @@ __host__ void host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index,
|
||||
const uint32_t lwe_dimension,
|
||||
const uint32_t lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_ciphertext_count;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
@@ -72,7 +71,7 @@ __host__ void host_addition_plaintext_scalar(
|
||||
const T plaintext_input, const uint32_t lwe_dimension,
|
||||
const uint32_t lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_ciphertext_count;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
@@ -100,110 +99,13 @@ __global__ void addition(T *output, T const *input_1, T const *input_2,
|
||||
}
|
||||
|
||||
// Coefficient-wise addition
|
||||
// num_radix_blocks selects the amount of blocks to be added from the inputs
|
||||
template <typename T>
|
||||
__host__ void host_addition(cudaStream_t stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2,
|
||||
uint32_t num_radix_blocks) {
|
||||
if (output->lwe_dimension != input_1->lwe_dimension ||
|
||||
output->lwe_dimension != input_2->lwe_dimension)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
if (output->num_radix_blocks < num_radix_blocks ||
|
||||
input_1->num_radix_blocks < num_radix_blocks ||
|
||||
input_2->num_radix_blocks < num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be larger or "
|
||||
"equal to the num blocks to add")
|
||||
__host__ void host_addition(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
T const *input_1, T const *input_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = output->lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = num_radix_blocks * lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
addition<T><<<grid, thds, 0, stream>>>(
|
||||
static_cast<T *>(output->ptr), static_cast<const T *>(input_1->ptr),
|
||||
static_cast<const T *>(input_2->ptr), num_entries);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
for (uint i = 0; i < num_radix_blocks; i++) {
|
||||
output->degrees[i] = input_1->degrees[i] + input_2->degrees[i];
|
||||
output->noise_levels[i] =
|
||||
input_1->noise_levels[i] + input_2->noise_levels[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void constant_addition(T *output, T const *input_1, T const *input_2,
|
||||
uint32_t lwe_size, uint32_t num_entries) {
|
||||
|
||||
int tid = threadIdx.x;
|
||||
int index = blockIdx.x * blockDim.x + tid;
|
||||
if (index < num_entries) {
|
||||
// Here we take advantage of the wrapping behaviour of uint
|
||||
output[index] = input_1[index] + input_2[index % lwe_size];
|
||||
}
|
||||
}
|
||||
|
||||
// Coefficient-wise addition by the same block
|
||||
// input_with_multiple_blocks is a radix ciphertext with potentially multiple
|
||||
// blocks input_with_single_block is a radix ciphertext with a single block
|
||||
//
|
||||
// This function adds the single block in input_with_single_block to each block
|
||||
// in input_with_multiple_blocks. The result is written to output
|
||||
template <typename T>
|
||||
__host__ void host_add_the_same_block_to_all_blocks(
|
||||
cudaStream_t stream, uint32_t gpu_index, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_with_multiple_blocks,
|
||||
CudaRadixCiphertextFFI const *input_with_single_block) {
|
||||
if (output->num_radix_blocks != input_with_multiple_blocks->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
if (input_with_single_block->num_radix_blocks != 1)
|
||||
PANIC(
|
||||
"Cuda error: input_with_single_block must be a single-block ciphertext")
|
||||
if (output->lwe_dimension != input_with_multiple_blocks->lwe_dimension ||
|
||||
output->lwe_dimension != input_with_single_block->lwe_dimension)
|
||||
PANIC("Cuda error: input and output lwe dimensions must be the same")
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = output->lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = output->num_radix_blocks * lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
constant_addition<T><<<grid, thds, 0, stream>>>(
|
||||
static_cast<T *>(output->ptr),
|
||||
static_cast<const T *>(input_with_multiple_blocks->ptr),
|
||||
static_cast<const T *>(input_with_single_block->ptr), lwe_size,
|
||||
num_entries);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
for (uint i = 0; i < output->num_radix_blocks; i++) {
|
||||
output->degrees[i] = input_with_multiple_blocks->degrees[i] +
|
||||
input_with_single_block->degrees[0];
|
||||
output->noise_levels[i] = input_with_multiple_blocks->noise_levels[i] +
|
||||
input_with_single_block->noise_levels[0];
|
||||
}
|
||||
}
|
||||
|
||||
// Coefficient-wise addition
|
||||
template <typename T>
|
||||
__host__ void legacy_host_addition(cudaStream_t stream, uint32_t gpu_index,
|
||||
T *output, T const *input_1,
|
||||
T const *input_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
@@ -239,7 +141,7 @@ __host__ void host_pack_for_overflowing_ops(cudaStream_t stream,
|
||||
uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
@@ -277,7 +179,7 @@ __host__ void host_subtraction(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
@@ -315,7 +217,7 @@ __host__ void host_subtraction_plaintext(cudaStream_t stream,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = input_lwe_ciphertext_count;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
@@ -361,7 +263,7 @@ __host__ void host_unchecked_sub_with_correcting_term(
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t degree) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
|
||||
@@ -34,7 +34,7 @@ __host__ void host_cleartext_vec_multiplication(
|
||||
T const *cleartext_input, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
@@ -70,7 +70,7 @@ host_cleartext_multiplication(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
|
||||
@@ -26,7 +26,7 @@ __host__ void host_negation(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
T const *input, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
|
||||
@@ -96,7 +96,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
uint32_t total_polynomials) {
|
||||
|
||||
auto stream = static_cast<cudaStream_t>(stream_v);
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
auto input1 = (double2 *)_input1;
|
||||
auto input2 = (double2 *)_input2;
|
||||
auto output = (double2 *)_output;
|
||||
@@ -106,12 +106,10 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
int gridSize = total_polynomials;
|
||||
int blockSize = polynomial_size / choose_opt_amortized(polynomial_size);
|
||||
|
||||
int max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
|
||||
double2 *buffer;
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_polynomial_mul<FFTDegree<AmortizedDegree<256>, ForwardFFT>,
|
||||
@@ -132,7 +130,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
}
|
||||
break;
|
||||
case 512:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_polynomial_mul<FFTDegree<AmortizedDegree<521>, ForwardFFT>,
|
||||
@@ -153,7 +151,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
}
|
||||
break;
|
||||
case 1024:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_polynomial_mul<FFTDegree<AmortizedDegree<1024>, ForwardFFT>,
|
||||
@@ -174,7 +172,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
}
|
||||
break;
|
||||
case 2048:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_polynomial_mul<FFTDegree<AmortizedDegree<2048>, ForwardFFT>,
|
||||
@@ -195,7 +193,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
}
|
||||
break;
|
||||
case 4096:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_polynomial_mul<FFTDegree<AmortizedDegree<4096>, ForwardFFT>,
|
||||
@@ -216,7 +214,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
}
|
||||
break;
|
||||
case 8192:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_polynomial_mul<FFTDegree<AmortizedDegree<8192>, ForwardFFT>,
|
||||
@@ -237,7 +235,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
}
|
||||
break;
|
||||
case 16384:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_polynomial_mul<FFTDegree<AmortizedDegree<16384>, ForwardFFT>,
|
||||
|
||||
@@ -78,7 +78,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
double2 *dest, ST const *src,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t total_polynomials) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
int shared_memory_size = sizeof(double) * polynomial_size;
|
||||
|
||||
// Here the buffer size is the size of double2 times the number of polynomials
|
||||
@@ -111,12 +111,10 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
|
||||
cuda_memcpy_async_to_gpu(d_bsk, h_bsk, buffer_size, stream, gpu_index);
|
||||
|
||||
int max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
|
||||
double2 *buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT<FFTDegree<AmortizedDegree<256>, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
@@ -134,7 +132,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
}
|
||||
break;
|
||||
case 512:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT<FFTDegree<AmortizedDegree<512>, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
@@ -152,7 +150,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
}
|
||||
break;
|
||||
case 1024:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT<FFTDegree<AmortizedDegree<1024>, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
@@ -170,7 +168,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
}
|
||||
break;
|
||||
case 2048:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT<FFTDegree<AmortizedDegree<2048>, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
@@ -188,7 +186,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
}
|
||||
break;
|
||||
case 4096:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT<FFTDegree<AmortizedDegree<4096>, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
@@ -206,7 +204,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
}
|
||||
break;
|
||||
case 8192:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT<FFTDegree<AmortizedDegree<8192>, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
@@ -224,7 +222,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
}
|
||||
break;
|
||||
case 16384:
|
||||
if (shared_memory_size <= max_shared_memory) {
|
||||
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT<FFTDegree<AmortizedDegree<16384>, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user