Compare commits

..

3 Commits

Author SHA1 Message Date
David Testé
cae938a75b wip: measure latencies of a list of cts 2025-01-28 11:19:27 +01:00
David Testé
bae1d1cf77 WIP: fix gpu streams and use iter_batched 2025-01-22 10:56:08 +01:00
David Testé
a3bc1a9d9e chore(bench): new heuristic to define elements for throughput
This is done to fill up backend with enough elements to fill the
backend and avoid having long execution time for heavy operations
like multiplication or division.
2025-01-20 15:21:05 +01:00
286 changed files with 5654 additions and 9593 deletions

View File

@@ -11,47 +11,15 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
paths:
- '**'
- '!.github/**'
- '!ci/**'
jobs:
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (backward-compat-tests)
needs: check-user-permission
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
@@ -71,7 +39,7 @@ jobs:
name: Backward compatibility tests
needs: [ setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -79,8 +47,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -123,7 +90,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (backward-compat-tests)
@@ -147,4 +114,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -36,7 +36,7 @@ jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -62,13 +62,13 @@ jobs:
user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.user_docs_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
ci_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ci_any_changed }}
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
@@ -76,6 +76,7 @@ jobs:
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
dependencies:
- tfhe/Cargo.toml
@@ -120,9 +121,13 @@ jobs:
- '!tfhe/src/c_api/**'
- 'tfhe/docs/**/**.md'
- README.md
ci:
- .github/**
- ci/**
- name: Aggregate file changes
id: aggregated-changes
# CI files are not included in this aggregator.
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
@@ -137,20 +142,13 @@ jobs:
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
needs: should-run
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
(github.event_name == 'pull_request_target' && needs.should-run.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_triggering_actor.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -10,31 +10,16 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
# nextest
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
NO_BIG_PARAMS: FALSE
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [labeled]
push:
branches:
- main
@@ -43,11 +28,12 @@ jobs:
should-run:
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'pull_request_target' && contains(github.event.label.name, 'approved')) ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
steps.changed-files.outputs.integer_any_changed }}
@@ -56,14 +42,14 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
persist-credentials: "false"
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
integer:
- tfhe/Cargo.toml
@@ -75,30 +61,13 @@ jobs:
- tfhe/src/integer/**
- .github/workflows/aws_tfhe_integer_tests.yml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (unsigned-integer-tests)
needs: [ should-run, check-user-permission ]
needs: should-run
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
outputs:
@@ -119,7 +88,7 @@ jobs:
name: Unsigned integer tests
needs: setup-instance
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -127,8 +96,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: "false"
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -136,7 +104,7 @@ jobs:
toolchain: stable
- name: Should skip big parameters set
if: github.event_name == 'pull_request_target'
if: github.event_name == 'pull_request'
run: |
echo "NO_BIG_PARAMS=TRUE" >> "${GITHUB_ENV}"
@@ -162,7 +130,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (unsigned-integer-tests)
@@ -186,4 +154,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -10,31 +10,16 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
# nextest
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
NO_BIG_PARAMS: FALSE
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [labeled]
push:
branches:
- main
@@ -44,11 +29,11 @@ jobs:
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
((github.event_name == 'pull_request_target' || github.event_name == 'pull_request_target') && contains(github.event.label.name, 'approved')) ||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
steps.changed-files.outputs.integer_any_changed }}
@@ -57,14 +42,14 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
persist-credentials: "false"
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
integer:
- tfhe/Cargo.toml
@@ -76,30 +61,13 @@ jobs:
- tfhe/src/integer/**
- .github/workflows/aws_tfhe_signed_integer_tests.yml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (unsigned-integer-tests)
needs: [ should-run, check-user-permission ]
needs: should-run
if:
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
outputs:
@@ -120,7 +88,7 @@ jobs:
name: Signed integer tests
needs: setup-instance
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -128,8 +96,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: "false"
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -137,7 +104,7 @@ jobs:
toolchain: stable
- name: Should skip big parameters set
if: github.event_name == 'pull_request_target'
if: github.event_name == 'pull_request'
run: |
echo "NO_BIG_PARAMS=TRUE" >> "${GITHUB_ENV}"
@@ -167,7 +134,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (signed-integer-tests)
@@ -191,4 +158,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -10,28 +10,13 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
@@ -42,7 +27,7 @@ jobs:
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
permissions:
pull-requests: read
pull-requests: write
outputs:
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -78,14 +63,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
dependencies:
- tfhe/Cargo.toml
@@ -147,28 +131,11 @@ jobs:
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cpu-tests)
if: github.event_name != 'pull_request_target' ||
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.any_file_changed == 'true')
needs: [ should-run, check-user-permission ]
needs: should-run
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
@@ -186,11 +153,11 @@ jobs:
cpu-tests:
name: CPU tests
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -198,8 +165,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -274,7 +240,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cpu-tests)
@@ -298,4 +264,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -10,49 +10,16 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
jobs:
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (wasm-tests)
needs: check-user-permission
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
runs-on: ubuntu-latest
outputs:
@@ -73,7 +40,7 @@ jobs:
name: WASM tests
needs: setup-instance
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -81,8 +48,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -143,7 +109,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (wasm-tests)
@@ -167,4 +133,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -51,8 +51,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -104,8 +103,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -50,8 +50,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -95,8 +94,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -52,8 +52,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -73,8 +72,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Run benchmarks
run: |

View File

@@ -11,53 +11,20 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
FAST_BENCH: TRUE
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [labeled]
schedule:
# Weekly benchmarks will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"
jobs:
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
cuda-integer-benchmarks:
name: Cuda integer benchmarks (RTX 4090)
needs: check-user-permission
if: ${{ github.event_name == 'workflow_dispatch' ||
github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs' ||
contains(github.event.label.name, '4090_bench') }}
@@ -66,14 +33,16 @@ jobs:
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ["self-hosted", "4090-desktop"]
timeout-minutes: 1440 # 24 hours
strategy:
fail-fast: false
max-parallel: 1
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -94,8 +63,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Run integer benchmarks
run: |
@@ -131,7 +99,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
cuda-core-crypto-benchmarks:
name: Cuda core crypto benchmarks (RTX 4090)
@@ -148,8 +116,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -169,8 +136,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Run core crypto benchmarks
run: |
@@ -216,11 +182,11 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
remove_github_label:
name: Remove 4090 bench label
if: ${{ always() && github.event_name == 'pull_request_target' }}
if: ${{ always() && github.event_name == 'pull_request' }}
needs: [cuda-integer-benchmarks, cuda-core-crypto-benchmarks]
runs-on: ubuntu-latest
steps:

View File

@@ -53,8 +53,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -109,8 +108,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -14,7 +14,7 @@ on:
type: string
required: true
secrets:
REPO_CHECKOUT_TOKEN:
FHE_ACTIONS_TOKEN:
required: true
SLAB_ACTION_TOKEN:
required: true
@@ -80,8 +80,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -135,8 +134,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -26,7 +26,7 @@ on:
type: boolean
default: false
secrets:
REPO_CHECKOUT_TOKEN:
FHE_ACTIONS_TOKEN:
required: true
SLAB_ACTION_TOKEN:
required: true
@@ -150,8 +150,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -211,8 +210,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -119,8 +119,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -140,8 +139,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Should run benchmarks with all precisions
if: inputs.all_precisions

View File

@@ -82,8 +82,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -103,8 +102,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Run benchmarks with AVX512
run: |

View File

@@ -119,8 +119,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -140,8 +139,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Should run benchmarks with all precisions
if: inputs.all_precisions

View File

@@ -94,8 +94,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -94,8 +94,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -38,6 +38,7 @@ jobs:
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
zk_pok:
- tfhe-zk-pok/**
@@ -79,8 +80,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -100,8 +100,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Run benchmarks
run: |
@@ -132,8 +131,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -28,7 +28,7 @@ jobs:
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
permissions:
pull-requests: read
pull-requests: write
outputs:
wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
steps:
@@ -36,13 +36,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
wasm_bench:
- tfhe/Cargo.toml
@@ -88,8 +88,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -177,8 +176,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -43,13 +43,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
zk_pok:
- tfhe/Cargo.toml
@@ -130,8 +130,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Get benchmark details
run: |
@@ -151,8 +150,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Run benchmarks with AVX512
run: |
@@ -189,8 +187,7 @@ jobs:
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash

View File

@@ -1,39 +0,0 @@
# Check if an actor is a collaborator and has write access
name: Check Actor Permissions
on:
workflow_call:
inputs:
username:
type: string
default: ${{ github.triggering_actor }}
outputs:
is_authorized:
value: ${{ jobs.check-actor-permission.outputs.actor_authorized }}
secrets:
TOKEN:
required: true
jobs:
check-actor-permission:
runs-on: ubuntu-latest
outputs:
actor_authorized: ${{ steps.check-access.outputs.require-result }}
steps:
- name: Get User Permission
id: check-access
uses: actions-cool/check-user-permission@7b90a27f92f3961b368376107661682c441f6103 # v2.3.0
with:
require: write
username: ${{ inputs.username }}
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
- name: Check User Permission
if: ${{ !(inputs.username == 'dependabot[bot]' || inputs.username == 'cla-bot[bot]') &&
steps.check-access.outputs.require-result == 'false' }}
run: |
echo "${{ inputs.username }} does not have permissions on this repo."
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
echo "Job originally triggered by ${{ github.actor }}"
exit 1

View File

@@ -1,40 +0,0 @@
# Check if there is any change in CI files since last commit
name: Check changes in CI files
on:
workflow_call:
inputs:
checkout_ref:
type: string
required: true
outputs:
ci_file_changed:
value: ${{ jobs.check-changes.outputs.ci_file_changed }}
secrets:
REPO_CHECKOUT_TOKEN:
required: true
jobs:
check-changes:
runs-on: ubuntu-latest
permissions:
pull-requests: read
outputs:
ci_file_changed: ${{ steps.changed-files.outputs.ci_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ inputs.checkout_ref }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
files_yaml: |
ci:
- .github/**
- ci/**

View File

@@ -2,7 +2,6 @@
name: Check commit and PR compliance
on:
pull_request:
jobs:
check-commit-pr:
name: Check commit and PR

View File

@@ -1,32 +0,0 @@
# Check if a pull request fulfill pre-conditions to be accepted
name: Check PR from fork
on:
pull_request_target:
paths:
- '.github/**'
- 'ci/**'
jobs:
# Fail if the triggering actor is not part of Zama organization.
check-user-permission:
name: Check event user permissions
uses: ./.github/workflows/check_actor_permissions.yml
with:
username: ${{ github.event.pull_request.user.login }}
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
write-comment:
name: Write PR comment
if: ${{ always() && needs.check-user-permission.outputs.is_authorized == 'false' }}
needs: check-user-permission
runs-on: ubuntu-latest
permissions:
pull-requests: write
steps:
- name: Write warning
uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b
with:
message: |
CI files have changed. Only Zama organization members are authorized to modify these files.

View File

@@ -0,0 +1,29 @@
# Check if triggering actor is a collaborator and has write access
name: Check Triggering Actor
on:
workflow_call:
secrets:
TOKEN:
required: true
jobs:
check-actor-permission:
runs-on: ubuntu-latest
steps:
- name: Get User Permission
id: check-access
uses: actions-cool/check-user-permission@7b90a27f92f3961b368376107661682c441f6103 # v2.3.0
with:
require: write
username: ${{ github.triggering_actor }}
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
- name: Check User Permission
if: steps.check-access.outputs.require-result == 'false'
run: |
echo "${{ github.triggering_actor }} does not have permissions on this repo."
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
echo "Job originally triggered by ${{ github.actor }}"
exit 1

View File

@@ -14,9 +14,6 @@ jobs:
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get actionlint
run: |
@@ -34,4 +31,3 @@ jobs:
with:
allowlist: |
slsa-framework/slsa-github-generator
./

View File

@@ -83,7 +83,7 @@ jobs:
make test_shortint_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
make test_integer_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}

View File

@@ -10,49 +10,16 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
jobs:
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (csprng-randomness-tests)
needs: check-user-permission
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
runs-on: ubuntu-latest
outputs:
@@ -73,7 +40,7 @@ jobs:
name: CSPRNG randomness tests
needs: setup-instance
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -81,8 +48,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -99,7 +65,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (csprng-randomness-tests)
@@ -123,4 +89,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -8,8 +8,6 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
PR_BRANCH: ${{ github.head_ref || github.ref_name }}
CLOSE_TYPE: ${{ github.event.pull_request.merged && 'merge' || 'close' }}
@@ -17,8 +15,6 @@ env:
on:
pull_request:
types: [ closed ]
pull_request_target:
types: [ closed ]
# The same pattern is used for jobs that use the github api:
# - save the result of the API call in the env var "GH_API_RES". Since the var is multiline

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend on an RTX 4090 machine
name: Cuda - 4090 full tests
name: TFHE Cuda Backend - 4090 full tests
env:
CARGO_TERM_COLOR: always
@@ -11,57 +11,24 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
jobs:
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
cuda-tests-linux:
name: CUDA tests (RTX 4090)
needs: check-user-permission
if: github.event_name == 'workflow_dispatch' ||
contains(github.event.label.name, '4090_test') ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: true
runs-on: ["self-hosted", "4090-desktop"]
@@ -70,8 +37,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -103,7 +69,7 @@ jobs:
make test_high_level_api_gpu
- uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
if: ${{ always() && github.event_name == 'pull_request_target' }}
if: ${{ always() && github.event_name == 'pull_request' }}
with:
labels: 4090_test
github_token: ${{ secrets.GITHUB_TOKEN }}
@@ -114,4 +80,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
name: Cuda - Fast tests on H100
name: TFHE Cuda Backend - Fast tests on H100
env:
CARGO_TERM_COLOR: always
@@ -11,34 +11,19 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [ labeled ]
jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -46,14 +31,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -72,27 +56,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-h100-tests)
needs: [ should-run, check-user-permission ]
if: github.event_name != 'pull_request_target' ||
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
@@ -113,10 +80,10 @@ jobs:
cuda-tests-linux:
name: CUDA H100 tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -132,8 +99,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -179,7 +145,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-h100-tests)
@@ -203,4 +169,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend on an AWS instance
name: Cuda - Fast tests
name: TFHE Cuda Backend - Fast tests
env:
CARGO_TERM_COLOR: always
@@ -11,32 +11,18 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
paths:
- '**'
- '!.github/**'
- '!ci/**'
jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -44,14 +30,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -70,27 +55,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-tests)
needs: [ should-run, check-user-permission ]
if: github.event_name == 'workflow_dispatch' ||
needs: should-run
if: github.event_name != 'pull_request' ||
needs.should-run.outputs.gpu_test == 'true'
runs-on: ubuntu-latest
outputs:
@@ -110,10 +78,10 @@ jobs:
cuda-tests-linux:
name: CUDA tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -129,8 +97,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -176,7 +143,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-tests)
@@ -200,4 +167,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
name: Cuda - Full tests on H100
name: TFHE Cuda Backend - Full tests on H100
env:
CARGO_TERM_COLOR: always
@@ -11,6 +11,7 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
workflow_dispatch:
@@ -65,7 +66,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -109,7 +110,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-h100-tests)
@@ -132,4 +133,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend on an AWS instance
name: Cuda - Full tests multi-GPU
name: TFHE Cuda Backend - Full tests multi-GPU
env:
CARGO_TERM_COLOR: always
@@ -11,34 +11,19 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -46,14 +31,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -72,27 +56,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-tests-multi-gpu)
needs: [ should-run, check-user-permission ]
if: github.event_name != 'pull_request_target' ||
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
@@ -113,10 +80,10 @@ jobs:
cuda-tests-linux:
name: CUDA multi-GPU tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -132,8 +99,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -182,7 +148,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-tests-multi-gpu)
@@ -206,4 +172,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Perfom tfhe-cuda-backend post-commit checks on an AWS instance
name: Cuda - Post-commit Checks
name: TFHE Cuda Backend - Post-commit Checks
env:
CARGO_TERM_COLOR: always
@@ -11,45 +11,13 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
paths:
- '**'
- '!.github/**'
- '!ci/**'
jobs:
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-pcc)
needs: check-user-permission
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
@@ -69,7 +37,7 @@ jobs:
name: CUDA post-commit checks
needs: setup-instance
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -88,8 +56,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Set up home
run: |
@@ -133,7 +100,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-pcc)
@@ -157,4 +124,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
name: Cuda - Signed integer tests with classical PBS
name: TFHE Cuda Backend - Signed integer tests with classical PBS
env:
CARGO_TERM_COLOR: always
@@ -11,34 +11,19 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [ labeled ]
jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -46,14 +31,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -72,27 +56,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-signed-classic-tests)
needs: [ should-run, check-user-permission ]
if: github.event_name != 'pull_request_target' ||
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
@@ -113,10 +80,10 @@ jobs:
cuda-tests-linux:
name: CUDA signed integer tests with classical PBS
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -130,10 +97,6 @@ jobs:
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -165,7 +128,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-signed-classic-tests)
@@ -189,4 +152,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Signed integer GPU tests on an H100 VM on hyperstack
name: Cuda - Signed integer tests on H100
name: TFHE Cuda Backend - Signed integer tests on H100
env:
CARGO_TERM_COLOR: always
@@ -11,34 +11,19 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [ labeled ]
jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -46,14 +31,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -72,27 +56,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-h100-tests)
needs: [ should-run, check-user-permission ]
if: github.event_name != 'pull_request_target' ||
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
@@ -113,10 +80,10 @@ jobs:
cuda-tests-linux:
name: CUDA H100 signed integer tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -130,10 +97,6 @@ jobs:
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -165,7 +128,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-h100-tests)
@@ -189,4 +152,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend signed integer on an AWS instance
name: Cuda - Signed integer tests
name: TFHE Cuda Backend - Signed integer tests
env:
CARGO_TERM_COLOR: always
@@ -11,28 +11,17 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
FAST_TESTS: TRUE
NIGHTLY_TESTS: FALSE
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
paths:
- '**'
- '!.github/**'
- '!ci/**'
types:
- opened
- synchronize
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
@@ -41,7 +30,7 @@ jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -49,14 +38,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -75,27 +63,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-signed-integer-tests)
runs-on: ubuntu-latest
needs: [ should-run, check-user-permission ]
needs: should-run
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
github.event_name == 'workflow_dispatch' ||
needs.should-run.outputs.gpu_test == 'true'
@@ -116,10 +87,10 @@ jobs:
cuda-signed-integer-tests:
name: CUDA signed integer tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -135,8 +106,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -176,7 +146,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-tests)
@@ -200,4 +170,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
name: Cuda - Unsigned integer tests with classical PBS
name: TFHE Cuda Backend - Unsigned integer tests with classical PBS
env:
CARGO_TERM_COLOR: always
@@ -11,34 +11,19 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [ labeled ]
jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -46,14 +31,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -72,27 +56,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-unsigned-classic-tests)
needs: [ should-run, check-user-permission ]
if: github.event_name == 'workflow_dispatch' ||
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
@@ -113,10 +80,10 @@ jobs:
cuda-tests-linux:
name: CUDA unsigned integer tests with classical PBS
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -130,10 +97,6 @@ jobs:
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -165,7 +128,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-unsigned-classic-tests)
@@ -189,4 +152,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Test unsigned integers on an H100 VM on hyperstack
name: Cuda - Unsigned integer tests on H100
name: TFHE Cuda Backend - Unsigned integer tests on H100
env:
CARGO_TERM_COLOR: always
@@ -11,34 +11,19 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [ labeled ]
jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -46,14 +31,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -72,27 +56,10 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-h100-tests)
needs: [ should-run, check-user-permission ]
if: github.event_name == 'workflow_dispatch' ||
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
@@ -113,10 +80,10 @@ jobs:
cuda-tests-linux:
name: CUDA H100 unsigned integer tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -130,10 +97,6 @@ jobs:
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -165,7 +128,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-h100-tests)
@@ -189,4 +152,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -1,5 +1,5 @@
# Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
name: Cuda - Unsigned integer tests
name: TFHE Cuda Backend - Unsigned integer tests
env:
CARGO_TERM_COLOR: always
@@ -11,29 +11,16 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
FAST_TESTS: TRUE
NIGHTLY_TESTS: FALSE
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types:
- opened
- synchronize
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
@@ -42,7 +29,7 @@ jobs:
should-run:
runs-on: ubuntu-latest
permissions:
pull-requests: read
pull-requests: write
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
@@ -50,14 +37,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
gpu:
- tfhe/Cargo.toml
@@ -76,26 +62,9 @@ jobs:
- scripts/integer-tests.sh
- ci/slab.toml
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (cuda-unsigned-integer-tests)
needs: [ should-run, check-user-permission ]
needs: should-run
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
github.event_name == 'workflow_dispatch' ||
needs.should-run.outputs.gpu_test == 'true'
@@ -117,10 +86,10 @@ jobs:
cuda-unsigned-integer-tests:
name: CUDA unsigned integer tests
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
@@ -134,10 +103,6 @@ jobs:
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
@@ -177,7 +142,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (cuda-tests)
@@ -201,4 +166,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -51,7 +51,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203

View File

@@ -2,20 +2,8 @@ name: Tests on M1 CPU
on:
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
types: [ labeled ]
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
types: [ labeled ]
paths:
- '**'
- '!.github/**'
- '!ci/**'
types: [labeled]
# Have a nightly build for M1 tests
schedule:
# * is a special character in YAML so you have to quote this string
@@ -33,35 +21,14 @@ env:
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
# nextest
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
REF: ${{ github.event.pull_request.head.sha || github.sha }}
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}-${{ github.head_ref }}
cancel-in-progress: true
jobs:
check-ci-files:
uses: ./.github/workflows/check_ci_files_change.yml
with:
checkout_ref: ${{ github.event.pull_request.head.sha || github.sha }}
secrets:
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: check-ci-files
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.check-ci-files.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_actor_permissions.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
cargo-builds-m1:
needs: check-user-permission
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
github.event_name == 'workflow_dispatch' ||
contains(github.event.label.name, 'm1_test') }}
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
runs-on: ["self-hosted", "m1mac"]
# 12 hours, default is 6 hours, hopefully this is more than enough
timeout-minutes: 720
@@ -70,8 +37,6 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: "false"
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -213,7 +178,7 @@ jobs:
if: ${{ always() }}
steps:
- uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
if: ${{ github.event_name == 'pull_request_target' }}
if: ${{ github.event_name == 'pull_request' }}
with:
labels: m1_test
github_token: ${{ secrets.GITHUB_TOKEN }}
@@ -226,8 +191,6 @@ jobs:
SLACK_COLOR: ${{ needs.cargo-builds-m1.result }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }}. (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}

View File

@@ -46,8 +46,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Prepare package
run: |
cargo package -p tfhe
@@ -83,8 +82,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Create NPM version tag
if: ${{ inputs.npm_latest_tag }}
run: |

View File

@@ -61,8 +61,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Set up home
run: |

View File

@@ -30,7 +30,7 @@ jobs:
- name: Prepare package
run: |
cargo package -p tfhe-csprng
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-tfhe-csprng
path: target/package/*.crate

View File

@@ -33,7 +33,7 @@ jobs:
- name: Prepare package
run: |
cargo package -p tfhe-fft
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: crate
path: target/package/*.crate

View File

@@ -33,7 +33,7 @@ jobs:
- name: Prepare package
run: |
cargo package -p tfhe-ntt
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: crate
path: target/package/*.crate

View File

@@ -30,7 +30,7 @@ jobs:
- name: Prepare package
run: |
cargo package -p tfhe-versionable-derive
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-tfhe-versionable-derive
path: target/package/*.crate
@@ -61,8 +61,6 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
@@ -105,13 +103,13 @@ jobs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
fetch-depth: 0
- name: Prepare package
run: |
cargo package -p tfhe-versionable
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-tfhe-versionable
path: target/package/*.crate
@@ -139,7 +137,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
with:
fetch-depth: 0
- name: Download artifact

View File

@@ -24,7 +24,7 @@ jobs:
- name: Prepare package
run: |
cargo package -p tfhe-zk-pok
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-zk-pok
path: target/package/*.crate
@@ -61,8 +61,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:

View File

@@ -16,8 +16,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: git-sync
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
with:

View File

@@ -20,7 +20,7 @@ BENCH_OP_FLAVOR?=DEFAULT
BENCH_TYPE?=latency
NODE_VERSION=22.6
BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
BACKWARD_COMPAT_DATA_BRANCH?=v0.5
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
TFHE_SPEC:=tfhe
@@ -282,14 +282,14 @@ check_typos: install_typos_checker
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
clippy_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
--all-targets \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
check_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
--features=boolean,shortint,integer,internal-keycache,gpu \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
--all-targets \
-p $(TFHE_SPEC)
@@ -394,10 +394,10 @@ clippy_trivium: install_rs_check_toolchain
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
clippy_all_targets: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
@@ -1056,35 +1056,35 @@ bench_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer # Run benchmarks for signed integer
bench_signed_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
bench_integer_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
bench_integer_compression: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression_gpu
bench_integer_compression_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
--features=integer,internal-keycache,gpu,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
bench_integer_multi_bit: install_rs_check_toolchain
@@ -1092,7 +1092,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -1100,7 +1100,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
bench_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1108,7 +1108,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1116,14 +1116,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) -- ::unsigned
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
bench_integer_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-pke-bench \
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
--features=integer,internal-keycache,zk-pok,nightly-avx512,pbs-stats \
-p $(TFHE_SPEC) --
.PHONY: bench_shortint # Run benchmarks for shortint

View File

@@ -27,8 +27,6 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
std::abort(); \
}
void cuda_set_device(uint32_t gpu_index);
cudaEvent_t cuda_create_event(uint32_t gpu_index);
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,

View File

@@ -112,8 +112,6 @@ template <typename Torus> struct int_decompression {
generate_device_accumulator_with_encoding<Torus>(
streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
decompression_rescale_lut->get_degree(0),
decompression_rescale_lut->get_max_degree(0),
encryption_params.glwe_dimension, encryption_params.polynomial_size,
effective_compression_message_modulus,
effective_compression_carry_modulus,

View File

@@ -38,15 +38,6 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };
extern "C" {
typedef struct {
void *ptr;
uint64_t *degrees;
uint64_t *noise_levels;
uint32_t num_radix_blocks;
uint32_t lwe_dimension;
} CudaRadixCiphertextFFI;
void scratch_cuda_apply_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -54,7 +45,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint64_t lut_degree, bool allocate_gpu_memory);
bool allocate_gpu_memory);
void scratch_cuda_apply_many_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -62,12 +53,13 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory);
void cuda_apply_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
void *const *ksks, void *const *bsks);
uint32_t num_many_lut, bool allocate_gpu_memory);
void cuda_apply_univariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count, void *output_radix_lwe,
void const *input_radix_lwe,
int8_t *mem_ptr, void *const *ksks,
void *const *bsks, uint32_t num_blocks);
void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
@@ -81,14 +73,13 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint64_t lut_degree, bool allocate_gpu_memory);
bool allocate_gpu_memory);
void cuda_apply_bivariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe_1,
CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
void *const *ksks, void *const *bsks, uint32_t shift);
void *output_radix_lwe, void const *input_radix_lwe_1,
void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
void *const *bsks, uint32_t num_blocks, uint32_t shift);
void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
@@ -97,10 +88,9 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
void cuda_apply_many_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
void *const *ksks, void *const *bsks, uint32_t num_luts,
uint32_t lut_stride);
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
void *const *ksks, void *const *bsks, uint32_t num_blocks,
uint32_t num_luts, uint32_t lut_stride);
void scratch_cuda_full_propagation_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -142,14 +132,15 @@ void cleanup_cuda_integer_mult(void *const *streams,
void cuda_negate_integer_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus);
void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus);
void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus);
void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -198,8 +189,8 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(
void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
void *const *ksks, uint32_t num_blocks);
void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
uint32_t const *gpu_indexes,
@@ -242,17 +233,15 @@ void scratch_cuda_integer_radix_bitop_kb_64(
void cuda_bitop_integer_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_1,
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
void *const *bsks, void *const *ksks);
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
uint32_t lwe_ciphertext_count);
void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op);
void cleanup_cuda_integer_bitop(void *const *streams,
uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -269,11 +258,9 @@ void scratch_cuda_integer_radix_cmux_kb_64(
void cuda_cmux_integer_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_condition,
CudaRadixCiphertextFFI const *lwe_array_true,
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
void *const *bsks, void *const *ksks);
void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
void *const *ksks, uint32_t lwe_ciphertext_count);
void cleanup_cuda_integer_radix_cmux(void *const *streams,
uint32_t const *gpu_indexes,
@@ -424,7 +411,7 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint64_t lut_degree, bool allocate_gpu_memory);
bool allocate_gpu_memory);
void cuda_integer_compute_prefix_sum_hillis_steele_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -452,8 +439,8 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
void *const *bsks, void *const *ksks);
void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
void *const *ksks, uint32_t num_blocks);
void cleanup_cuda_integer_abs_inplace(void *const *streams,
uint32_t const *gpu_indexes,

View File

@@ -1,8 +0,0 @@
#ifndef CUDA_RADIX_CIPHERTEXT_H
#define CUDA_RADIX_CIPHERTEXT_H
void release_radix_ciphertext(cudaStream_t const stream,
uint32_t const gpu_index,
CudaRadixCiphertextFFI *data);
#endif

View File

@@ -1,7 +1,6 @@
#ifndef CUDA_LINALG_H_
#define CUDA_LINALG_H_
#include "integer/integer.h"
#include <stdint.h>
extern "C" {
@@ -15,13 +14,16 @@ void cuda_negate_lwe_ciphertext_vector_64(
void const *lwe_array_in, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_in_1,
CudaRadixCiphertextFFI const *lwe_array_in_2);
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_in_1,
CudaRadixCiphertextFFI const *lwe_array_in_2);
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *plaintext_array_in,

View File

@@ -5,12 +5,12 @@
template <typename Torus>
bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
uint32_t polynomial_size, int max_shared_memory);
uint32_t polynomial_size);
template <typename Torus>
bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, int max_shared_memory);
uint32_t level_count);
#if CUDA_ARCH >= 900
template <typename Torus>
@@ -114,8 +114,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
cuda_set_device(gpu_index);
this->pbs_variant = pbs_variant;
this->lwe_chunk_size = lwe_chunk_size;
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);

View File

@@ -61,7 +61,7 @@ get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
template <typename Torus>
bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
uint32_t polynomial_size, int max_shared_memory);
uint32_t polynomial_size);
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;
@@ -77,10 +77,10 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
bool allocate_gpu_memory) {
cuda_set_device(gpu_index);
this->pbs_variant = pbs_variant;
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
auto max_shared_memory = cuda_get_max_shared_memory(0);
if (allocate_gpu_memory) {
switch (pbs_variant) {
@@ -157,7 +157,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
bool supports_dsm =
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
Torus>(polynomial_size, max_shared_memory);
Torus>(polynomial_size);
uint64_t full_sm =
get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(
@@ -218,7 +218,8 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
template <typename Torus>
uint64_t get_buffer_size_programmable_bootstrap_cg(
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
uint32_t input_lwe_ciphertext_count) {
int max_shared_memory = cuda_get_max_shared_memory(0);
uint64_t full_sm =
get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
uint64_t partial_sm =
@@ -244,8 +245,7 @@ template <typename Torus>
bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
uint32_t polynomial_size,
uint32_t level_count,
uint32_t num_samples,
int max_shared_memory);
uint32_t num_samples);
template <typename Torus>
void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(

View File

@@ -8,7 +8,7 @@ extern "C" {
bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t num_samples, int max_shared_memory);
uint32_t num_samples);
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
void *stream, uint32_t gpu_index, void *dest, void const *src,

View File

@@ -11,7 +11,7 @@ void cuda_convert_lwe_ciphertext_vector_to_gpu(cudaStream_t stream,
uint32_t gpu_index, T *dest,
T *src, uint32_t number_of_cts,
uint32_t lwe_dimension) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
}
@@ -21,7 +21,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
uint32_t gpu_index, T *dest,
T *src, uint32_t number_of_cts,
uint32_t lwe_dimension) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
}
@@ -55,7 +55,7 @@ __host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
Torus const *glwe_array_in,
uint32_t const *nth_array, uint32_t num_nths,
uint32_t glwe_dimension) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
dim3 grid(num_nths);
dim3 thds(params::degree / params::opt);

View File

@@ -261,7 +261,7 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
// Optimization of packing keyswitch when packing many LWEs
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
check_cuda_error(cudaGetLastError());
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;

View File

@@ -57,7 +57,7 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
if (gpu_count != 1)
PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
"supported yet.")
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
int shared_memory_size = sizeof(double) * polynomial_size;

View File

@@ -105,7 +105,7 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
uint32_t num_samples) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
constexpr int num_threads_y = 32;
int num_blocks, num_threads_x;
@@ -160,7 +160,7 @@ __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;

View File

@@ -110,7 +110,7 @@ template <typename Torus>
__host__ void host_modulus_switch_inplace(cudaStream_t stream,
uint32_t gpu_index, Torus *array,
int size, uint32_t log_modulus) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
int num_threads = 0, num_blocks = 0;
getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);

View File

@@ -2,12 +2,8 @@
#include <cstdint>
#include <cuda_runtime.h>
void cuda_set_device(uint32_t gpu_index) {
check_cuda_error(cudaSetDevice(gpu_index));
}
cudaEvent_t cuda_create_event(uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
cudaEvent_t event;
check_cuda_error(cudaEventCreate(&event));
return event;
@@ -15,24 +11,24 @@ cudaEvent_t cuda_create_event(uint32_t gpu_index) {
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaEventRecord(event, stream));
}
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
}
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaEventDestroy(event));
}
/// Unsafe function to create a CUDA stream, must check first that GPU exists
cudaStream_t cuda_create_stream(uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
cudaStream_t stream;
check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
return stream;
@@ -40,22 +36,15 @@ cudaStream_t cuda_create_stream(uint32_t gpu_index) {
/// Unsafe function to destroy CUDA stream, must check first the GPU exists
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaStreamDestroy(stream));
}
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaStreamSynchronize(stream));
}
void synchronize_streams(cudaStream_t const *streams,
uint32_t const *gpu_indexes, uint32_t gpu_count) {
for (uint i = 0; i < gpu_count; i++) {
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
}
}
// Determine if a CUDA device is available at runtime
uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
@@ -63,7 +52,7 @@ uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
/// or if there's not enough memory. A safe wrapper around it must call
/// cuda_check_valid_malloc() first
void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
void *ptr;
check_cuda_error(cudaMalloc((void **)&ptr, size));
@@ -74,7 +63,7 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
/// asynchronously.
void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
void *ptr;
#ifndef CUDART_VERSION
@@ -97,7 +86,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
/// Check that allocation is valid
void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
size_t total_mem, free_mem;
check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
if (size > free_mem) {
@@ -145,7 +134,7 @@ void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
PANIC("Cuda error: invalid device pointer in async copy to GPU.")
}
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(
cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
}
@@ -165,7 +154,7 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
if (attr_src.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
}
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
if (attr_src.device == attr_dest.device) {
check_cuda_error(
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
@@ -190,7 +179,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
if (attr_src.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
}
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
if (attr_src.device == attr_dest.device) {
check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice));
} else {
@@ -201,7 +190,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
/// Synchronizes device
void cuda_synchronize_device(uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaDeviceSynchronize());
}
@@ -214,7 +203,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid dest device pointer in cuda memset.")
}
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
}
@@ -234,7 +223,7 @@ void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
if (attr.type != cudaMemoryTypeDevice) {
PANIC("Cuda error: invalid dest device pointer in cuda set value.")
}
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
int block_size = 256;
int num_blocks = (n + block_size - 1) / block_size;
@@ -264,7 +253,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
}
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
}
@@ -278,14 +267,14 @@ int cuda_get_number_of_gpus() {
/// Drop a cuda array
void cuda_drop(void *ptr, uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
check_cuda_error(cudaFree(ptr));
}
/// Drop a cuda array asynchronously, if supported on the device
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {
cuda_set_device(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
#ifndef CUDART_VERSION
#error CUDART_VERSION Undefined!
#elif (CUDART_VERSION >= 11020)

View File

@@ -22,14 +22,15 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
void *const *bsks, void *const *ksks) {
void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
void *const *ksks, uint32_t num_blocks) {
auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;
host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
gpu_count, ct, bsks, (uint64_t **)(ksks), mem,
is_signed);
gpu_count, static_cast<uint64_t *>(ct), bsks,
(uint64_t **)(ksks), mem, is_signed,
num_blocks);
}
void cleanup_cuda_integer_abs_inplace(void *const *streams,

View File

@@ -2,12 +2,15 @@
#define TFHE_RS_ABS_CUH
#include "crypto/keyswitch.cuh"
#include "device.h"
#include "integer/bitwise_ops.cuh"
#include "integer/comparison.cuh"
#include "integer/integer.cuh"
#include "integer/integer_utilities.h"
#include "integer/negation.cuh"
#include "integer/scalar_shifts.cuh"
#include "radix_ciphertext.cuh"
#include "linear_algebra.h"
#include "pbs/programmable_bootstrap.h"
#include "utils/helper.cuh"
#include "utils/kernel_dimensions.cuh"
#include <fstream>
@@ -29,15 +32,16 @@ __host__ void scratch_cuda_integer_abs_kb(
}
template <typename Torus>
__host__ void legacy_host_integer_abs_kb_async(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *ct, void *const *bsks, uint64_t *const *ksks,
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed, uint32_t num_blocks) {
__host__ void
host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *ct, void *const *bsks,
uint64_t *const *ksks, int_abs_buffer<uint64_t> *mem_ptr,
bool is_signed, uint32_t num_blocks) {
if (!is_signed)
return;
auto radix_params = mem_ptr->params;
auto mask = (Torus *)(mem_ptr->mask->ptr);
auto mask = mem_ptr->mask;
auto big_lwe_dimension = radix_params.big_lwe_dimension;
auto big_lwe_size = big_lwe_dimension + 1;
@@ -48,11 +52,11 @@ __host__ void legacy_host_integer_abs_kb_async(
cuda_memcpy_async_gpu_to_gpu(mask, ct, num_blocks * big_lwe_size_bytes,
streams[0], gpu_indexes[0]);
host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
host_integer_radix_arithmetic_scalar_shift_kb_inplace(
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, num_blocks);
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
radix_params.big_lwe_dimension, num_blocks);
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
radix_params.big_lwe_dimension, num_blocks);
uint32_t requested_flag = outputFlag::FLAG_NONE;
uint32_t uses_carry = 0;
@@ -60,44 +64,8 @@ __host__ void legacy_host_integer_abs_kb_async(
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
bsks, ksks, num_blocks, requested_flag, uses_carry);
// legacy bitop
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, ct, mask, ct, bsks, ksks, num_blocks,
mem_ptr->bitxor_mem->lut, mem_ptr->bitxor_mem->params.message_modulus);
}
template <typename Torus>
__host__ void
host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *ct,
void *const *bsks, uint64_t *const *ksks,
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
if (!is_signed)
return;
auto mask = mem_ptr->mask;
uint32_t num_bits_in_ciphertext =
(31 - __builtin_clz(mem_ptr->params.message_modulus)) *
ct->num_radix_blocks;
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], mask, ct);
host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
streams, gpu_indexes, gpu_count, (Torus *)(mask->ptr),
num_bits_in_ciphertext - 1, mem_ptr->arithmetic_scalar_shift_mem, bsks,
ksks, ct->num_radix_blocks);
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct);
uint32_t requested_flag = outputFlag::FLAG_NONE;
uint32_t uses_carry = 0;
host_propagate_single_carry<Torus>(
streams, gpu_indexes, gpu_count, (Torus *)(ct->ptr), nullptr, nullptr,
mem_ptr->scp_mem, bsks, ksks, ct->num_radix_blocks, requested_flag,
uses_carry);
host_integer_radix_bitop_kb<Torus>(streams, gpu_indexes, gpu_count, ct, mask,
ct, mem_ptr->bitxor_mem, bsks, ksks);
host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
}
#endif // TFHE_RS_ABS_CUH

View File

@@ -22,15 +22,17 @@ void scratch_cuda_integer_radix_bitop_kb_64(
void cuda_bitop_integer_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_1,
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
void *const *bsks, void *const *ksks) {
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
uint32_t lwe_ciphertext_count) {
host_integer_radix_bitop_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
lwe_array_1, lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
(uint64_t **)(ksks));
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_1),
static_cast<const uint64_t *>(lwe_array_2),
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
lwe_ciphertext_count);
}
void cleanup_cuda_integer_bitop(void *const *streams,
@@ -41,50 +43,3 @@ void cleanup_cuda_integer_bitop(void *const *streams,
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}
void update_degrees_after_bitand(uint64_t *output_degrees,
uint64_t *lwe_array_1_degrees,
uint64_t *lwe_array_2_degrees,
uint32_t num_radix_blocks) {
for (uint i = 0; i < num_radix_blocks; i++) {
output_degrees[i] =
std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
}
}
void update_degrees_after_bitor(uint64_t *output_degrees,
uint64_t *lwe_array_1_degrees,
uint64_t *lwe_array_2_degrees,
uint32_t num_radix_blocks) {
for (uint i = 0; i < num_radix_blocks; i++) {
auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
auto result = max;
for (uint j = 0; j < min + 1; j++) {
if (max | j > result) {
result = max | j;
}
}
output_degrees[i] = result;
}
}
void update_degrees_after_bitxor(uint64_t *output_degrees,
uint64_t *lwe_array_1_degrees,
uint64_t *lwe_array_2_degrees,
uint32_t num_radix_blocks) {
for (uint i = 0; i < num_radix_blocks; i++) {
auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
auto result = max;
// Try every possibility to find the worst case
for (uint j = 0; j < min + 1; j++) {
if (max ^ j > result) {
result = max ^ j;
}
}
output_degrees[i] = result;
}
}

View File

@@ -14,33 +14,15 @@
template <typename Torus>
__host__ void host_integer_radix_bitop_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_1,
CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
void *const *bsks, Torus *const *ksks) {
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_1,
Torus const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks) {
auto lut = mem_ptr->lut;
uint64_t degrees[lwe_array_1->num_radix_blocks];
if (mem_ptr->op == BITOP_TYPE::BITAND) {
update_degrees_after_bitand(degrees, lwe_array_1->degrees,
lwe_array_2->degrees,
lwe_array_1->num_radix_blocks);
} else if (mem_ptr->op == BITOP_TYPE::BITOR) {
update_degrees_after_bitor(degrees, lwe_array_1->degrees,
lwe_array_2->degrees,
lwe_array_1->num_radix_blocks);
} else if (mem_ptr->op == BITXOR) {
update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
lwe_array_2->degrees,
lwe_array_1->num_radix_blocks);
}
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
bsks, ksks, lut, lut->params.message_modulus);
memcpy(lwe_array_out->degrees, degrees,
lwe_array_out->num_radix_blocks * sizeof(uint64_t));
bsks, ksks, num_radix_blocks, lut, lut->params.message_modulus);
}
template <typename Torus>

View File

@@ -25,16 +25,19 @@ void scratch_cuda_integer_radix_cmux_kb_64(
void cuda_cmux_integer_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_condition,
CudaRadixCiphertextFFI const *lwe_array_true,
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
void *const *bsks, void *const *ksks) {
void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
void *const *ksks, uint32_t lwe_ciphertext_count) {
host_integer_radix_cmux_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
lwe_condition, lwe_array_true, lwe_array_false,
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_condition),
static_cast<const uint64_t *>(lwe_array_true),
static_cast<const uint64_t *>(lwe_array_false),
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
lwe_ciphertext_count);
}
void cleanup_cuda_integer_radix_cmux(void *const *streams,

View File

@@ -2,7 +2,6 @@
#define CUDA_INTEGER_CMUX_CUH
#include "integer.cuh"
#include "radix_ciphertext.cuh"
template <typename Torus>
__host__ void zero_out_if(cudaStream_t const *streams,
@@ -12,7 +11,7 @@ __host__ void zero_out_if(cudaStream_t const *streams,
int_zero_out_if_buffer<Torus> *mem_ptr,
int_radix_lut<Torus> *predicate, void *const *bsks,
Torus *const *ksks, uint32_t num_radix_blocks) {
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
auto params = mem_ptr->params;
// We can't use integer_radix_apply_bivariate_lookup_table_kb since the
@@ -24,13 +23,13 @@ __host__ void zero_out_if(cudaStream_t const *streams,
predicate->lwe_indexes_in, params.big_lwe_dimension,
params.message_modulus, num_radix_blocks);
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
ksks, num_radix_blocks, predicate);
}
template <typename Torus>
__host__ void legacy_host_integer_radix_cmux_kb(
__host__ void host_integer_radix_cmux_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_condition,
Torus const *lwe_array_true, Torus const *lwe_array_false,
@@ -40,91 +39,34 @@ __host__ void legacy_host_integer_radix_cmux_kb(
auto params = mem_ptr->params;
Torus lwe_size = params.big_lwe_dimension + 1;
Torus radix_lwe_size = lwe_size * num_radix_blocks;
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in->ptr, lwe_array_true,
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in, lwe_array_true,
radix_lwe_size * sizeof(Torus), streams[0],
gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(
(Torus *)(mem_ptr->buffer_in->ptr) + radix_lwe_size, lwe_array_false,
radix_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in + radix_lwe_size,
lwe_array_false, radix_lwe_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
cuda_memcpy_async_gpu_to_gpu(
(Torus *)(mem_ptr->condition_array->ptr) + i * lwe_size, lwe_condition,
lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
}
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, (Torus *)(mem_ptr->buffer_out->ptr),
(Torus *)(mem_ptr->buffer_in->ptr),
(Torus *)(mem_ptr->condition_array->ptr), bsks, ksks,
2 * num_radix_blocks, mem_ptr->predicate_lut, params.message_modulus);
// If the condition was true, true_ct will have kept its value and false_ct
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
// have kept its value
auto mem_true = (Torus *)(mem_ptr->buffer_out->ptr);
auto ptr = (Torus *)mem_ptr->buffer_out->ptr;
auto mem_false = &ptr[radix_lwe_size];
auto added_cts = mem_true;
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
mem_false, params.big_lwe_dimension,
num_radix_blocks);
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
num_radix_blocks, mem_ptr->message_extract_lut);
}
template <typename Torus>
__host__ void host_integer_radix_cmux_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_condition,
CudaRadixCiphertextFFI const *lwe_array_true,
CudaRadixCiphertextFFI const *lwe_array_false,
int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {
if (lwe_array_out->num_radix_blocks != lwe_array_true->num_radix_blocks)
PANIC("Cuda error: input and output num radix blocks must be the same")
if (lwe_array_out->num_radix_blocks != lwe_array_false->num_radix_blocks)
PANIC("Cuda error: input and output num radix blocks must be the same")
auto num_radix_blocks = lwe_array_out->num_radix_blocks;
auto params = mem_ptr->params;
Torus lwe_size = params.big_lwe_dimension + 1;
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
mem_ptr->buffer_in, lwe_array_true);
copy_radix_ciphertext_to_larger_output_slice_async<Torus>(
streams[0], gpu_indexes[0], mem_ptr->buffer_in, lwe_array_false,
num_radix_blocks);
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
cuda_memcpy_async_gpu_to_gpu(
(Torus *)(mem_ptr->condition_array->ptr) + i * lwe_size,
(Torus *)(lwe_condition->ptr), lwe_size * sizeof(Torus), streams[0],
gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(mem_ptr->condition_array + i * lwe_size,
lwe_condition, lwe_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
}
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
mem_ptr->condition_array, bsks, ksks, mem_ptr->predicate_lut,
params.message_modulus);
mem_ptr->condition_array, bsks, ksks, 2 * num_radix_blocks,
mem_ptr->predicate_lut, params.message_modulus);
// If the condition was true, true_ct will have kept its value and false_ct
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
// have kept its value
CudaRadixCiphertextFFI *mem_true = new CudaRadixCiphertextFFI;
CudaRadixCiphertextFFI *mem_false = new CudaRadixCiphertextFFI;
as_radix_ciphertext_slice<Torus>(mem_true, mem_ptr->buffer_out, 0,
num_radix_blocks - 1);
as_radix_ciphertext_slice<Torus>(mem_false, mem_ptr->buffer_out,
num_radix_blocks, 2 * num_radix_blocks - 1);
auto mem_true = mem_ptr->buffer_out;
auto mem_false = &mem_ptr->buffer_out[radix_lwe_size];
auto added_cts = mem_true;
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
mem_false);
mem_false, params.big_lwe_dimension, num_radix_blocks);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
mem_ptr->message_extract_lut, num_radix_blocks);
delete mem_true;
delete mem_false;
num_radix_blocks, mem_ptr->message_extract_lut);
}
template <typename Torus>

View File

@@ -38,7 +38,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
uint32_t lwe_dimension,
uint32_t num_radix_blocks) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
int num_blocks = 0, num_threads = 0;
int num_entries = (lwe_dimension + 1);
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
@@ -122,9 +122,7 @@ __host__ void are_all_comparisons_block_true(
};
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
is_max_value_lut->get_degree(1),
is_max_value_lut->get_max_degree(1), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
is_equal_to_num_blocks_lut_f);
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
@@ -148,12 +146,12 @@ __host__ void are_all_comparisons_block_true(
// Applies the LUT
if (remaining_blocks == 1) {
// In the last iteration we copy the output to the final address
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
ksks, 1, lut);
return;
} else {
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
num_chunks, lut);
}
@@ -219,12 +217,12 @@ __host__ void is_at_least_one_comparisons_block_true(
// Applies the LUT
if (remaining_blocks == 1) {
// In the last iteration we copy the output to the final address
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
ksks, 1, lut);
return;
} else {
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem_ptr->tmp_lwe_array_out,
accumulator, bsks, ksks, num_chunks, lut);
}
@@ -305,7 +303,7 @@ __host__ void host_compare_with_zero_equality(
}
}
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, sum, sum, bsks, ksks, num_sum_blocks,
zero_comparison);
are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
@@ -324,7 +322,7 @@ __host__ void host_integer_radix_equality_check_kb(
// Applies the LUT for the comparison operation
auto comparisons = mem_ptr->tmp_block_comparisons;
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, comparisons, lwe_array_1, lwe_array_2,
bsks, ksks, num_radix_blocks, eq_buffer->operator_lut,
eq_buffer->operator_lut->params.message_modulus);
@@ -371,7 +369,7 @@ __host__ void compare_radix_blocks_kb(
// Apply LUT to compare to 0
auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_out, bsks, ksks,
num_radix_blocks, is_non_zero_lut);
@@ -422,7 +420,7 @@ __host__ void tree_sign_reduction(
pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, big_lwe_dimension,
partial_block_count, 4);
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
partial_block_count >> 1, inner_tree_leaf);
@@ -462,13 +460,12 @@ __host__ void tree_sign_reduction(
f = sign_handler_f;
}
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0),
last_lut->get_degree(0), last_lut->get_max_degree(0), glwe_dimension,
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus, f);
last_lut->broadcast_lut(streams, gpu_indexes, 0);
// Last leaf
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, y, bsks, ksks, 1,
last_lut);
}
@@ -514,7 +511,7 @@ __host__ void host_integer_radix_difference_check_kb(
// Clean noise
auto identity_lut = mem_ptr->identity_lut;
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
2 * packed_num_radix_blocks, identity_lut);
@@ -552,11 +549,11 @@ __host__ void host_integer_radix_difference_check_kb(
packed_left + packed_num_radix_blocks * big_lwe_size;
Torus *last_right_block_before_sign_block =
packed_right + packed_num_radix_blocks * big_lwe_size;
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
identity_lut);
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, last_right_block_before_sign_block,
lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks,
1, identity_lut);
@@ -566,7 +563,7 @@ __host__ void host_integer_radix_difference_check_kb(
last_left_block_before_sign_block, last_right_block_before_sign_block,
mem_ptr, bsks, ksks, 1);
// Compare the sign block separately
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count,
comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
@@ -579,7 +576,7 @@ __host__ void host_integer_radix_difference_check_kb(
streams, gpu_indexes, gpu_count, comparisons, lwe_array_left,
lwe_array_right, mem_ptr, bsks, ksks, num_radix_blocks - 1);
// Compare the sign block separately
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count,
comparisons + (num_radix_blocks - 1) * big_lwe_size,
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
@@ -623,7 +620,7 @@ __host__ void host_integer_radix_maxmin_kb(
ksks, total_num_radix_blocks);
// Selector
legacy_host_integer_radix_cmux_kb<Torus>(
host_integer_radix_cmux_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out,
mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);

View File

@@ -50,7 +50,7 @@ __host__ void host_pack(cudaStream_t stream, uint32_t gpu_index,
if (array_in == array_out)
PANIC("Cuda error: Input and output must be different");
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
auto compression_params = mem_ptr->compression_params;
auto log_modulus = mem_ptr->storage_log_modulus;
@@ -185,7 +185,7 @@ __host__ void host_extract(cudaStream_t stream, uint32_t gpu_index,
if (array_in == glwe_array_out)
PANIC("Cuda error: Input and output must be different");
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
auto compression_params = mem_ptr->compression_params;

View File

@@ -285,7 +285,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
// Shift the mask so that we will only keep bits we should
uint32_t shifted_mask = full_message_mask >> shift_amount;
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, interesting_divisor.last_block(),
interesting_divisor.last_block(), bsks, ksks, 1,
mem_ptr->masking_luts_1[shifted_mask]);
@@ -314,7 +314,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
// the estimated degree of the output is < msg_modulus
shifted_mask = shifted_mask & full_message_mask;
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, divisor_ms_blocks.first_block(),
divisor_ms_blocks.first_block(), bsks, ksks, 1,
mem_ptr->masking_luts_2[shifted_mask]);
@@ -347,7 +347,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
interesting_remainder1.len - 1, streams[0],
gpu_indexes[0]);
legacy_host_radix_blocks_rotate_left<Torus>(
host_radix_blocks_rotate_left<Torus>(
streams, gpu_indexes, gpu_count, interesting_remainder1.data,
tmp_radix.data, 1, interesting_remainder1.len, big_lwe_size);
@@ -402,7 +402,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
// but in that position, interesting_remainder2 always has a 0
auto &merged_interesting_remainder = interesting_remainder1;
legacy_host_addition<Torus>(
host_addition<Torus>(
streams[0], gpu_indexes[0], merged_interesting_remainder.data,
merged_interesting_remainder.data, interesting_remainder2.data,
radix_params.big_lwe_dimension, merged_interesting_remainder.len);
@@ -481,7 +481,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
auto create_clean_version_of_merged_remainder =
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count) {
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count,
cleaned_merged_interesting_remainder.data,
cleaned_merged_interesting_remainder.data, bsks, ksks,
@@ -507,10 +507,10 @@ __host__ void host_unsigned_integer_div_rem_kb(
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
}
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
subtraction_overflowed.data,
at_least_one_upper_block_is_non_zero.data,
radix_params.big_lwe_dimension, 1);
host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
subtraction_overflowed.data,
at_least_one_upper_block_is_non_zero.data,
radix_params.big_lwe_dimension, 1);
int factor = (i) ? 3 : 2;
int factor_lut_id = factor - 2;
@@ -521,7 +521,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
auto conditionally_zero_out_merged_interesting_remainder =
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count) {
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count,
cleaned_merged_interesting_remainder.data,
cleaned_merged_interesting_remainder.data,
@@ -534,7 +534,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
auto conditionally_zero_out_merged_new_remainder =
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count) {
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, new_remainder.data,
new_remainder.data, overflow_sum_radix.data, bsks, ksks,
new_remainder.len,
@@ -544,7 +544,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
auto set_quotient_bit = [&](cudaStream_t const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count) {
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, did_not_overflow.data,
subtraction_overflowed.data,
at_least_one_upper_block_is_non_zero.data, bsks, ksks, 1,
@@ -552,7 +552,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
mem_ptr->merge_overflow_flags_luts[pos_in_block]
->params.message_modulus);
legacy_host_addition<Torus>(
host_addition<Torus>(
streams[0], gpu_indexes[0], &quotient[block_of_bit * big_lwe_size],
&quotient[block_of_bit * big_lwe_size], did_not_overflow.data,
radix_params.big_lwe_dimension, 1);
@@ -588,17 +588,17 @@ __host__ void host_unsigned_integer_div_rem_kb(
// Clean the quotient and remainder
// as even though they have no carries, they are not at nominal noise level
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], remainder,
remainder1.data, remainder2.data,
radix_params.big_lwe_dimension, remainder1.len);
host_addition<Torus>(streams[0], gpu_indexes[0], remainder, remainder1.data,
remainder2.data, radix_params.big_lwe_dimension,
remainder1.len);
for (uint j = 0; j < gpu_count; j++) {
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
}
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem_ptr->sub_streams_1, gpu_indexes, gpu_count, remainder, remainder,
bsks, ksks, num_blocks, mem_ptr->message_extract_lut_1);
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem_ptr->sub_streams_2, gpu_indexes, gpu_count, quotient, quotient, bsks,
ksks, num_blocks, mem_ptr->message_extract_lut_2);
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
@@ -636,14 +636,12 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
}
legacy_host_integer_abs_kb_async<Torus>(
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
positive_numerator.data, bsks, ksks, int_mem_ptr->abs_mem_1, true,
num_blocks);
legacy_host_integer_abs_kb_async<Torus>(
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
positive_divisor.data, bsks, ksks, int_mem_ptr->abs_mem_2, true,
num_blocks);
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
gpu_count, positive_numerator.data, bsks, ksks,
int_mem_ptr->abs_mem_1, true, num_blocks);
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_2, gpu_indexes,
gpu_count, positive_divisor.data, bsks, ksks,
int_mem_ptr->abs_mem_2, true, num_blocks);
for (uint j = 0; j < int_mem_ptr->active_gpu_count; j++) {
cuda_synchronize_stream(int_mem_ptr->sub_streams_1[j], gpu_indexes[j]);
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
@@ -654,7 +652,7 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
positive_numerator.data, positive_divisor.data, bsks, ksks,
int_mem_ptr->unsigned_mem, num_blocks);
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
int_mem_ptr->sign_bits_are_different,
&numerator[big_lwe_size * (num_blocks - 1)],
@@ -667,7 +665,7 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
}
legacy_host_integer_radix_negation(
host_integer_radix_negation(
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
int_mem_ptr->negated_quotient, quotient, radix_params.big_lwe_dimension,
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
@@ -679,11 +677,11 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
int_mem_ptr->negated_quotient, nullptr, nullptr, int_mem_ptr->scp_mem_1,
bsks, ksks, num_blocks, requested_flag, uses_carry);
legacy_host_integer_radix_negation(
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
int_mem_ptr->negated_remainder, remainder,
radix_params.big_lwe_dimension, num_blocks,
radix_params.message_modulus, radix_params.carry_modulus);
host_integer_radix_negation(int_mem_ptr->sub_streams_2, gpu_indexes,
gpu_count, int_mem_ptr->negated_remainder,
remainder, radix_params.big_lwe_dimension,
num_blocks, radix_params.message_modulus,
radix_params.carry_modulus);
host_propagate_single_carry<Torus>(
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
@@ -691,12 +689,12 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks, requested_flag,
uses_carry);
legacy_host_integer_radix_cmux_kb<Torus>(
host_integer_radix_cmux_kb<Torus>(
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient,
int_mem_ptr->sign_bits_are_different, int_mem_ptr->negated_quotient,
quotient, int_mem_ptr->cmux_quotient_mem, bsks, ksks, num_blocks);
legacy_host_integer_radix_cmux_kb<Torus>(
host_integer_radix_cmux_kb<Torus>(
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count, remainder,
&numerator[big_lwe_size * (num_blocks - 1)],
int_mem_ptr->negated_remainder, remainder,

View File

@@ -184,7 +184,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint64_t lut_degree, bool allocate_gpu_memory) {
bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
@@ -195,7 +195,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
lut_degree, allocate_gpu_memory);
allocate_gpu_memory);
}
void scratch_cuda_apply_many_univariate_lut_kb_64(
@@ -205,7 +205,7 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory) {
uint32_t num_many_lut, bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
@@ -216,19 +216,22 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
num_many_lut, lut_degree, allocate_gpu_memory);
num_many_lut, allocate_gpu_memory);
}
void cuda_apply_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
void *const *ksks, void *const *bsks) {
void cuda_apply_univariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count, void *output_radix_lwe,
void const *input_radix_lwe,
int8_t *mem_ptr, void *const *ksks,
void *const *bsks, uint32_t num_blocks) {
host_apply_univariate_lut_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
input_radix_lwe, (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
bsks);
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(output_radix_lwe),
static_cast<const uint64_t *>(input_radix_lwe),
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks,
num_blocks);
}
void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
@@ -241,15 +244,16 @@ void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
void cuda_apply_many_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
void *const *ksks, void *const *bsks, uint32_t num_many_lut,
uint32_t lut_stride) {
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
void *const *ksks, void *const *bsks, uint32_t num_blocks,
uint32_t num_many_lut, uint32_t lut_stride) {
host_apply_many_univariate_lut_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
input_radix_lwe, (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
bsks, num_many_lut, lut_stride);
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(output_radix_lwe),
static_cast<const uint64_t *>(input_radix_lwe),
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
num_many_lut, lut_stride);
}
void scratch_cuda_apply_bivariate_lut_kb_64(
@@ -259,7 +263,7 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint64_t lut_degree, bool allocate_gpu_memory) {
bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
@@ -270,20 +274,22 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
lut_degree, allocate_gpu_memory);
allocate_gpu_memory);
}
void cuda_apply_bivariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe_1,
CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
void *const *ksks, void *const *bsks, uint32_t shift) {
void *output_radix_lwe, void const *input_radix_lwe_1,
void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
void *const *bsks, uint32_t num_blocks, uint32_t shift) {
host_apply_bivariate_lut_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
input_radix_lwe_1, input_radix_lwe_2, (int_radix_lut<uint64_t> *)mem_ptr,
(uint64_t **)(ksks), bsks, shift);
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(output_radix_lwe),
static_cast<const uint64_t *>(input_radix_lwe_1),
static_cast<const uint64_t *>(input_radix_lwe_2),
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
shift);
}
void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
@@ -301,7 +307,7 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint64_t lut_degree, bool allocate_gpu_memory) {
bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
@@ -312,7 +318,7 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
lut_degree, allocate_gpu_memory);
allocate_gpu_memory);
}
void cuda_integer_compute_prefix_sum_hillis_steele_64(

File diff suppressed because it is too large Load Diff

View File

@@ -228,9 +228,9 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
streams[0], gpu_indexes[0]);
}
if (num_radix_in_vec == 2) {
legacy_host_addition<Torus>(
streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
&old_blocks[num_blocks * big_lwe_size], big_lwe_dimension, num_blocks);
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
&old_blocks[num_blocks * big_lwe_size],
big_lwe_dimension, num_blocks);
return;
}
@@ -280,13 +280,10 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
// generate accumulators
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], message_acc,
luts_message_carry->get_degree(0), luts_message_carry->get_max_degree(0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
lut_f_message);
streams[0], gpu_indexes[0], message_acc, glwe_dimension, polynomial_size,
message_modulus, carry_modulus, lut_f_message);
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], carry_acc, luts_message_carry->get_degree(1),
luts_message_carry->get_max_degree(1), glwe_dimension, polynomial_size,
streams[0], gpu_indexes[0], carry_acc, glwe_dimension, polynomial_size,
message_modulus, carry_modulus, lut_f_carry);
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
@@ -297,7 +294,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
ch_amount++;
dim3 add_grid(ch_amount, num_blocks, 1);
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
tree_add_chunks<Torus><<<add_grid, 512, 0, streams[0]>>>(
new_blocks, old_blocks, min(r, chunk_size), big_lwe_size, num_blocks);
@@ -448,9 +445,9 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
luts_message_carry->release(streams, gpu_indexes, gpu_count);
delete (luts_message_carry);
legacy_host_addition<Torus>(
streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
&old_blocks[num_blocks * big_lwe_size], big_lwe_dimension, num_blocks);
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
&old_blocks[num_blocks * big_lwe_size],
big_lwe_dimension, num_blocks);
}
template <typename Torus, class params>
@@ -541,13 +538,13 @@ __host__ void host_integer_mult_radix_kb(
dim3 grid(lsb_vector_block_count, 1, 1);
dim3 thds(params::degree / params::opt, 1, 1);
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
all_shifted_lhs_rhs<Torus, params><<<grid, thds, 0, streams[0]>>>(
radix_lwe_left, vector_result_lsb, vector_result_msb, radix_lwe_right,
vector_lsb_rhs, vector_msb_rhs, num_blocks);
check_cuda_error(cudaGetLastError());
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, block_mul_res, block_mul_res,
vector_result_sb, bsks, ksks, total_block_count, luts_array,
luts_array->params.message_modulus);
@@ -556,7 +553,7 @@ __host__ void host_integer_mult_radix_kb(
vector_result_msb = &block_mul_res[lsb_vector_block_count *
(polynomial_size * glwe_dimension + 1)];
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
fill_radix_from_lsb_msb<Torus, params>
<<<num_blocks * num_blocks, params::degree / params::opt, 0,
streams[0]>>>(vector_result_sb, vector_result_lsb, vector_result_msb,

View File

@@ -2,11 +2,13 @@
void cuda_negate_integer_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus) {
host_integer_radix_negation<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
gpu_count, lwe_array_out, lwe_array_in,
message_modulus, carry_modulus);
host_integer_radix_negation<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_in), lwe_dimension,
lwe_ciphertext_count, message_modulus, carry_modulus);
}

View File

@@ -54,69 +54,12 @@ device_integer_radix_negation(Torus *output, Torus const *input,
}
template <typename Torus>
__host__ void
host_integer_radix_negation(cudaStream_t const *streams,
uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_in,
uint64_t message_modulus, uint64_t carry_modulus) {
cuda_set_device(gpu_indexes[0]);
if (lwe_array_out->num_radix_blocks != lwe_array_in->num_radix_blocks)
PANIC("Cuda error: lwe_array_in and lwe_array_out num radix blocks must be "
"the same")
if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
PANIC("Cuda error: lwe_array_in and lwe_array_out lwe_dimension must be "
"the same")
auto num_radix_blocks = lwe_array_out->num_radix_blocks;
auto lwe_dimension = lwe_array_out->lwe_dimension;
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = lwe_dimension + 1;
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;
int num_entries = lwe_size;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
dim3 grid(num_blocks, 1, 1);
dim3 thds(num_threads, 1, 1);
// Value of the shift we multiply our messages by
// If message_modulus and carry_modulus are always powers of 2 we can simplify
// this
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
device_integer_radix_negation<Torus><<<grid, thds, 0, streams[0]>>>(
static_cast<Torus *>(lwe_array_out->ptr),
static_cast<Torus *>(lwe_array_in->ptr), num_radix_blocks, lwe_dimension,
message_modulus, delta);
check_cuda_error(cudaGetLastError());
uint8_t zb = 0;
for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) {
auto input_degree = lwe_array_in->degrees[i];
if (zb != 0) {
input_degree += static_cast<uint64_t>(zb);
}
Torus z =
std::max(static_cast<Torus>(1),
static_cast<Torus>(ceil(input_degree / message_modulus))) *
message_modulus;
lwe_array_out->degrees[i] = z - static_cast<uint64_t>(zb);
lwe_array_out->noise_levels[i] = lwe_array_in->noise_levels[i];
zb = z / message_modulus;
}
}
template <typename Torus>
__host__ void legacy_host_integer_radix_negation(
__host__ void host_integer_radix_negation(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *output, Torus const *input,
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
uint64_t message_modulus, uint64_t carry_modulus) {
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask

View File

@@ -1,10 +0,0 @@
#include "radix_ciphertext.cuh"
void release_radix_ciphertext(cudaStream_t const stream,
uint32_t const gpu_index,
CudaRadixCiphertextFFI *data) {
cuda_drop_async(data->ptr, stream, gpu_index);
free(data->degrees);
free(data->noise_levels);
cuda_synchronize_stream(stream, gpu_index);
}

View File

@@ -1,114 +0,0 @@
#ifndef CUDA_INTEGER_RADIX_CIPHERTEXT_CUH
#define CUDA_INTEGER_RADIX_CIPHERTEXT_CUH
#include "device.h"
#include "integer/integer.h"
template <typename Torus>
void create_zero_radix_ciphertext_async(cudaStream_t const stream,
uint32_t const gpu_index,
CudaRadixCiphertextFFI *radix,
const uint32_t num_radix_blocks,
const uint32_t lwe_dimension) {
radix->lwe_dimension = lwe_dimension;
radix->num_radix_blocks = num_radix_blocks;
uint32_t size = (lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
radix->ptr = (void *)cuda_malloc_async(size, stream, gpu_index);
cuda_memset_async(radix->ptr, 0, size, stream, gpu_index);
radix->degrees = (uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
radix->noise_levels =
(uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
if (radix->degrees == NULL || radix->noise_levels == NULL) {
PANIC("Cuda error: degrees / noise levels not allocated correctly")
}
}
// end_lwe_index is inclusive
template <typename Torus>
void as_radix_ciphertext_slice(CudaRadixCiphertextFFI *output_radix,
const CudaRadixCiphertextFFI *input_radix,
const uint32_t start_lwe_index,
const uint32_t end_lwe_index) {
if (input_radix->num_radix_blocks < end_lwe_index - start_lwe_index + 1)
PANIC("Cuda error: input radix should have more blocks than the specified "
"range")
if (start_lwe_index > end_lwe_index)
PANIC("Cuda error: slice range should be non negative")
auto lwe_size = input_radix->lwe_dimension + 1;
output_radix->num_radix_blocks = end_lwe_index - start_lwe_index + 1;
output_radix->lwe_dimension = input_radix->lwe_dimension;
Torus *in_ptr = (Torus *)input_radix->ptr;
output_radix->ptr = (void *)(in_ptr + start_lwe_index * lwe_size);
output_radix->degrees = input_radix->degrees + start_lwe_index;
output_radix->noise_levels = input_radix->noise_levels + start_lwe_index;
}
template <typename Torus>
void copy_radix_ciphertext_to_larger_output_slice_async(
cudaStream_t const stream, uint32_t const gpu_index,
CudaRadixCiphertextFFI *output_radix,
const CudaRadixCiphertextFFI *input_radix,
const uint32_t output_start_lwe_index) {
if (output_radix->lwe_dimension != input_radix->lwe_dimension)
PANIC("Cuda error: input lwe dimension should be equal to output lwe "
"dimension")
if (output_radix->num_radix_blocks - output_start_lwe_index <
input_radix->num_radix_blocks)
PANIC("Cuda error: output range should have more blocks than there are"
"input radix blocks")
if (output_start_lwe_index >= output_radix->num_radix_blocks)
PANIC("Cuda error: output index should be strictly smaller than the number "
"of blocks")
auto lwe_size = input_radix->lwe_dimension + 1;
Torus *out_ptr = (Torus *)output_radix->ptr;
out_ptr = &out_ptr[output_start_lwe_index * lwe_size];
cuda_memcpy_async_gpu_to_gpu(out_ptr, input_radix->ptr,
input_radix->num_radix_blocks * lwe_size *
sizeof(Torus),
stream, gpu_index);
for (uint i = 0; i < input_radix->num_radix_blocks; i++) {
output_radix->degrees[i + output_start_lwe_index] = input_radix->degrees[i];
output_radix->noise_levels[i + output_start_lwe_index] =
input_radix->noise_levels[i];
}
}
template <typename Torus>
void copy_radix_ciphertext_async(cudaStream_t const stream,
uint32_t const gpu_index,
CudaRadixCiphertextFFI *output_radix,
const CudaRadixCiphertextFFI *input_radix) {
copy_radix_ciphertext_to_larger_output_slice_async<Torus>(
stream, gpu_index, output_radix, input_radix, 0);
}
// end_lwe_index is inclusive
template <typename Torus>
void set_zero_radix_ciphertext_async(cudaStream_t const stream,
uint32_t const gpu_index,
CudaRadixCiphertextFFI *radix,
const uint32_t start_lwe_index,
const uint32_t end_lwe_index) {
if (radix->num_radix_blocks < end_lwe_index - start_lwe_index + 1)
PANIC("Cuda error: input radix should have more blocks than the specified "
"range")
if (start_lwe_index > end_lwe_index)
PANIC("Cuda error: slice range should be non negative")
auto lwe_size = radix->lwe_dimension + 1;
auto num_blocks_to_set = end_lwe_index - start_lwe_index + 1;
auto lwe_array_out_block = (Torus *)radix->ptr + start_lwe_index * lwe_size;
cuda_memset_async(lwe_array_out_block, 0,
num_blocks_to_set * lwe_size * sizeof(Torus), stream,
gpu_index);
memset(&radix->degrees[start_lwe_index], 0,
num_blocks_to_set * sizeof(uint64_t));
memset(&radix->noise_levels[start_lwe_index], 0,
num_blocks_to_set * sizeof(uint64_t));
}
#endif

View File

@@ -2,11 +2,13 @@
void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus) {
void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus) {
host_integer_radix_scalar_addition_inplace<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array,
static_cast<const uint64_t *>(scalar_input), num_scalars, message_modulus,
carry_modulus);
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(scalar_input), lwe_dimension,
lwe_ciphertext_count, message_modulus, carry_modulus);
}

View File

@@ -24,12 +24,12 @@ __global__ void device_integer_radix_scalar_addition_inplace(
}
template <typename Torus>
__host__ void legacy_host_integer_radix_scalar_addition_inplace(
__host__ void host_integer_radix_scalar_addition_inplace(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array, Torus const *scalar_input,
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus) {
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;
@@ -49,42 +49,6 @@ __host__ void legacy_host_integer_radix_scalar_addition_inplace(
delta);
check_cuda_error(cudaGetLastError());
}
template <typename Torus>
__host__ void host_integer_radix_scalar_addition_inplace(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array,
Torus const *scalar_input, uint32_t num_scalars, uint32_t message_modulus,
uint32_t carry_modulus) {
if (lwe_array->num_radix_blocks < num_scalars)
PANIC("Cuda error: num scalars should be smaller or equal to input num "
"radix blocks")
cuda_set_device(gpu_indexes[0]);
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;
int num_entries = num_scalars;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
dim3 grid(num_blocks, 1, 1);
dim3 thds(num_threads, 1, 1);
// Value of the shift we multiply our messages by
// If message_modulus and carry_modulus are always powers of 2 we can simplify
// this
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
device_integer_radix_scalar_addition_inplace<Torus>
<<<grid, thds, 0, streams[0]>>>((Torus *)lwe_array->ptr, scalar_input,
num_scalars, lwe_array->lwe_dimension,
delta);
check_cuda_error(cudaGetLastError());
Torus scalar_input_cpu[num_scalars];
cuda_memcpy_async_to_cpu(&scalar_input_cpu, scalar_input,
num_scalars * sizeof(Torus), streams[0],
gpu_indexes[0]);
for (uint i = 0; i < num_scalars; i++) {
lwe_array->degrees[i] = lwe_array->degrees[i] + scalar_input_cpu[i];
}
}
template <typename Torus>
__global__ void device_integer_radix_add_scalar_one_inplace(
@@ -104,7 +68,7 @@ __host__ void host_integer_radix_add_scalar_one_inplace(
uint32_t gpu_count, Torus *lwe_array, uint32_t lwe_dimension,
uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus) {
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;
@@ -144,7 +108,7 @@ __host__ void host_integer_radix_scalar_subtraction_inplace(
uint32_t gpu_count, Torus *lwe_array, Torus *scalar_input,
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus) {
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;

View File

@@ -2,58 +2,15 @@
void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
void *const *ksks) {
void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op) {
host_integer_radix_scalar_bitop_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
lwe_array_input, static_cast<const uint64_t *>(clear_blocks),
num_clear_blocks, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
(uint64_t **)(ksks));
}
void update_degrees_after_scalar_bitand(uint64_t *output_degrees,
uint64_t *clear_degrees,
uint64_t *input_degrees,
uint32_t num_clear_blocks) {
for (uint i = 0; i < num_clear_blocks; i++) {
output_degrees[i] = std::min(clear_degrees[i], input_degrees[i]);
}
}
void update_degrees_after_scalar_bitor(uint64_t *output_degrees,
uint64_t *clear_degrees,
uint64_t *input_degrees,
uint32_t num_clear_blocks) {
for (uint i = 0; i < num_clear_blocks; i++) {
auto max = std::max(clear_degrees[i], input_degrees[i]);
auto min = std::min(clear_degrees[i], input_degrees[i]);
auto result = max;
for (uint j = 0; j < min + 1; j++) {
if (max | j > result) {
result = max | j;
}
}
output_degrees[i] = result;
}
}
void update_degrees_after_scalar_bitxor(uint64_t *output_degrees,
uint64_t *clear_degrees,
uint64_t *input_degrees,
uint32_t num_clear_blocks) {
for (uint i = 0; i < num_clear_blocks; i++) {
auto max = std::max(clear_degrees[i], input_degrees[i]);
auto min = std::min(clear_degrees[i], input_degrees[i]);
auto result = max;
// Try every possibility to find the worst case
for (uint j = 0; j < min + 1; j++) {
if (max ^ j > result) {
result = max ^ j;
}
}
output_degrees[i] = result;
}
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_input),
static_cast<const uint64_t *>(clear_blocks), num_clear_blocks,
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
lwe_ciphertext_count, op);
}

View File

@@ -7,60 +7,45 @@
template <typename Torus>
__host__ void host_integer_radix_scalar_bitop_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *input, Torus const *clear_blocks,
uint32_t num_clear_blocks, int_bitop_buffer<Torus> *mem_ptr,
void *const *bsks, Torus *const *ksks) {
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_input,
Torus const *clear_blocks, uint32_t num_clear_blocks,
int_bitop_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
uint32_t num_radix_blocks, BITOP_TYPE op) {
if (output->num_radix_blocks != input->num_radix_blocks)
PANIC("Cuda error: input and output num radix blocks must be equal")
if (output->lwe_dimension != input->lwe_dimension)
PANIC("Cuda error: input and output num radix blocks must be equal")
auto lut = mem_ptr->lut;
auto op = mem_ptr->op;
auto num_radix_blocks = output->num_radix_blocks;
auto params = lut->params;
auto big_lwe_dimension = params.big_lwe_dimension;
uint32_t lwe_size = big_lwe_dimension + 1;
if (num_clear_blocks == 0) {
if (op == SCALAR_BITAND) {
set_zero_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], output,
0, num_radix_blocks - 1);
cuda_memset_async(lwe_array_out, 0,
num_radix_blocks * lwe_size * sizeof(Torus), streams[0],
gpu_indexes[0]);
} else {
if (input != output)
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], output,
input);
cuda_memcpy_async_gpu_to_gpu(lwe_array_out, lwe_array_input,
num_radix_blocks * lwe_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
}
} else {
// We have all possible LUTs pre-computed and we use the decomposed scalar
// as index to recover the right one
uint64_t degrees[num_clear_blocks];
uint64_t clear_degrees[num_clear_blocks];
cuda_memcpy_async_to_cpu(&clear_degrees, clear_blocks,
num_clear_blocks * sizeof(Torus), streams[0],
gpu_indexes[0]);
if (mem_ptr->op == BITOP_TYPE::SCALAR_BITAND) {
update_degrees_after_scalar_bitand(degrees, clear_degrees, input->degrees,
num_clear_blocks);
} else if (mem_ptr->op == BITOP_TYPE::SCALAR_BITOR) {
update_degrees_after_scalar_bitor(degrees, clear_degrees, input->degrees,
num_clear_blocks);
} else if (mem_ptr->op == SCALAR_BITXOR) {
update_degrees_after_scalar_bitxor(degrees, clear_degrees, input->degrees,
num_clear_blocks);
}
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
num_clear_blocks * sizeof(Torus), streams[0],
gpu_indexes[0]);
lut->broadcast_lut(streams, gpu_indexes, 0);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, output, input, bsks, ksks, lut,
num_clear_blocks);
memcpy(output->degrees, degrees, num_clear_blocks * sizeof(uint64_t));
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_input, bsks,
ksks, num_clear_blocks, lut);
if (op == SCALAR_BITAND && num_clear_blocks < num_radix_blocks) {
set_zero_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], output,
num_clear_blocks,
num_radix_blocks - 1);
auto lwe_array_out_block = lwe_array_out + num_clear_blocks * lwe_size;
cuda_memset_async(lwe_array_out_block, 0,
(num_radix_blocks - num_clear_blocks) * lwe_size *
sizeof(Torus),
streams[0], gpu_indexes[0]);
}
}
}

View File

@@ -43,7 +43,7 @@ __host__ void scalar_compare_radix_blocks_kb(
// Apply LUT to compare to 0
auto sign_lut = mem_ptr->eq_buffer->is_non_zero_lut;
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, subtracted_blocks, bsks,
ksks, num_radix_blocks, sign_lut);
@@ -110,13 +110,13 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
};
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
lut->get_max_degree(0), glwe_dimension, polynomial_size,
message_modulus, carry_modulus, scalar_last_leaf_lut_f);
generate_device_accumulator<Torus>(streams[0], gpu_indexes[0],
lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus,
carry_modulus, scalar_last_leaf_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out,
mem_ptr->tmp_lwe_array_out, bsks, ksks, 1, lut);
@@ -195,12 +195,12 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
auto lut = diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
generate_device_accumulator_bivariate<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
lut->get_max_degree(0), glwe_dimension, polynomial_size,
message_modulus, carry_modulus, scalar_bivariate_last_leaf_lut_f);
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
scalar_bivariate_last_leaf_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_lsb_out,
lwe_array_msb_out, bsks, ksks, 1, lut, lut->params.message_modulus);
@@ -331,12 +331,12 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
generate_device_accumulator_bivariate<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
lut->get_max_degree(0), glwe_dimension, polynomial_size,
message_modulus, carry_modulus, scalar_bivariate_last_leaf_lut_f);
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
scalar_bivariate_last_leaf_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, are_all_msb_zeros,
sign_block, bsks, ksks, 1, lut, lut->params.message_modulus);
@@ -426,13 +426,12 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
auto signed_msb_lut = mem_ptr->signed_msb_lut;
generate_device_accumulator_bivariate<Torus>(
msb_streams[0], gpu_indexes[0], signed_msb_lut->get_lut(0, 0),
signed_msb_lut->get_degree(0), signed_msb_lut->get_max_degree(0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, lut_f);
signed_msb_lut->broadcast_lut(streams, gpu_indexes, 0);
Torus const *sign_block = msb + (num_msb_radix_blocks - 1) * big_lwe_size;
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
msb_streams, gpu_indexes, gpu_count, lwe_array_msb_out, sign_block,
are_all_msb_zeros, bsks, ksks, 1, signed_msb_lut,
signed_msb_lut->params.message_modulus);
@@ -492,7 +491,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
msb_streams[0], gpu_indexes[0], trivial_sign_block, scalar_sign_block,
big_lwe_dimension, 1, 1, message_modulus, carry_modulus);
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
msb_streams, gpu_indexes, gpu_count, lwe_array_sign_out,
encrypted_sign_block, trivial_sign_block, bsks, ksks, 1,
mem_ptr->signed_lut, mem_ptr->signed_lut->params.message_modulus);
@@ -541,10 +540,10 @@ __host__ void integer_radix_signed_scalar_maxmin_kb(
// Selector
// CMUX for Max or Min
legacy_host_integer_radix_cmux_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, sign, lwe_array_left,
lwe_array_right, mem_ptr->cmux_buffer, bsks, ksks,
total_num_radix_blocks);
host_integer_radix_cmux_kb<Torus>(streams, gpu_indexes, gpu_count,
lwe_array_out, sign, lwe_array_left,
lwe_array_right, mem_ptr->cmux_buffer, bsks,
ksks, total_num_radix_blocks);
}
template <typename Torus>
@@ -622,7 +621,7 @@ __host__ void host_integer_radix_scalar_maxmin_kb(
// Selector
// CMUX for Max or Min
legacy_host_integer_radix_cmux_kb<Torus>(
host_integer_radix_cmux_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out,
mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
@@ -686,7 +685,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
lsb_streams[0], gpu_indexes[0]);
scalar_comparison_luts->broadcast_lut(lsb_streams, gpu_indexes, 0);
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
lsb_streams, gpu_indexes, gpu_count, lwe_array_lsb_out, packed_blocks,
bsks, ksks, num_halved_lsb_radix_blocks, scalar_comparison_luts);
}

View File

@@ -47,6 +47,9 @@ __host__ void host_integer_scalar_mul_radix(
void *const *bsks, T *const *ksks, uint32_t input_lwe_dimension,
uint32_t message_modulus, uint32_t num_radix_blocks, uint32_t num_scalars) {
if (num_radix_blocks == 0 | num_scalars == 0)
return;
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
uint32_t lwe_size = input_lwe_dimension + 1;
@@ -80,7 +83,7 @@ __host__ void host_integer_scalar_mul_radix(
preshifted_buffer + (i % msg_bits) * num_radix_blocks * lwe_size;
T *block_shift_buffer =
all_shifted_buffer + j * num_radix_blocks * lwe_size;
legacy_host_radix_blocks_rotate_right<T>(
host_radix_blocks_rotate_right<T>(
streams, gpu_indexes, gpu_count, block_shift_buffer,
preshifted_radix_ct, i / msg_bits, num_radix_blocks, lwe_size);
// create trivial assign for value = 0
@@ -122,12 +125,12 @@ __host__ void host_integer_scalar_mul_radix(
// Small scalar_mul is used in shift/rotate
template <typename T>
__host__ void host_legacy_integer_small_scalar_mul_radix(
__host__ void host_integer_small_scalar_mul_radix(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, T *output_lwe_array, T *input_lwe_array, T scalar,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_indexes[0]);
cudaSetDevice(gpu_indexes[0]);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;
@@ -143,42 +146,4 @@ __host__ void host_legacy_integer_small_scalar_mul_radix(
input_lwe_ciphertext_count);
check_cuda_error(cudaGetLastError());
}
// Small scalar_mul is used in shift/rotate
template <typename T>
__host__ void host_integer_small_scalar_mul_radix(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *output_lwe_array,
CudaRadixCiphertextFFI *input_lwe_array, T scalar) {
if (output_lwe_array->num_radix_blocks != input_lwe_array->num_radix_blocks)
PANIC("Cuda error: input and output num radix blocks must be the same")
if (output_lwe_array->lwe_dimension != input_lwe_array->lwe_dimension)
PANIC("Cuda error: input and output lwe_dimension must be the same")
cuda_set_device(gpu_indexes[0]);
auto lwe_dimension = input_lwe_array->lwe_dimension;
auto num_radix_blocks = input_lwe_array->num_radix_blocks;
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = lwe_dimension + 1;
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;
int num_entries = num_radix_blocks * lwe_size;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
dim3 grid(num_blocks, 1, 1);
dim3 thds(num_threads, 1, 1);
device_small_scalar_radix_multiplication<<<grid, thds, 0, streams[0]>>>(
(T *)output_lwe_array->ptr, (T *)input_lwe_array->ptr, scalar,
lwe_dimension, num_radix_blocks);
check_cuda_error(cudaGetLastError());
for (int i = 0; i < num_radix_blocks; i++) {
output_lwe_array->noise_levels[i] =
input_lwe_array->noise_levels[i] * scalar;
output_lwe_array->degrees[i] = input_lwe_array->degrees[i] * scalar;
}
}
#endif

View File

@@ -57,9 +57,9 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
// one block is responsible to process single lwe ciphertext
if (mem->shift_type == LEFT_SHIFT) {
// rotate right as the blocks are from LSB to MSB
legacy_host_radix_blocks_rotate_right<Torus>(
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
num_blocks * big_lwe_size_bytes, streams[0],
@@ -71,22 +71,22 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
auto receiver_blocks = lwe_array;
auto giver_blocks = rotated_buffer;
legacy_host_radix_blocks_rotate_right<Torus>(
streams, gpu_indexes, gpu_count, giver_blocks, lwe_array, 1, num_blocks,
big_lwe_size);
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
giver_blocks, lwe_array, 1,
num_blocks, big_lwe_size);
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array, receiver_blocks,
giver_blocks, bsks, ksks, num_blocks, lut_bivariate,
lut_bivariate->params.message_modulus);
} else {
// rotate left as the blocks are from LSB to MSB
legacy_host_radix_blocks_rotate_left<Torus>(
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
num_blocks * big_lwe_size_bytes, streams[0],
@@ -98,13 +98,13 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
auto receiver_blocks = lwe_array;
auto giver_blocks = rotated_buffer;
legacy_host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
giver_blocks, lwe_array, 1,
num_blocks, big_lwe_size);
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
giver_blocks, lwe_array, 1, num_blocks,
big_lwe_size);
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array, receiver_blocks,
giver_blocks, bsks, ksks, num_blocks, lut_bivariate,
lut_bivariate->params.message_modulus);

View File

@@ -53,9 +53,9 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
if (mem->shift_type == LEFT_SHIFT) {
// rotate right as the blocks are from LSB to MSB
legacy_host_radix_blocks_rotate_right<Torus>(
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
// create trivial assign for value = 0
cuda_memset_async(rotated_buffer, 0, rotations * big_lwe_size_bytes,
@@ -75,7 +75,7 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
size_t partial_block_count = num_blocks - rotations;
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, partial_current_blocks,
partial_current_blocks, partial_previous_blocks, bsks, ksks,
partial_block_count, lut_bivariate,
@@ -83,9 +83,9 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
} else {
// right shift
legacy_host_radix_blocks_rotate_left<Torus>(
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
// rotate left as the blocks are from LSB to MSB
// create trivial assign for value = 0
@@ -106,7 +106,7 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
size_t partial_block_count = num_blocks - rotations;
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, partial_current_blocks,
partial_current_blocks, partial_next_blocks, bsks, ksks,
partial_block_count, lut_bivariate,
@@ -156,9 +156,9 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
Torus *last_block_copy = &padding_block[big_lwe_size];
if (mem->shift_type == RIGHT_SHIFT) {
legacy_host_radix_blocks_rotate_left<Torus>(
streams, gpu_indexes, gpu_count, rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
rotated_buffer, lwe_array, rotations,
num_blocks, big_lwe_size);
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
num_blocks * big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
@@ -200,7 +200,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
size_t partial_block_count = num_blocks - rotations;
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, partial_current_blocks,
partial_current_blocks, partial_next_blocks, bsks, ksks,
partial_block_count, lut_bivariate,
@@ -213,7 +213,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
}
auto lut_univariate_padding_block =
mem->lut_buffers_univariate[num_bits_in_block - 1];
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->local_streams_1, gpu_indexes, gpu_count, padding_block,
last_block_copy, bsks, ksks, 1, lut_univariate_padding_block);
// Replace blocks 'pulled' from the left with the correct padding
@@ -227,7 +227,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
if (shift_within_block != 0) {
auto lut_univariate_shift_last_block =
mem->lut_buffers_univariate[shift_within_block - 1];
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->local_streams_2, gpu_indexes, gpu_count, last_block,
last_block_copy, bsks, ksks, 1, lut_univariate_shift_last_block);
}

View File

@@ -22,13 +22,15 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(
void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
int8_t *mem_ptr, void *const *bsks, void *const *ksks) {
void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
void *const *ksks, uint32_t num_blocks) {
host_integer_radix_shift_and_rotate_kb_inplace<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array, lwe_shift,
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(lwe_shift),
(int_shift_and_rotate_buffer<uint64_t> *)mem_ptr, bsks,
(uint64_t **)(ksks));
(uint64_t **)(ksks), num_blocks);
}
void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,

View File

@@ -26,22 +26,9 @@ __host__ void scratch_cuda_integer_radix_shift_and_rotate_kb(
template <typename Torus>
__host__ void host_integer_radix_shift_and_rotate_kb_inplace(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array,
CudaRadixCiphertextFFI const *lwe_shift,
uint32_t gpu_count, Torus *lwe_array, Torus const *lwe_shift,
int_shift_and_rotate_buffer<Torus> *mem, void *const *bsks,
Torus *const *ksks) {
cuda_set_device(gpu_indexes[0]);
if (lwe_array->num_radix_blocks != lwe_shift->num_radix_blocks)
PANIC("Cuda error: lwe_shift and lwe_array num radix blocks must be "
"the same")
if (lwe_array->lwe_dimension != lwe_shift->lwe_dimension)
PANIC("Cuda error: lwe_shift and lwe_array lwe_dimension must be "
"the same")
auto num_radix_blocks = lwe_array->num_radix_blocks;
Torus *const *ksks, uint32_t num_radix_blocks) {
uint32_t bits_per_block = log2_int(mem->params.message_modulus);
uint32_t total_nb_bits = bits_per_block * num_radix_blocks;
if (total_nb_bits == 0)
@@ -51,14 +38,10 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
auto big_lwe_size = big_lwe_dimension + 1;
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
if (lwe_array->lwe_dimension != big_lwe_dimension)
PANIC("Cuda error: lwe_shift lwe_dimension must be equal to "
"big_lwe_dimension")
// Extract all bits
auto bits = mem->tmp_bits;
extract_n_bits<Torus>(streams, gpu_indexes, gpu_count, bits, lwe_array, bsks,
ksks, num_radix_blocks * bits_per_block,
ksks, num_radix_blocks, bits_per_block,
mem->bit_extract_luts);
// Extract shift bits
@@ -78,14 +61,13 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
// Extracts bits and put them in the bit index 2 (=> bit number 3)
// so that it is already aligned to the correct position of the cmux input
// and we reduce noise growth
extract_n_bits<Torus>(streams, gpu_indexes, gpu_count, shift_bits, lwe_shift,
bsks, ksks, max_num_bits_that_tell_shift,
extract_n_bits<Torus>(streams, gpu_indexes, gpu_count, shift_bits,
(Torus *)lwe_shift, bsks, ksks, 1,
max_num_bits_that_tell_shift,
mem->bit_extract_luts_with_offset_2);
// If signed, do an "arithmetic shift" by padding with the sign bit
CudaRadixCiphertextFFI last_bit;
as_radix_ciphertext_slice<Torus>(&last_bit, bits, (total_nb_bits - 1),
(total_nb_bits - 1));
auto last_bit = bits + (total_nb_bits - 1) * big_lwe_size;
// Apply op
auto rotated_input = mem->tmp_rotated;
@@ -94,76 +76,60 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
auto mux_lut = mem->mux_lut;
auto mux_inputs = mem->tmp_mux_inputs;
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], input_bits_a,
bits);
cuda_memcpy_async_gpu_to_gpu(input_bits_a, bits,
total_nb_bits * big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
for (int d = 0; d < max_num_bits_that_tell_shift; d++) {
CudaRadixCiphertextFFI shift_bit;
as_radix_ciphertext_slice<Torus>(&shift_bit, shift_bits, d, d);
auto shift_bit = shift_bits + d * big_lwe_size;
cuda_memcpy_async_gpu_to_gpu(input_bits_b, input_bits_a,
total_nb_bits * big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], input_bits_b,
input_bits_a);
auto rotations = 1 << d;
switch (mem->shift_type) {
case LEFT_SHIFT:
// rotate right as the blocks are from LSB to MSB
if (input_bits_b->num_radix_blocks != total_nb_bits)
PANIC("Cuda error: incorrect number of blocks")
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
rotated_input, input_bits_b,
rotations);
host_radix_blocks_rotate_right<Torus>(
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
rotations, total_nb_bits, big_lwe_size);
cuda_memset_async((Torus *)rotated_input->ptr, 0,
rotations * big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
memset(rotated_input->degrees, 0, rotations * sizeof(uint64_t));
memset(rotated_input->noise_levels, 0, rotations * sizeof(uint64_t));
if (mem->is_signed && mem->shift_type == RIGHT_SHIFT)
for (int i = 0; i < rotations; i++)
cuda_memcpy_async_gpu_to_gpu(rotated_input + i * big_lwe_size,
last_bit, big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
else
cuda_memset_async(rotated_input, 0, rotations * big_lwe_size_bytes,
streams[0], gpu_indexes[0]);
break;
case RIGHT_SHIFT:
// rotate left as the blocks are from LSB to MSB
if (input_bits_b->num_radix_blocks != total_nb_bits)
PANIC("Cuda error: incorrect number of blocks")
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
rotated_input, input_bits_b,
rotations);
host_radix_blocks_rotate_left<Torus>(
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
rotations, total_nb_bits, big_lwe_size);
if (mem->is_signed)
for (int i = 0; i < rotations; i++) {
CudaRadixCiphertextFFI slice_rotated_input;
as_radix_ciphertext_slice<Torus>(&slice_rotated_input, rotated_input,
(total_nb_bits - rotations + i),
(total_nb_bits - rotations + i));
for (int i = 0; i < rotations; i++)
cuda_memcpy_async_gpu_to_gpu(
(Torus *)slice_rotated_input.ptr, (Torus *)last_bit.ptr,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
slice_rotated_input.degrees[0] = last_bit.degrees[0];
slice_rotated_input.noise_levels[0] = last_bit.noise_levels[0];
}
else {
CudaRadixCiphertextFFI slice_rotated_input;
as_radix_ciphertext_slice<Torus>(&slice_rotated_input, rotated_input,
(total_nb_bits - rotations),
total_nb_bits);
cuda_memset_async(slice_rotated_input.ptr, 0,
rotations * big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
memset(slice_rotated_input.degrees, 0, rotations * sizeof(uint64_t));
memset(slice_rotated_input.noise_levels, 0,
rotations * sizeof(uint64_t));
}
rotated_input + (total_nb_bits - rotations + i) * big_lwe_size,
last_bit, big_lwe_size_bytes, streams[0], gpu_indexes[0]);
else
cuda_memset_async(
rotated_input + (total_nb_bits - rotations) * big_lwe_size, 0,
rotations * big_lwe_size_bytes, streams[0], gpu_indexes[0]);
break;
case LEFT_ROTATE:
// rotate right as the blocks are from LSB to MSB
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
rotated_input, input_bits_b,
rotations);
host_radix_blocks_rotate_right<Torus>(
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
rotations, total_nb_bits, big_lwe_size);
break;
case RIGHT_ROTATE:
// rotate left as the blocks are from LSB to MSB
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
rotated_input, input_bits_b,
rotations);
host_radix_blocks_rotate_left<Torus>(
streams, gpu_indexes, gpu_count, rotated_input, input_bits_b,
rotations, total_nb_bits, big_lwe_size);
break;
default:
PANIC("Unknown operation")
@@ -171,60 +137,61 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
// host_pack bits into one block so that we have
// control_bit|b|a
pack_bivariate_blocks<Torus>(
streams, gpu_indexes, gpu_count, (Torus *)mux_inputs->ptr,
mux_lut->lwe_indexes_out, (Torus *)rotated_input->ptr,
(Torus *)input_bits_a->ptr, mux_lut->lwe_indexes_in, big_lwe_dimension,
2, total_nb_bits);
pack_bivariate_blocks<Torus>(streams, gpu_indexes, gpu_count, mux_inputs,
mux_lut->lwe_indexes_out, rotated_input,
input_bits_a, mux_lut->lwe_indexes_in,
big_lwe_dimension, 2, total_nb_bits);
// The shift bit is already properly aligned/positioned
host_add_the_same_block_to_all_blocks<Torus>(
streams[0], gpu_indexes[0], mux_inputs, mux_inputs, &shift_bit);
for (int i = 0; i < total_nb_bits; i++)
host_addition<Torus>(streams[0], gpu_indexes[0],
mux_inputs + i * big_lwe_size,
mux_inputs + i * big_lwe_size, shift_bit,
mem->params.big_lwe_dimension, 1);
// we have
// control_bit|b|a
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, input_bits_a, mux_inputs, bsks, ksks,
mux_lut, total_nb_bits);
total_nb_bits, mux_lut);
}
// Initializes the output
// Copy the last bit for each radix block
auto lwe_last_out = lwe_array;
last_bit = input_bits_a + (bits_per_block - 1) * big_lwe_size;
for (int i = 0; i < num_radix_blocks; i++) {
CudaRadixCiphertextFFI last_bit, lwe_last_out;
as_radix_ciphertext_slice<Torus>(&last_bit, input_bits_a,
(bits_per_block - 1) + i * bits_per_block,
(bits_per_block - 1) + i * bits_per_block);
as_radix_ciphertext_slice<Torus>(&lwe_last_out, lwe_array, i, i);
cuda_memcpy_async_gpu_to_gpu(lwe_last_out, last_bit, big_lwe_size_bytes,
streams[0], gpu_indexes[0]);
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
&lwe_last_out, &last_bit);
lwe_last_out += big_lwe_size;
last_bit += bits_per_block * big_lwe_size;
}
// Bitshift and add the other bits
lwe_last_out = lwe_array;
for (int i = bits_per_block - 2; i >= 0; i--) {
host_integer_small_scalar_mul_radix<Torus>(streams, gpu_indexes, gpu_count,
lwe_array, lwe_array, 2);
host_integer_small_scalar_mul_radix<Torus>(
streams, gpu_indexes, gpu_count, lwe_last_out, lwe_last_out, 2,
big_lwe_dimension, num_radix_blocks);
auto block = lwe_last_out;
auto bit_to_add = input_bits_a + i * big_lwe_size;
CudaRadixCiphertextFFI bit_to_add;
as_radix_ciphertext_slice<Torus>(&bit_to_add, input_bits_a, i, i);
for (int j = 0; j < num_radix_blocks; j++) {
CudaRadixCiphertextFFI block;
as_radix_ciphertext_slice<Torus>(&block, lwe_array, j, j);
host_addition<Torus>(streams[0], gpu_indexes[0], block, block, bit_to_add,
big_lwe_dimension, 1);
host_addition<Torus>(streams[0], gpu_indexes[0], &block, &block,
&bit_to_add);
as_radix_ciphertext_slice<Torus>(&bit_to_add, &bit_to_add, bits_per_block,
bits_per_block);
block += big_lwe_size;
bit_to_add += bits_per_block * big_lwe_size;
}
// To give back a clean ciphertext
auto cleaning_lut = mem->cleaning_lut;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array, lwe_array, bsks, ksks,
cleaning_lut, lwe_array->num_radix_blocks);
streams, gpu_indexes, gpu_count, lwe_last_out, lwe_last_out, bsks, ksks,
num_radix_blocks, cleaning_lut);
}
}
#endif

View File

@@ -1,13 +1,20 @@
#include "integer/integer.h"
#include "linearalgebra/addition.cuh"
/*
* Perform the addition of two u32 input LWE ciphertext vectors.
* See the equivalent operation on u64 ciphertexts for more details.
*/
void cuda_add_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_in_1,
CudaRadixCiphertextFFI const *lwe_array_in_2) {
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_addition<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
lwe_array_out, lwe_array_in_1, lwe_array_in_2);
static_cast<uint32_t *>(lwe_array_out),
static_cast<const uint32_t *>(lwe_array_in_1),
static_cast<const uint32_t *>(lwe_array_in_2),
input_lwe_dimension, input_lwe_ciphertext_count);
}
/*
@@ -37,12 +44,16 @@ void cuda_add_lwe_ciphertext_vector_32(
* that performs the operation on the GPU.
*/
void cuda_add_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_out,
CudaRadixCiphertextFFI const *lwe_array_in_1,
CudaRadixCiphertextFFI const *lwe_array_in_2) {
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_addition<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
lwe_array_out, lwe_array_in_1, lwe_array_in_2);
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_in_1),
static_cast<const uint64_t *>(lwe_array_in_2),
input_lwe_dimension, input_lwe_ciphertext_count);
}
/*

View File

@@ -8,7 +8,6 @@
#include "device.h"
#include "helper_multi_gpu.h"
#include "integer/integer.h"
#include "linear_algebra.h"
#include "utils/kernel_dimensions.cuh"
#include <stdio.h>
@@ -51,7 +50,7 @@ __host__ void host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index,
const uint32_t lwe_dimension,
const uint32_t lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
int num_blocks = 0, num_threads = 0;
int num_entries = lwe_ciphertext_count;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
@@ -72,7 +71,7 @@ __host__ void host_addition_plaintext_scalar(
const T plaintext_input, const uint32_t lwe_dimension,
const uint32_t lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
int num_blocks = 0, num_threads = 0;
int num_entries = lwe_ciphertext_count;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
@@ -101,105 +100,12 @@ __global__ void addition(T *output, T const *input_1, T const *input_2,
// Coefficient-wise addition
template <typename T>
__host__ void host_addition(cudaStream_t stream, uint32_t gpu_index,
CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *input_1,
CudaRadixCiphertextFFI const *input_2) {
if (output->num_radix_blocks != input_1->num_radix_blocks ||
output->num_radix_blocks != input_2->num_radix_blocks)
PANIC("Cuda error: input and output num radix blocks must be the same")
if (output->lwe_dimension != input_1->lwe_dimension ||
output->lwe_dimension != input_2->lwe_dimension)
PANIC("Cuda error: input and output num radix blocks must be the same")
__host__ void host_addition(cudaStream_t stream, uint32_t gpu_index, T *output,
T const *input_1, T const *input_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = output->lwe_dimension + 1;
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;
int num_entries = output->num_radix_blocks * lwe_size;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
dim3 grid(num_blocks, 1, 1);
dim3 thds(num_threads, 1, 1);
addition<T><<<grid, thds, 0, stream>>>(
static_cast<T *>(output->ptr), static_cast<const T *>(input_1->ptr),
static_cast<const T *>(input_2->ptr), num_entries);
check_cuda_error(cudaGetLastError());
for (uint i = 0; i < output->num_radix_blocks; i++) {
output->degrees[i] = input_1->degrees[i] + input_2->degrees[i];
output->noise_levels[i] =
input_1->noise_levels[i] + input_2->noise_levels[i];
}
}
template <typename T>
__global__ void constant_addition(T *output, T const *input_1, T const *input_2,
uint32_t lwe_size, uint32_t num_entries) {
int tid = threadIdx.x;
int index = blockIdx.x * blockDim.x + tid;
if (index < num_entries) {
// Here we take advantage of the wrapping behaviour of uint
output[index] = input_1[index] + input_2[index % lwe_size];
}
}
// Coefficient-wise addition by the same block
// input_with_multiple_blocks is a radix ciphertext with potentially multiple
// blocks input_with_single_block is a radix ciphertext with a single block
//
// This function adds the single block in input_with_single_block to each block
// in input_with_multiple_blocks. The result is written to output
template <typename T>
__host__ void host_add_the_same_block_to_all_blocks(
cudaStream_t stream, uint32_t gpu_index, CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *input_with_multiple_blocks,
CudaRadixCiphertextFFI const *input_with_single_block) {
if (output->num_radix_blocks != input_with_multiple_blocks->num_radix_blocks)
PANIC("Cuda error: input and output num radix blocks must be the same")
if (input_with_single_block->num_radix_blocks != 1)
PANIC(
"Cuda error: input_with_single_block must be a single-block ciphertext")
if (output->lwe_dimension != input_with_multiple_blocks->lwe_dimension ||
output->lwe_dimension != input_with_single_block->lwe_dimension)
PANIC("Cuda error: input and output lwe dimensions must be the same")
cuda_set_device(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = output->lwe_dimension + 1;
// Create a 1-dimensional grid of threads
int num_blocks = 0, num_threads = 0;
int num_entries = output->num_radix_blocks * lwe_size;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
dim3 grid(num_blocks, 1, 1);
dim3 thds(num_threads, 1, 1);
constant_addition<T><<<grid, thds, 0, stream>>>(
static_cast<T *>(output->ptr),
static_cast<const T *>(input_with_multiple_blocks->ptr),
static_cast<const T *>(input_with_single_block->ptr), lwe_size,
num_entries);
check_cuda_error(cudaGetLastError());
for (uint i = 0; i < output->num_radix_blocks; i++) {
output->degrees[i] = input_with_multiple_blocks->degrees[i] +
input_with_single_block->degrees[0];
output->noise_levels[i] = input_with_multiple_blocks->noise_levels[i] +
input_with_single_block->noise_levels[0];
}
}
// Coefficient-wise addition
template <typename T>
__host__ void legacy_host_addition(cudaStream_t stream, uint32_t gpu_index,
T *output, T const *input_1,
T const *input_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;
@@ -235,7 +141,7 @@ __host__ void host_pack_for_overflowing_ops(cudaStream_t stream,
uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;
@@ -273,7 +179,7 @@ __host__ void host_subtraction(cudaStream_t stream, uint32_t gpu_index,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;
@@ -311,7 +217,7 @@ __host__ void host_subtraction_plaintext(cudaStream_t stream,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
int num_blocks = 0, num_threads = 0;
int num_entries = input_lwe_ciphertext_count;
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
@@ -357,7 +263,7 @@ __host__ void host_unchecked_sub_with_correcting_term(
uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, uint32_t degree) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;

View File

@@ -34,7 +34,7 @@ __host__ void host_cleartext_vec_multiplication(
T const *cleartext_input, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;
@@ -70,7 +70,7 @@ host_cleartext_multiplication(cudaStream_t stream, uint32_t gpu_index,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;

View File

@@ -26,7 +26,7 @@ __host__ void host_negation(cudaStream_t stream, uint32_t gpu_index, T *output,
T const *input, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body
// whereas lwe_dimension is the number of elements in the mask
int lwe_size = input_lwe_dimension + 1;

View File

@@ -96,7 +96,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
uint32_t total_polynomials) {
auto stream = static_cast<cudaStream_t>(stream_v);
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
auto input1 = (double2 *)_input1;
auto input2 = (double2 *)_input2;
auto output = (double2 *)_output;
@@ -106,12 +106,10 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
int gridSize = total_polynomials;
int blockSize = polynomial_size / choose_opt_amortized(polynomial_size);
int max_shared_memory = cuda_get_max_shared_memory(gpu_index);
double2 *buffer;
switch (polynomial_size) {
case 256:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
check_cuda_error(cudaFuncSetAttribute(
batch_polynomial_mul<FFTDegree<AmortizedDegree<256>, ForwardFFT>,
@@ -132,7 +130,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
}
break;
case 512:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
check_cuda_error(cudaFuncSetAttribute(
batch_polynomial_mul<FFTDegree<AmortizedDegree<521>, ForwardFFT>,
@@ -153,7 +151,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
}
break;
case 1024:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
check_cuda_error(cudaFuncSetAttribute(
batch_polynomial_mul<FFTDegree<AmortizedDegree<1024>, ForwardFFT>,
@@ -174,7 +172,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
}
break;
case 2048:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
check_cuda_error(cudaFuncSetAttribute(
batch_polynomial_mul<FFTDegree<AmortizedDegree<2048>, ForwardFFT>,
@@ -195,7 +193,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
}
break;
case 4096:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
check_cuda_error(cudaFuncSetAttribute(
batch_polynomial_mul<FFTDegree<AmortizedDegree<4096>, ForwardFFT>,
@@ -216,7 +214,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
}
break;
case 8192:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
check_cuda_error(cudaFuncSetAttribute(
batch_polynomial_mul<FFTDegree<AmortizedDegree<8192>, ForwardFFT>,
@@ -237,7 +235,7 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
}
break;
case 16384:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
check_cuda_error(cudaFuncSetAttribute(
batch_polynomial_mul<FFTDegree<AmortizedDegree<16384>, ForwardFFT>,

View File

@@ -78,7 +78,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
double2 *dest, ST const *src,
uint32_t polynomial_size,
uint32_t total_polynomials) {
cuda_set_device(gpu_index);
cudaSetDevice(gpu_index);
int shared_memory_size = sizeof(double) * polynomial_size;
// Here the buffer size is the size of double2 times the number of polynomials
@@ -111,12 +111,10 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
cuda_memcpy_async_to_gpu(d_bsk, h_bsk, buffer_size, stream, gpu_index);
int max_shared_memory = cuda_get_max_shared_memory(gpu_index);
double2 *buffer = (double2 *)cuda_malloc_async(0, stream, gpu_index);
switch (polynomial_size) {
case 256:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
check_cuda_error(cudaFuncSetAttribute(
batch_NSMFFT<FFTDegree<AmortizedDegree<256>, ForwardFFT>, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
@@ -134,7 +132,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
}
break;
case 512:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
check_cuda_error(cudaFuncSetAttribute(
batch_NSMFFT<FFTDegree<AmortizedDegree<512>, ForwardFFT>, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
@@ -152,7 +150,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
}
break;
case 1024:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
check_cuda_error(cudaFuncSetAttribute(
batch_NSMFFT<FFTDegree<AmortizedDegree<1024>, ForwardFFT>, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
@@ -170,7 +168,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
}
break;
case 2048:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
check_cuda_error(cudaFuncSetAttribute(
batch_NSMFFT<FFTDegree<AmortizedDegree<2048>, ForwardFFT>, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
@@ -188,7 +186,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
}
break;
case 4096:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
check_cuda_error(cudaFuncSetAttribute(
batch_NSMFFT<FFTDegree<AmortizedDegree<4096>, ForwardFFT>, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
@@ -206,7 +204,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
}
break;
case 8192:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
check_cuda_error(cudaFuncSetAttribute(
batch_NSMFFT<FFTDegree<AmortizedDegree<8192>, ForwardFFT>, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
@@ -224,7 +222,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
}
break;
case 16384:
if (shared_memory_size <= max_shared_memory) {
if (shared_memory_size <= cuda_get_max_shared_memory(0)) {
check_cuda_error(cudaFuncSetAttribute(
batch_NSMFFT<FFTDegree<AmortizedDegree<16384>, ForwardFFT>, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));

Some files were not shown because too many files have changed in this diff Show More