mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
3 Commits
pa/fix/com
...
dt/bench/f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cae938a75b | ||
|
|
bae1d1cf77 | ||
|
|
a3bc1a9d9e |
63
.github/actions/gpu_setup/action.yml
vendored
63
.github/actions/gpu_setup/action.yml
vendored
@@ -1,63 +0,0 @@
|
||||
name: Setup Cuda
|
||||
description: Setup Cuda on Hyperstack or GitHub instance
|
||||
|
||||
inputs:
|
||||
cuda-version:
|
||||
description: Version of Cuda to use
|
||||
required: true
|
||||
gcc-version:
|
||||
description: Version of GCC to use
|
||||
required: true
|
||||
cmake-version:
|
||||
description: Version of cmake to use
|
||||
default: 3.29.6
|
||||
github-instance:
|
||||
description: Instance is hosted on GitHub
|
||||
default: 'false'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
curl -fsSL https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/kitware.gpg
|
||||
sudo chmod 644 /etc/apt/trusted.gpg.d/kitware.gpg
|
||||
echo 'deb [signed-by=/etc/apt/trusted.gpg.d/kitware.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null
|
||||
sudo apt update
|
||||
sudo apt install -y cmake cmake-format libclang-dev
|
||||
|
||||
- name: Install CUDA
|
||||
if: inputs.github-instance == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
TOOLKIT_VERSION="$(echo ${{ inputs.cuda-version }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt update
|
||||
sudo apt -y install cuda-toolkit-${TOOLKIT_VERSION}
|
||||
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
|
||||
echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
shell: bash
|
||||
run: nvidia-smi
|
||||
53
.github/actions/hyperstack_setup/action.yml
vendored
Normal file
53
.github/actions/hyperstack_setup/action.yml
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
name: Setup Cuda
|
||||
description: Setup Cuda on Hyperstack instance
|
||||
|
||||
inputs:
|
||||
cuda-version:
|
||||
description: Version of Cuda to use
|
||||
required: true
|
||||
gcc-version:
|
||||
description: Version of GCC to use
|
||||
required: true
|
||||
cmake-version:
|
||||
description: Version of cmake to use
|
||||
default: 3.29.6
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
cd cmake-${{ inputs.cmake-version }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ inputs.cuda-version }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
|
||||
echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
shell: bash
|
||||
run: nvidia-smi
|
||||
@@ -11,10 +11,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -26,11 +22,10 @@ jobs:
|
||||
name: Setup instance (backward-compat-tests)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -40,18 +35,11 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
backward-compat-tests:
|
||||
name: Backward compatibility tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -59,7 +47,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -102,7 +90,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (backward-compat-tests)
|
||||
@@ -110,9 +98,8 @@ jobs:
|
||||
needs: [ setup-instance, backward-compat-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -127,4 +114,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
68
.github/workflows/aws_tfhe_fast_tests.yml
vendored
68
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -11,22 +11,32 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.head_ref || github.ref }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
REF: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
|
||||
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/**'
|
||||
- 'ci/**'
|
||||
# General entry point for Zama's pull request as well as contribution from forks.
|
||||
pull_request_target:
|
||||
paths:
|
||||
- '**'
|
||||
- '!.github/**'
|
||||
- '!ci/**'
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
|
||||
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
@@ -52,19 +62,21 @@ jobs:
|
||||
user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.user_docs_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
ci_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ci_any_changed }}
|
||||
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -109,9 +121,13 @@ jobs:
|
||||
- '!tfhe/src/c_api/**'
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- README.md
|
||||
ci:
|
||||
- .github/**
|
||||
- ci/**
|
||||
|
||||
- name: Aggregate file changes
|
||||
id: aggregated-changes
|
||||
# CI files are not included in this aggregator.
|
||||
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
|
||||
@@ -126,18 +142,27 @@ jobs:
|
||||
run: |
|
||||
echo "any_changed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Fail if the triggering actor is not part of Zama organization.
|
||||
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
|
||||
check-user-permission:
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_target' && needs.should-run.outputs.ci_file_changed == 'false')
|
||||
uses: ./.github/workflows/check_triggering_actor.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (fast-tests)
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
|
||||
needs: should-run
|
||||
needs: [ should-run, check-user-permission ]
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -147,13 +172,6 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
fast-tests:
|
||||
name: Fast CPU tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
@@ -166,7 +184,8 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ref: ${{ env.REF }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -265,7 +284,7 @@ jobs:
|
||||
make test_zk
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
@@ -278,9 +297,8 @@ jobs:
|
||||
needs: [ setup-instance, fast-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
|
||||
43
.github/workflows/aws_tfhe_integer_tests.yml
vendored
43
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -14,16 +14,12 @@ env:
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
NO_BIG_PARAMS: FALSE
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types: [labeled]
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
@@ -32,11 +28,12 @@ jobs:
|
||||
should-run:
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
steps.changed-files.outputs.integer_any_changed }}
|
||||
@@ -45,13 +42,14 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
integer:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -69,15 +67,14 @@ jobs:
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -87,18 +84,11 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
unsigned-integer-tests:
|
||||
name: Unsigned integer tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -106,7 +96,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -140,7 +130,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (unsigned-integer-tests)
|
||||
@@ -148,9 +138,8 @@ jobs:
|
||||
needs: [setup-instance, unsigned-integer-tests]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -165,4 +154,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -14,16 +14,12 @@ env:
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
NO_BIG_PARAMS: FALSE
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types: [labeled]
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
@@ -37,7 +33,7 @@ jobs:
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
steps.changed-files.outputs.integer_any_changed }}
|
||||
@@ -46,13 +42,14 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
integer:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -70,15 +67,14 @@ jobs:
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -88,18 +84,11 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
signed-integer-tests:
|
||||
name: Signed integer tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -107,7 +96,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -145,7 +134,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (signed-integer-tests)
|
||||
@@ -153,9 +142,8 @@ jobs:
|
||||
needs: [setup-instance, signed-integer-tests]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -170,4 +158,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
37
.github/workflows/aws_tfhe_tests.yml
vendored
37
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -11,10 +11,6 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -31,7 +27,7 @@ jobs:
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
|
||||
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
@@ -67,13 +63,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -142,11 +138,10 @@ jobs:
|
||||
needs: should-run
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -156,20 +151,13 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cpu-tests:
|
||||
name: CPU tests
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -177,7 +165,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -252,7 +240,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
@@ -260,9 +248,8 @@ jobs:
|
||||
needs: [ setup-instance, cpu-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -277,4 +264,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
29
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
29
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -10,10 +10,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -27,11 +23,10 @@ jobs:
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -41,18 +36,11 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
wasm-tests:
|
||||
name: WASM tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -60,7 +48,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -121,7 +109,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (wasm-tests)
|
||||
@@ -129,9 +117,8 @@ jobs:
|
||||
needs: [ setup-instance, wasm-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -146,4 +133,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
6
.github/workflows/benchmark_boolean.yml
vendored
6
.github/workflows/benchmark_boolean.yml
vendored
@@ -51,8 +51,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -104,8 +103,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
6
.github/workflows/benchmark_core_crypto.yml
vendored
6
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -50,8 +50,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -95,8 +94,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
6
.github/workflows/benchmark_erc20.yml
vendored
6
.github/workflows/benchmark_erc20.yml
vendored
@@ -52,8 +52,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -73,8 +72,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
|
||||
22
.github/workflows/benchmark_gpu_4090.yml
vendored
22
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -17,7 +17,7 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types: [labeled]
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
@@ -33,13 +33,16 @@ jobs:
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -60,8 +63,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run integer benchmarks
|
||||
run: |
|
||||
@@ -97,7 +99,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
cuda-core-crypto-benchmarks:
|
||||
name: Cuda core crypto benchmarks (RTX 4090)
|
||||
@@ -114,8 +116,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -135,8 +136,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run core crypto benchmarks
|
||||
run: |
|
||||
@@ -182,7 +182,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
remove_github_label:
|
||||
name: Remove 4090 bench label
|
||||
|
||||
13
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
13
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
@@ -53,11 +53,10 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
@@ -70,6 +69,11 @@ jobs:
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
@@ -104,8 +108,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
15
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
15
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -14,7 +14,7 @@ on:
|
||||
type: string
|
||||
required: true
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN:
|
||||
FHE_ACTIONS_TOKEN:
|
||||
required: true
|
||||
SLAB_ACTION_TOKEN:
|
||||
required: true
|
||||
@@ -80,11 +80,10 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
@@ -97,6 +96,11 @@ jobs:
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
@@ -130,8 +134,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
@@ -26,7 +26,7 @@ on:
|
||||
type: boolean
|
||||
default: false
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN:
|
||||
FHE_ACTIONS_TOKEN:
|
||||
required: true
|
||||
SLAB_ACTION_TOKEN:
|
||||
required: true
|
||||
@@ -150,11 +150,10 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
@@ -167,6 +166,11 @@ jobs:
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
@@ -206,8 +210,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
6
.github/workflows/benchmark_integer.yml
vendored
6
.github/workflows/benchmark_integer.yml
vendored
@@ -119,8 +119,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -140,8 +139,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
|
||||
6
.github/workflows/benchmark_shortint.yml
vendored
6
.github/workflows/benchmark_shortint.yml
vendored
@@ -82,8 +82,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -103,8 +102,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
|
||||
@@ -119,8 +119,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -140,8 +139,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
|
||||
3
.github/workflows/benchmark_tfhe_fft.yml
vendored
3
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -94,8 +94,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
3
.github/workflows/benchmark_tfhe_ntt.yml
vendored
3
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -94,8 +94,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
26
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
26
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -3,14 +3,6 @@ name: tfhe-zk-pok benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
bench_type:
|
||||
description: "Benchmarks type"
|
||||
type: choice
|
||||
default: latency
|
||||
options:
|
||||
- latency
|
||||
- throughput
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
@@ -28,7 +20,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
BENCH_TYPE: ${{ inputs.bench_type || 'latency' }}
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -45,8 +36,9 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
- tfhe-zk-pok/**
|
||||
@@ -88,8 +80,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -109,12 +100,11 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make BENCH_TYPE=${{ env.BENCH_TYPE }} bench_tfhe_zk_pok
|
||||
make bench_tfhe_zk_pok
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -128,8 +118,7 @@ jobs:
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
@@ -142,8 +131,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
14
.github/workflows/benchmark_wasm_client.yml
vendored
14
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
|
||||
steps:
|
||||
@@ -36,13 +36,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
wasm_bench:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -88,8 +88,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -177,8 +176,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
15
.github/workflows/benchmark_zk_pke.yml
vendored
15
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -43,13 +43,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -130,8 +130,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -151,8 +150,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
@@ -189,8 +187,7 @@ jobs:
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
|
||||
1
.github/workflows/check_commit.yml
vendored
1
.github/workflows/check_commit.yml
vendored
@@ -2,7 +2,6 @@
|
||||
name: Check commit and PR compliance
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
check-commit-pr:
|
||||
name: Check commit and PR
|
||||
|
||||
29
.github/workflows/check_triggering_actor.yml
vendored
Normal file
29
.github/workflows/check_triggering_actor.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
# Check if triggering actor is a collaborator and has write access
|
||||
name: Check Triggering Actor
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
secrets:
|
||||
TOKEN:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
check-actor-permission:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get User Permission
|
||||
id: check-access
|
||||
uses: actions-cool/check-user-permission@7b90a27f92f3961b368376107661682c441f6103 # v2.3.0
|
||||
with:
|
||||
require: write
|
||||
username: ${{ github.triggering_actor }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
|
||||
- name: Check User Permission
|
||||
if: steps.check-access.outputs.require-result == 'false'
|
||||
run: |
|
||||
echo "${{ github.triggering_actor }} does not have permissions on this repo."
|
||||
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
|
||||
echo "Job originally triggered by ${{ github.actor }}"
|
||||
exit 1
|
||||
7
.github/workflows/ci_lint.yml
vendored
7
.github/workflows/ci_lint.yml
vendored
@@ -6,7 +6,6 @@ on:
|
||||
|
||||
env:
|
||||
ACTIONLINT_VERSION: 1.6.27
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
jobs:
|
||||
lint-check:
|
||||
@@ -15,9 +14,6 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Get actionlint
|
||||
run: |
|
||||
@@ -31,8 +27,7 @@ jobs:
|
||||
make lint_workflow
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@25ed13d0628a1601b4b44048e63cc4328ed03633 # v3.0.22
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@c3a2b64f69b7a1542a68f44d9edbd9ec3fc1455e # v3.0.20
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
./
|
||||
|
||||
6
.github/workflows/code_coverage.yml
vendored
6
.github/workflows/code_coverage.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
29
.github/workflows/csprng_randomness_tests.yml
vendored
29
.github/workflows/csprng_randomness_tests.yml
vendored
@@ -10,10 +10,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -27,11 +23,10 @@ jobs:
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -41,18 +36,11 @@ jobs:
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
csprng-randomness-tests:
|
||||
name: CSPRNG randomness tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -60,7 +48,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -77,7 +65,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (csprng-randomness-tests)
|
||||
@@ -85,9 +73,8 @@ jobs:
|
||||
needs: [ setup-instance, csprng-randomness-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -102,4 +89,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
9
.github/workflows/gpu_4090_tests.yml
vendored
9
.github/workflows/gpu_4090_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an RTX 4090 machine
|
||||
name: Cuda - 4090 full tests
|
||||
name: TFHE Cuda Backend - 4090 full tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,7 +11,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -29,7 +28,7 @@ jobs:
|
||||
contains(github.event.label.name, '4090_test') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
|
||||
@@ -38,7 +37,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -81,4 +80,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
49
.github/workflows/gpu_fast_h100_tests.yml
vendored
49
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: Cuda - Fast tests on H100
|
||||
name: TFHE Cuda Backend - Fast tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -12,22 +12,18 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -35,13 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -68,11 +64,10 @@ jobs:
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -82,20 +77,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -111,14 +99,17 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -151,11 +142,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -163,9 +153,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -180,4 +169,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
49
.github/workflows/gpu_fast_tests.yml
vendored
49
.github/workflows/gpu_fast_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - Fast tests
|
||||
name: TFHE Cuda Backend - Fast tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -12,10 +12,6 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -26,7 +22,7 @@ jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -34,13 +30,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -62,15 +58,14 @@ jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests)
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
if: github.event_name != 'pull_request' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -80,20 +75,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -109,14 +97,17 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -149,11 +140,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -161,9 +151,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -178,4 +167,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
15
.github/workflows/gpu_full_h100_tests.yml
vendored
15
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: Cuda - Full tests on H100
|
||||
name: TFHE Cuda Backend - Full tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,6 +11,7 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -65,14 +66,18 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
@@ -105,7 +110,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -128,4 +133,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
47
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
47
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: Cuda - Full tests multi-GPU
|
||||
name: TFHE Cuda Backend - Full tests multi-GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -12,10 +12,6 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -27,7 +23,7 @@ jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -35,13 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -68,11 +64,10 @@ jobs:
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -82,20 +77,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: multi-gpu-test
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA multi-GPU tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -111,14 +99,17 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -154,11 +145,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests-multi-gpu)
|
||||
@@ -166,9 +156,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -183,4 +172,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -59,11 +59,15 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
|
||||
44
.github/workflows/gpu_pcc.yml
vendored
44
.github/workflows/gpu_pcc.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Perfom tfhe-cuda-backend post-commit checks on an AWS instance
|
||||
name: Cuda - Post-commit Checks
|
||||
name: TFHE Cuda Backend - Post-commit Checks
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,10 +11,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16-22.04"
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -24,11 +20,10 @@ jobs:
|
||||
name: Setup instance (cuda-pcc)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -38,18 +33,11 @@ jobs:
|
||||
backend: aws
|
||||
profile: gpu-build
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-pcc:
|
||||
name: CUDA post-commit checks
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -68,17 +56,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install CUDA
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
shell: bash
|
||||
- name: Set up home
|
||||
run: |
|
||||
TOOLKIT_VERSION="$(echo ${{ matrix.cuda }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt update
|
||||
sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -101,6 +83,7 @@ jobs:
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run fmt checks
|
||||
@@ -112,12 +95,12 @@ jobs:
|
||||
make pcc_gpu
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-pcc)
|
||||
@@ -125,9 +108,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-pcc ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -142,4 +124,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
|
||||
name: Cuda - Signed integer tests with classical PBS
|
||||
name: TFHE Cuda Backend - Signed integer tests with classical PBS
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -12,22 +12,18 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -35,13 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -68,11 +64,10 @@ jobs:
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -82,20 +77,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA signed integer tests with classical PBS
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -109,16 +97,16 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -137,11 +125,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-signed-classic-tests)
|
||||
@@ -149,9 +136,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -166,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Signed integer GPU tests on an H100 VM on hyperstack
|
||||
name: Cuda - Signed integer tests on H100
|
||||
name: TFHE Cuda Backend - Signed integer tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -12,23 +12,18 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -36,13 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -69,11 +64,10 @@ jobs:
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -83,20 +77,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 signed integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -110,16 +97,16 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -138,11 +125,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -150,9 +136,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -167,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
50
.github/workflows/gpu_signed_integer_tests.yml
vendored
50
.github/workflows/gpu_signed_integer_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend signed integer on an AWS instance
|
||||
name: Cuda - Signed integer tests
|
||||
name: TFHE Cuda Backend - Signed integer tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -14,15 +14,14 @@ env:
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
@@ -31,7 +30,7 @@ jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -39,13 +38,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -63,6 +62,7 @@ jobs:
|
||||
- '.github/workflows/gpu_signed_integer_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-signed-integer-tests)
|
||||
runs-on: ubuntu-latest
|
||||
@@ -71,11 +71,10 @@ jobs:
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -85,20 +84,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-signed-integer-tests:
|
||||
name: CUDA signed integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -114,14 +106,17 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -151,7 +146,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -159,9 +154,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-signed-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -176,4 +170,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
|
||||
name: Cuda - Unsigned integer tests with classical PBS
|
||||
name: TFHE Cuda Backend - Unsigned integer tests with classical PBS
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -12,23 +12,18 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -36,13 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -64,16 +59,15 @@ jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-unsigned-classic-tests)
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -83,20 +77,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA unsigned integer tests with classical PBS
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -110,16 +97,16 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -138,11 +125,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-unsigned-classic-tests)
|
||||
@@ -150,9 +136,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -167,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Test unsigned integers on an H100 VM on hyperstack
|
||||
name: Cuda - Unsigned integer tests on H100
|
||||
name: TFHE Cuda Backend - Unsigned integer tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -12,22 +12,18 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -35,13 +31,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -63,16 +59,15 @@ jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -82,20 +77,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 unsigned integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -109,16 +97,16 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -137,11 +125,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
@@ -149,9 +136,8 @@ jobs:
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -166,4 +152,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
50
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
50
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
|
||||
name: Cuda - Unsigned integer tests
|
||||
name: TFHE Cuda Backend - Unsigned integer tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -13,16 +13,14 @@ env:
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
@@ -31,7 +29,7 @@ jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
@@ -39,13 +37,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
@@ -72,11 +70,10 @@ jobs:
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -86,20 +83,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-unsigned-integer-tests:
|
||||
name: CUDA unsigned integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -113,16 +103,16 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -149,11 +139,10 @@ jobs:
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -163,7 +152,6 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
@@ -178,4 +166,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
2
.github/workflows/integer_long_run_tests.yml
vendored
2
.github/workflows/integer_long_run_tests.yml
vendored
@@ -51,7 +51,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
|
||||
14
.github/workflows/m1_tests.yml
vendored
14
.github/workflows/m1_tests.yml
vendored
@@ -3,7 +3,7 @@ name: Tests on M1 CPU
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
types: [labeled]
|
||||
# Have a nightly build for M1 tests
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
@@ -21,17 +21,14 @@ env:
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
cargo-builds-m1:
|
||||
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
contains(github.event.label.name, 'm1_test') }}
|
||||
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
|
||||
runs-on: ["self-hosted", "m1mac"]
|
||||
# 12 hours, default is 6 hours, hopefully this is more than enough
|
||||
timeout-minutes: 720
|
||||
@@ -40,7 +37,6 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -195,8 +191,6 @@ jobs:
|
||||
SLACK_COLOR: ${{ needs.cargo-builds-m1.result }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
BRANCH: ${{ github.ref }}
|
||||
|
||||
10
.github/workflows/make_release.yml
vendored
10
.github/workflows/make_release.yml
vendored
@@ -46,8 +46,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
@@ -78,17 +77,12 @@ jobs:
|
||||
name: Publish Release
|
||||
needs: [package] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
# For provenance of npmjs publish
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
|
||||
37
.github/workflows/make_release_cuda.yml
vendored
37
.github/workflows/make_release_cuda.yml
vendored
@@ -61,8 +61,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
@@ -123,35 +126,7 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
@@ -186,7 +161,7 @@ jobs:
|
||||
teardown-instance:
|
||||
name: Teardown instance (publish-release)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [setup-instance, publish-cuda-release]
|
||||
needs: [ setup-instance, publish-cuda-release ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-csprng
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-tfhe-csprng
|
||||
path: target/package/*.crate
|
||||
|
||||
2
.github/workflows/make_release_tfhe_fft.yml
vendored
2
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-fft
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
|
||||
2
.github/workflows/make_release_tfhe_ntt.yml
vendored
2
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-ntt
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable-derive
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-tfhe-versionable-derive
|
||||
path: target/package/*.crate
|
||||
@@ -61,8 +61,6 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
@@ -105,13 +103,13 @@ jobs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-tfhe-versionable
|
||||
path: target/package/*.crate
|
||||
@@ -139,7 +137,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Download artifact
|
||||
|
||||
5
.github/workflows/make_release_zk_pok.yml
vendored
5
.github/workflows/make_release_zk_pok.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-zk-pok
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package/*.crate
|
||||
@@ -61,8 +61,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
|
||||
3
.github/workflows/sync_on_push.yml
vendored
3
.github/workflows/sync_on_push.yml
vendored
@@ -16,8 +16,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: git-sync
|
||||
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
|
||||
with:
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -32,8 +32,5 @@ web-test-runner/
|
||||
node_modules/
|
||||
package-lock.json
|
||||
|
||||
# Python .env
|
||||
.env
|
||||
|
||||
# Dir used for backward compatibility test data
|
||||
tests/tfhe-backward-compat-data/
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
BSD 3-Clause Clear License
|
||||
|
||||
Copyright © 2025 ZAMA.
|
||||
Copyright © 2024 ZAMA.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
||||
61
Makefile
61
Makefile
@@ -20,7 +20,7 @@ BENCH_OP_FLAVOR?=DEFAULT
|
||||
BENCH_TYPE?=latency
|
||||
NODE_VERSION=22.6
|
||||
BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
|
||||
BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
|
||||
BACKWARD_COMPAT_DATA_BRANCH?=v0.5
|
||||
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
|
||||
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
|
||||
TFHE_SPEC:=tfhe
|
||||
@@ -282,14 +282,14 @@ check_typos: install_typos_checker
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
|
||||
check_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
@@ -363,18 +363,7 @@ clippy_rustdoc: install_rs_check_toolchain
|
||||
fi && \
|
||||
CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
|
||||
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: clippy_rustdoc_gpu # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
|
||||
clippy_rustdoc_gpu: install_rs_check_toolchain
|
||||
if [[ "$(OS)" != "Linux" ]]; then \
|
||||
echo "WARNING: skipped clippy_rustdoc_gpu, unsupported OS $(OS)"; \
|
||||
exit 0; \
|
||||
fi && \
|
||||
CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
|
||||
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
|
||||
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
|
||||
@@ -405,10 +394,10 @@ clippy_trivium: install_rs_check_toolchain
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
|
||||
@@ -967,10 +956,6 @@ check_intra_md_links: install_mlc
|
||||
check_md_links: install_mlc
|
||||
mlc --match-file-extension tfhe/docs
|
||||
|
||||
.PHONY: check_parameter_export_ok # Checks exported "current" shortint parameter module is correct
|
||||
check_parameter_export_ok:
|
||||
python3 ./scripts/check_current_param_export.py
|
||||
|
||||
.PHONY: check_compile_tests # Build tests in debug without running them
|
||||
check_compile_tests: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
@@ -1071,35 +1056,35 @@ bench_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer # Run benchmarks for signed integer
|
||||
bench_signed_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
|
||||
bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression_gpu
|
||||
bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,gpu,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1107,7 +1092,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
|
||||
bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1115,7 +1100,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
|
||||
bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1123,7 +1108,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
|
||||
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1131,14 +1116,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) -- ::unsigned
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-pke-bench \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512,pbs-stats \
|
||||
-p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
@@ -1296,7 +1281,7 @@ parse_wasm_benchmarks: install_rs_check_toolchain
|
||||
|
||||
.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
|
||||
write_params_to_file: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example write_params_to_file --features=boolean,shortint,internal-keycache
|
||||
|
||||
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
|
||||
@@ -1328,17 +1313,15 @@ sha256_bool: install_rs_check_toolchain
|
||||
--example sha256_bool --features=boolean
|
||||
|
||||
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
|
||||
pcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
|
||||
check_md_docs_are_tested check_intra_md_links clippy_all check_compile_tests test_tfhe_lints \
|
||||
tfhe_lints
|
||||
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
|
||||
clippy_all check_compile_tests test_tfhe_lints tfhe_lints
|
||||
|
||||
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
|
||||
pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
|
||||
clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu
|
||||
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change
|
||||
|
||||
.PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
|
||||
fpcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
|
||||
check_md_docs_are_tested clippy_fast check_compile_tests
|
||||
fpcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested clippy_fast \
|
||||
check_compile_tests
|
||||
|
||||
.PHONY: conformance # Automatically fix problems that can be fixed
|
||||
conformance: fix_newline fmt fmt_js
|
||||
|
||||
@@ -18,102 +18,102 @@ use tfhe::prelude::*;
|
||||
use tfhe_trivium::TriviumStream;
|
||||
|
||||
fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
|
||||
assert!(a.len() % 8 == 0);
|
||||
let mut hexadecimal: String = "".to_string();
|
||||
for test in a.chunks(8) {
|
||||
// Encoding is bytes in LSB order
|
||||
match test[4..8] {
|
||||
[false, false, false, false] => hexadecimal.push('0'),
|
||||
[true, false, false, false] => hexadecimal.push('1'),
|
||||
[false, true, false, false] => hexadecimal.push('2'),
|
||||
[true, true, false, false] => hexadecimal.push('3'),
|
||||
assert!(a.len() % 8 == 0);
|
||||
let mut hexadecimal: String = "".to_string();
|
||||
for test in a.chunks(8) {
|
||||
// Encoding is bytes in LSB order
|
||||
match test[4..8] {
|
||||
[false, false, false, false] => hexadecimal.push('0'),
|
||||
[true, false, false, false] => hexadecimal.push('1'),
|
||||
[false, true, false, false] => hexadecimal.push('2'),
|
||||
[true, true, false, false] => hexadecimal.push('3'),
|
||||
|
||||
[false, false, true, false] => hexadecimal.push('4'),
|
||||
[true, false, true, false] => hexadecimal.push('5'),
|
||||
[false, true, true, false] => hexadecimal.push('6'),
|
||||
[true, true, true, false] => hexadecimal.push('7'),
|
||||
[false, false, true, false] => hexadecimal.push('4'),
|
||||
[true, false, true, false] => hexadecimal.push('5'),
|
||||
[false, true, true, false] => hexadecimal.push('6'),
|
||||
[true, true, true, false] => hexadecimal.push('7'),
|
||||
|
||||
[false, false, false, true] => hexadecimal.push('8'),
|
||||
[true, false, false, true] => hexadecimal.push('9'),
|
||||
[false, true, false, true] => hexadecimal.push('A'),
|
||||
[true, true, false, true] => hexadecimal.push('B'),
|
||||
[false, false, false, true] => hexadecimal.push('8'),
|
||||
[true, false, false, true] => hexadecimal.push('9'),
|
||||
[false, true, false, true] => hexadecimal.push('A'),
|
||||
[true, true, false, true] => hexadecimal.push('B'),
|
||||
|
||||
[false, false, true, true] => hexadecimal.push('C'),
|
||||
[true, false, true, true] => hexadecimal.push('D'),
|
||||
[false, true, true, true] => hexadecimal.push('E'),
|
||||
[true, true, true, true] => hexadecimal.push('F'),
|
||||
_ => ()
|
||||
};
|
||||
match test[0..4] {
|
||||
[false, false, false, false] => hexadecimal.push('0'),
|
||||
[true, false, false, false] => hexadecimal.push('1'),
|
||||
[false, true, false, false] => hexadecimal.push('2'),
|
||||
[true, true, false, false] => hexadecimal.push('3'),
|
||||
[false, false, true, true] => hexadecimal.push('C'),
|
||||
[true, false, true, true] => hexadecimal.push('D'),
|
||||
[false, true, true, true] => hexadecimal.push('E'),
|
||||
[true, true, true, true] => hexadecimal.push('F'),
|
||||
_ => ()
|
||||
};
|
||||
match test[0..4] {
|
||||
[false, false, false, false] => hexadecimal.push('0'),
|
||||
[true, false, false, false] => hexadecimal.push('1'),
|
||||
[false, true, false, false] => hexadecimal.push('2'),
|
||||
[true, true, false, false] => hexadecimal.push('3'),
|
||||
|
||||
[false, false, true, false] => hexadecimal.push('4'),
|
||||
[true, false, true, false] => hexadecimal.push('5'),
|
||||
[false, true, true, false] => hexadecimal.push('6'),
|
||||
[true, true, true, false] => hexadecimal.push('7'),
|
||||
[false, false, true, false] => hexadecimal.push('4'),
|
||||
[true, false, true, false] => hexadecimal.push('5'),
|
||||
[false, true, true, false] => hexadecimal.push('6'),
|
||||
[true, true, true, false] => hexadecimal.push('7'),
|
||||
|
||||
[false, false, false, true] => hexadecimal.push('8'),
|
||||
[true, false, false, true] => hexadecimal.push('9'),
|
||||
[false, true, false, true] => hexadecimal.push('A'),
|
||||
[true, true, false, true] => hexadecimal.push('B'),
|
||||
[false, false, false, true] => hexadecimal.push('8'),
|
||||
[true, false, false, true] => hexadecimal.push('9'),
|
||||
[false, true, false, true] => hexadecimal.push('A'),
|
||||
[true, true, false, true] => hexadecimal.push('B'),
|
||||
|
||||
[false, false, true, true] => hexadecimal.push('C'),
|
||||
[true, false, true, true] => hexadecimal.push('D'),
|
||||
[false, true, true, true] => hexadecimal.push('E'),
|
||||
[true, true, true, true] => hexadecimal.push('F'),
|
||||
_ => ()
|
||||
};
|
||||
}
|
||||
return hexadecimal;
|
||||
[false, false, true, true] => hexadecimal.push('C'),
|
||||
[true, false, true, true] => hexadecimal.push('D'),
|
||||
[false, true, true, true] => hexadecimal.push('E'),
|
||||
[true, true, true, true] => hexadecimal.push('F'),
|
||||
_ => ()
|
||||
};
|
||||
}
|
||||
return hexadecimal;
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let config = ConfigBuilder::default().build();
|
||||
let (client_key, server_key) = generate_keys(config);
|
||||
let config = ConfigBuilder::default().build();
|
||||
let (client_key, server_key) = generate_keys(config);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
let mut key = [false; 80];
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
let mut key = [false; 80];
|
||||
|
||||
for i in (0..key_string.len()).step_by(2) {
|
||||
let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
key[8*(i>>1) + j] = val % 2 == 1;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
for i in (0..key_string.len()).step_by(2) {
|
||||
let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
key[8*(i>>1) + j] = val % 2 == 1;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
let iv_string = "0D74DB42A91077DE45AC".to_string();
|
||||
let mut iv = [false; 80];
|
||||
let iv_string = "0D74DB42A91077DE45AC".to_string();
|
||||
let mut iv = [false; 80];
|
||||
|
||||
for i in (0..iv_string.len()).step_by(2) {
|
||||
let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
iv[8*(i>>1) + j] = val % 2 == 1;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
for i in (0..iv_string.len()).step_by(2) {
|
||||
let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
iv[8*(i>>1) + j] = val % 2 == 1;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
let output_0_63 = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
|
||||
let output_0_63 = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
|
||||
|
||||
let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
|
||||
let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));
|
||||
let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
|
||||
let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));
|
||||
|
||||
|
||||
let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);
|
||||
let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);
|
||||
|
||||
let mut vec = Vec::<bool>::with_capacity(64*8);
|
||||
while vec.len() < 64*8 {
|
||||
let cipher_outputs = trivium.next_64();
|
||||
for c in cipher_outputs {
|
||||
vec.push(c.decrypt(&client_key))
|
||||
}
|
||||
}
|
||||
let mut vec = Vec::<bool>::with_capacity(64*8);
|
||||
while vec.len() < 64*8 {
|
||||
let cipher_outputs = trivium.next_64();
|
||||
for c in cipher_outputs {
|
||||
vec.push(c.decrypt(&client_key))
|
||||
}
|
||||
}
|
||||
|
||||
let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
|
||||
assert_eq!(output_0_63, hexadecimal[0..64*2]);
|
||||
let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
|
||||
assert_eq!(output_0_63, hexadecimal[0..64*2]);
|
||||
}
|
||||
```
|
||||
|
||||
@@ -129,7 +129,7 @@ Other sizes than 64 bit are expected to be available in the future.
|
||||
|
||||
# FHE shortint Trivium implementation
|
||||
|
||||
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
|
||||
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64`).
|
||||
It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
|
||||
on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
|
||||
its setup a little more intricate.
|
||||
@@ -137,68 +137,67 @@ its setup a little more intricate.
|
||||
Example code:
|
||||
```rust
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::{
|
||||
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
|
||||
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
|
||||
};
|
||||
use tfhe::{ConfigBuilder, generate_keys, FheUint64};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe_trivium::TriviumStreamShortint;
|
||||
|
||||
fn test_shortint() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
let mut key = [0; 80];
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
let mut key = [0; 80];
|
||||
|
||||
for i in (0..key_string.len()).step_by(2) {
|
||||
let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
key[8*(i>>1) + j] = val % 2;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
for i in (0..key_string.len()).step_by(2) {
|
||||
let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
key[8*(i>>1) + j] = val % 2;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
let iv_string = "0D74DB42A91077DE45AC".to_string();
|
||||
let mut iv = [0; 80];
|
||||
let iv_string = "0D74DB42A91077DE45AC".to_string();
|
||||
let mut iv = [0; 80];
|
||||
|
||||
for i in (0..iv_string.len()).step_by(2) {
|
||||
let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
iv[8*(i>>1) + j] = val % 2;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
let output_0_63 = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
|
||||
for i in (0..iv_string.len()).step_by(2) {
|
||||
let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
|
||||
for j in 0..8 {
|
||||
iv[8*(i>>1) + j] = val % 2;
|
||||
val >>= 1;
|
||||
}
|
||||
}
|
||||
let output_0_63 = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
|
||||
|
||||
let cipher_key = key.map(|x| client_key.encrypt(x));
|
||||
let cipher_iv = iv.map(|x| client_key.encrypt(x));
|
||||
let cipher_key = key.map(|x| client_key.encrypt(x));
|
||||
let cipher_iv = iv.map(|x| client_key.encrypt(x));
|
||||
|
||||
let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];
|
||||
let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];
|
||||
|
||||
let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);
|
||||
let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);
|
||||
|
||||
let mut vec = Vec::<u64>::with_capacity(8);
|
||||
while vec.len() < 8 {
|
||||
let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
|
||||
vec.push(trans_ciphered_message.decrypt(&hl_client_key));
|
||||
}
|
||||
let mut vec = Vec::<u64>::with_capacity(8);
|
||||
while vec.len() < 8 {
|
||||
let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
|
||||
vec.push(trans_ciphered_message.decrypt(&hl_client_key));
|
||||
}
|
||||
|
||||
let hexadecimal = get_hexagonal_string_from_u64(vec);
|
||||
assert_eq!(output_0_63, hexadecimal[0..64*2]);
|
||||
let hexadecimal = get_hexagonal_string_from_u64(vec);
|
||||
assert_eq!(output_0_63, hexadecimal[0..64*2]);
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::{
|
||||
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
|
||||
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
|
||||
};
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
@@ -11,19 +10,19 @@ use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
|
||||
|
||||
pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -64,19 +63,19 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
|
||||
pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -112,19 +111,19 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
|
||||
pub fn kreyvium_shortint_trans(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::{
|
||||
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
|
||||
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
|
||||
};
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
@@ -11,19 +10,19 @@ use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
|
||||
|
||||
pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -64,19 +63,19 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
|
||||
pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -112,19 +111,19 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
|
||||
pub fn trivium_shortint_trans(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::{
|
||||
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
|
||||
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
|
||||
};
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo renaud1239/Kreyvium,
|
||||
@@ -221,19 +220,19 @@ use tfhe::shortint::prelude::*;
|
||||
#[test]
|
||||
fn kreyvium_test_shortint_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::{
|
||||
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
|
||||
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
|
||||
};
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
|
||||
@@ -357,19 +356,19 @@ use tfhe::shortint::prelude::*;
|
||||
#[test]
|
||||
fn trivium_test_shortint_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-cuda-backend"
|
||||
version = "0.8.0"
|
||||
version = "0.7.0"
|
||||
edition = "2021"
|
||||
authors = ["Zama team"]
|
||||
license = "BSD-3-Clause-Clear"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
BSD 3-Clause Clear License
|
||||
|
||||
Copyright © 2025 ZAMA.
|
||||
Copyright © 2024 ZAMA.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
||||
@@ -27,8 +27,6 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
|
||||
std::abort(); \
|
||||
}
|
||||
|
||||
void cuda_set_device(uint32_t gpu_index);
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index);
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
|
||||
@@ -112,8 +112,6 @@ template <typename Torus> struct int_decompression {
|
||||
|
||||
generate_device_accumulator_with_encoding<Torus>(
|
||||
streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
|
||||
decompression_rescale_lut->get_degree(0),
|
||||
decompression_rescale_lut->get_max_degree(0),
|
||||
encryption_params.glwe_dimension, encryption_params.polynomial_size,
|
||||
effective_compression_message_modulus,
|
||||
effective_compression_carry_modulus,
|
||||
|
||||
@@ -38,15 +38,6 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
|
||||
enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };
|
||||
|
||||
extern "C" {
|
||||
|
||||
typedef struct {
|
||||
void *ptr;
|
||||
uint64_t *degrees;
|
||||
uint64_t *noise_levels;
|
||||
uint32_t num_radix_blocks;
|
||||
uint32_t lwe_dimension;
|
||||
} CudaRadixCiphertextFFI;
|
||||
|
||||
void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
@@ -54,7 +45,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory);
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
@@ -62,12 +53,13 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
void cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks);
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory);
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
void const *input_radix_lwe,
|
||||
int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -81,15 +73,13 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_1,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
|
||||
uint32_t shift);
|
||||
void *output_radix_lwe, void const *input_radix_lwe_1,
|
||||
void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks, uint32_t shift);
|
||||
|
||||
void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -98,10 +88,9 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
||||
|
||||
void cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_luts,
|
||||
uint32_t lut_stride);
|
||||
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks,
|
||||
uint32_t num_luts, uint32_t lut_stride);
|
||||
|
||||
void scratch_cuda_full_propagation_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -143,14 +132,15 @@ void cleanup_cuda_integer_mult(void *const *streams,
|
||||
|
||||
void cuda_negate_integer_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
|
||||
void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus);
|
||||
|
||||
void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
|
||||
uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus);
|
||||
void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus);
|
||||
|
||||
void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -163,8 +153,8 @@ void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
|
||||
|
||||
void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -177,8 +167,8 @@ void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
|
||||
|
||||
void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_radix_logical_scalar_shift(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -199,8 +189,8 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(
|
||||
|
||||
void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
|
||||
void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -243,17 +233,15 @@ void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
|
||||
void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t lwe_ciphertext_count);
|
||||
|
||||
void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
|
||||
void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op);
|
||||
|
||||
void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -270,11 +258,9 @@ void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
|
||||
void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
|
||||
void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count);
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -291,8 +277,8 @@ void scratch_cuda_integer_radix_scalar_rotate_kb_64(
|
||||
|
||||
void cuda_integer_radix_scalar_rotate_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint32_t n, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *lwe_array, uint32_t n, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_radix_scalar_rotate(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -319,16 +305,15 @@ void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
|
||||
const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -351,10 +336,9 @@ void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *overflow_block,
|
||||
const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t compute_overflow,
|
||||
void *lhs_array, const void *rhs_array, void *overflow_block,
|
||||
const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow);
|
||||
|
||||
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
||||
@@ -427,13 +411,12 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks);
|
||||
void *output_radix_lwe, void *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks, uint32_t shift);
|
||||
|
||||
void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -441,8 +424,9 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
|
||||
void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array);
|
||||
uint32_t gpu_count, void *lwe_array,
|
||||
uint32_t num_blocks,
|
||||
uint32_t lwe_size);
|
||||
|
||||
void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -455,8 +439,8 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
|
||||
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +0,0 @@
|
||||
#ifndef CUDA_RADIX_CIPHERTEXT_H
|
||||
#define CUDA_RADIX_CIPHERTEXT_H
|
||||
|
||||
void release_radix_ciphertext(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *data);
|
||||
|
||||
#endif
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef CUDA_LINALG_H_
|
||||
#define CUDA_LINALG_H_
|
||||
|
||||
#include "integer/integer.h"
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" {
|
||||
@@ -14,14 +13,17 @@ void cuda_negate_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2);
|
||||
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input_1,
|
||||
CudaRadixCiphertextFFI const *input_2);
|
||||
void cuda_add_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in_1, void const *lwe_array_in_2,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
template <typename Torus>
|
||||
bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size, int max_shared_memory);
|
||||
uint32_t polynomial_size);
|
||||
|
||||
template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
|
||||
uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, int max_shared_memory);
|
||||
uint32_t level_count);
|
||||
|
||||
#if CUDA_ARCH >= 900
|
||||
template <typename Torus>
|
||||
@@ -114,8 +114,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
|
||||
PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
this->pbs_variant = pbs_variant;
|
||||
this->lwe_chunk_size = lwe_chunk_size;
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
|
||||
@@ -61,7 +61,7 @@ get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
|
||||
|
||||
template <typename Torus>
|
||||
bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
|
||||
uint32_t polynomial_size, int max_shared_memory);
|
||||
uint32_t polynomial_size);
|
||||
|
||||
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;
|
||||
|
||||
@@ -77,10 +77,10 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
|
||||
bool allocate_gpu_memory) {
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
this->pbs_variant = pbs_variant;
|
||||
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
|
||||
if (allocate_gpu_memory) {
|
||||
switch (pbs_variant) {
|
||||
@@ -157,7 +157,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
|
||||
bool supports_dsm =
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
|
||||
Torus>(polynomial_size, max_shared_memory);
|
||||
Torus>(polynomial_size);
|
||||
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(
|
||||
@@ -218,7 +218,8 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
template <typename Torus>
|
||||
uint64_t get_buffer_size_programmable_bootstrap_cg(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
int max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
|
||||
uint64_t partial_sm =
|
||||
@@ -244,8 +245,7 @@ template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count,
|
||||
uint32_t num_samples,
|
||||
int max_shared_memory);
|
||||
uint32_t num_samples);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
|
||||
@@ -8,7 +8,7 @@ extern "C" {
|
||||
|
||||
bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t num_samples, int max_shared_memory);
|
||||
uint32_t num_samples);
|
||||
|
||||
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
|
||||
void *stream, uint32_t gpu_index, void *dest, void const *src,
|
||||
|
||||
@@ -11,7 +11,7 @@ void cuda_convert_lwe_ciphertext_vector_to_gpu(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *dest,
|
||||
T *src, uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
|
||||
cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
@@ -21,7 +21,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *dest,
|
||||
T *src, uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
|
||||
cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
@@ -55,7 +55,7 @@ __host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus const *glwe_array_in,
|
||||
uint32_t const *nth_array, uint32_t num_nths,
|
||||
uint32_t glwe_dimension) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
dim3 grid(num_nths);
|
||||
dim3 thds(params::degree / params::opt);
|
||||
|
||||
@@ -261,7 +261,7 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
|
||||
|
||||
// Optimization of packing keyswitch when packing many LWEs
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
@@ -57,7 +57,7 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
if (gpu_count != 1)
|
||||
PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
|
||||
"supported yet.")
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
int shared_memory_size = sizeof(double) * polynomial_size;
|
||||
|
||||
|
||||
@@ -45,19 +45,19 @@ keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
|
||||
const Torus *__restrict__ lwe_input_indexes,
|
||||
const Torus *__restrict__ ksk, uint32_t lwe_dimension_in,
|
||||
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count) {
|
||||
const int tid = threadIdx.x + blockIdx.y * blockDim.x;
|
||||
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int shmem_index = threadIdx.x + threadIdx.y * blockDim.x;
|
||||
|
||||
extern __shared__ int8_t sharedmem[];
|
||||
Torus *lwe_acc_out = (Torus *)sharedmem;
|
||||
auto block_lwe_array_out = get_chunk(
|
||||
lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1);
|
||||
lwe_array_out, lwe_output_indexes[blockIdx.y], lwe_dimension_out + 1);
|
||||
|
||||
if (tid <= lwe_dimension_out) {
|
||||
|
||||
Torus local_lwe_out = 0;
|
||||
auto block_lwe_array_in = get_chunk(
|
||||
lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);
|
||||
lwe_array_in, lwe_input_indexes[blockIdx.y], lwe_dimension_in + 1);
|
||||
|
||||
if (tid == lwe_dimension_out && threadIdx.y == 0) {
|
||||
local_lwe_out = block_lwe_array_in[lwe_dimension_in];
|
||||
@@ -105,22 +105,16 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
|
||||
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
constexpr int num_threads_y = 32;
|
||||
int num_blocks_per_sample, num_threads_x;
|
||||
int num_blocks, num_threads_x;
|
||||
|
||||
getNumBlocksAndThreads2D(lwe_dimension_out + 1, 512, num_threads_y,
|
||||
num_blocks_per_sample, num_threads_x);
|
||||
num_blocks, num_threads_x);
|
||||
|
||||
int shared_mem = sizeof(Torus) * num_threads_y * num_threads_x;
|
||||
if (num_blocks_per_sample > 65536)
|
||||
PANIC("Cuda error (Keyswith): number of blocks per sample is too large");
|
||||
|
||||
// In multiplication of large integers (512, 1024, 2048), the number of
|
||||
// samples can be larger than 65536, so we need to set it in the first
|
||||
// dimension of the grid
|
||||
dim3 grid(num_samples, num_blocks_per_sample, 1);
|
||||
dim3 grid(num_blocks, num_samples, 1);
|
||||
dim3 threads(num_threads_x, num_threads_y, 1);
|
||||
|
||||
keyswitch<Torus><<<grid, threads, shared_mem, stream>>>(
|
||||
@@ -166,7 +160,7 @@ __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
|
||||
cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ template <typename Torus>
|
||||
__host__ void host_modulus_switch_inplace(cudaStream_t stream,
|
||||
uint32_t gpu_index, Torus *array,
|
||||
int size, uint32_t log_modulus) {
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
int num_threads = 0, num_blocks = 0;
|
||||
getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);
|
||||
|
||||
@@ -2,12 +2,8 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
void cuda_set_device(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
}
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cudaEvent_t event;
|
||||
check_cuda_error(cudaEventCreate(&event));
|
||||
return event;
|
||||
@@ -15,24 +11,24 @@ cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventRecord(event, stream));
|
||||
}
|
||||
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
|
||||
}
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventDestroy(event));
|
||||
}
|
||||
|
||||
/// Unsafe function to create a CUDA stream, must check first that GPU exists
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cudaStream_t stream;
|
||||
check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
return stream;
|
||||
@@ -40,22 +36,15 @@ cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
|
||||
/// Unsafe function to destroy CUDA stream, must check first the GPU exists
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamDestroy(stream));
|
||||
}
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
void synchronize_streams(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count) {
|
||||
for (uint i = 0; i < gpu_count; i++) {
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if a CUDA device is available at runtime
|
||||
uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
|
||||
@@ -63,7 +52,7 @@ uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
/// or if there's not enough memory. A safe wrapper around it must call
|
||||
/// cuda_check_valid_malloc() first
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
void *ptr;
|
||||
check_cuda_error(cudaMalloc((void **)&ptr, size));
|
||||
|
||||
@@ -74,7 +63,7 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
/// asynchronously.
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
void *ptr;
|
||||
|
||||
#ifndef CUDART_VERSION
|
||||
@@ -97,7 +86,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
|
||||
/// Check that allocation is valid
|
||||
void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
size_t total_mem, free_mem;
|
||||
check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
|
||||
if (size > free_mem) {
|
||||
@@ -145,7 +134,7 @@ void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
|
||||
PANIC("Cuda error: invalid device pointer in async copy to GPU.")
|
||||
}
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
@@ -165,7 +154,7 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
if (attr_src.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
if (attr_src.device == attr_dest.device) {
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
|
||||
@@ -190,7 +179,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
if (attr_src.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
if (attr_src.device == attr_dest.device) {
|
||||
check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice));
|
||||
} else {
|
||||
@@ -201,7 +190,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
|
||||
/// Synchronizes device
|
||||
void cuda_synchronize_device(uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
@@ -214,7 +203,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda memset.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
|
||||
}
|
||||
|
||||
@@ -234,7 +223,7 @@ void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
if (attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda set value.")
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
int block_size = 256;
|
||||
int num_blocks = (n + block_size - 1) / block_size;
|
||||
|
||||
@@ -264,7 +253,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
|
||||
}
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
|
||||
}
|
||||
@@ -278,14 +267,14 @@ int cuda_get_number_of_gpus() {
|
||||
|
||||
/// Drop a cuda array
|
||||
void cuda_drop(void *ptr, uint32_t gpu_index) {
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
}
|
||||
|
||||
/// Drop a cuda array asynchronously, if supported on the device
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
#elif (CUDART_VERSION >= 11020)
|
||||
|
||||
@@ -22,14 +22,15 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
|
||||
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
|
||||
gpu_count, ct, bsks, (uint64_t **)(ksks), mem,
|
||||
is_signed);
|
||||
gpu_count, static_cast<uint64_t *>(ct), bsks,
|
||||
(uint64_t **)(ksks), mem, is_signed,
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
|
||||
@@ -2,12 +2,15 @@
|
||||
#define TFHE_RS_ABS_CUH
|
||||
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer/bitwise_ops.cuh"
|
||||
#include "integer/comparison.cuh"
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
#include "integer/negation.cuh"
|
||||
#include "integer/scalar_shifts.cuh"
|
||||
#include "radix_ciphertext.cuh"
|
||||
#include "linear_algebra.h"
|
||||
#include "pbs/programmable_bootstrap.h"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <fstream>
|
||||
@@ -29,15 +32,16 @@ __host__ void scratch_cuda_integer_abs_kb(
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_abs_kb_async(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *ct, void *const *bsks, uint64_t *const *ksks,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed, uint32_t num_blocks) {
|
||||
__host__ void
|
||||
host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *ct, void *const *bsks,
|
||||
uint64_t *const *ksks, int_abs_buffer<uint64_t> *mem_ptr,
|
||||
bool is_signed, uint32_t num_blocks) {
|
||||
if (!is_signed)
|
||||
return;
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
auto mask = (Torus *)(mem_ptr->mask->ptr);
|
||||
auto mask = mem_ptr->mask;
|
||||
|
||||
auto big_lwe_dimension = radix_params.big_lwe_dimension;
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
@@ -48,55 +52,20 @@ __host__ void legacy_host_integer_abs_kb_async(
|
||||
cuda_memcpy_async_gpu_to_gpu(mask, ct, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
legacy_host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
|
||||
host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
|
||||
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, num_blocks);
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
radix_params.big_lwe_dimension, num_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
radix_params.big_lwe_dimension, num_blocks);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
|
||||
// legacy bitop
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, mask, ct, bsks, ksks, num_blocks,
|
||||
mem_ptr->bitxor_mem->lut, mem_ptr->bitxor_mem->params.message_modulus);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *ct,
|
||||
void *const *bsks, uint64_t *const *ksks,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
|
||||
if (!is_signed)
|
||||
return;
|
||||
|
||||
auto mask = mem_ptr->mask;
|
||||
|
||||
uint32_t num_bits_in_ciphertext =
|
||||
(31 - __builtin_clz(mem_ptr->params.message_modulus)) *
|
||||
ct->num_radix_blocks;
|
||||
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], mask, ct);
|
||||
|
||||
host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
|
||||
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
ct->num_radix_blocks);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
|
||||
nullptr, nullptr, mem_ptr->scp_mem, bsks,
|
||||
ksks, requested_flag, uses_carry);
|
||||
|
||||
host_integer_radix_bitop_kb<Torus>(streams, gpu_indexes, gpu_count, ct, mask,
|
||||
ct, mem_ptr->bitxor_mem, bsks, ksks);
|
||||
host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
|
||||
mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_ABS_CUH
|
||||
|
||||
@@ -22,15 +22,17 @@ void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
|
||||
void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t lwe_ciphertext_count) {
|
||||
|
||||
host_integer_radix_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_1),
|
||||
static_cast<const uint64_t *>(lwe_array_2),
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
@@ -41,50 +43,3 @@ void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void update_degrees_after_bitand(uint64_t *output_degrees,
|
||||
uint64_t *lwe_array_1_degrees,
|
||||
uint64_t *lwe_array_2_degrees,
|
||||
uint32_t num_radix_blocks) {
|
||||
for (uint i = 0; i < num_radix_blocks; i++) {
|
||||
output_degrees[i] =
|
||||
std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void update_degrees_after_bitor(uint64_t *output_degrees,
|
||||
uint64_t *lwe_array_1_degrees,
|
||||
uint64_t *lwe_array_2_degrees,
|
||||
uint32_t num_radix_blocks) {
|
||||
for (uint i = 0; i < num_radix_blocks; i++) {
|
||||
auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max | j > result) {
|
||||
result = max | j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
void update_degrees_after_bitxor(uint64_t *output_degrees,
|
||||
uint64_t *lwe_array_1_degrees,
|
||||
uint64_t *lwe_array_2_degrees,
|
||||
uint32_t num_radix_blocks) {
|
||||
for (uint i = 0; i < num_radix_blocks; i++) {
|
||||
auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
// Try every possibility to find the worst case
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max ^ j > result) {
|
||||
result = max ^ j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,34 +14,15 @@
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_bitop_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks) {
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_1,
|
||||
Torus const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto lut = mem_ptr->lut;
|
||||
uint64_t degrees[lwe_array_1->num_radix_blocks];
|
||||
if (mem_ptr->op == BITOP_TYPE::BITAND) {
|
||||
update_degrees_after_bitand(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITOP_TYPE::BITOR) {
|
||||
update_degrees_after_bitor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITXOR) {
|
||||
update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
}
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, lut, lwe_array_out->num_radix_blocks,
|
||||
lut->params.message_modulus);
|
||||
|
||||
memcpy(lwe_array_out->degrees, degrees,
|
||||
lwe_array_out->num_radix_blocks * sizeof(uint64_t));
|
||||
bsks, ksks, num_radix_blocks, lut, lut->params.message_modulus);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -25,16 +25,19 @@ void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
|
||||
void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
|
||||
void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count) {
|
||||
|
||||
host_integer_radix_cmux_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_condition, lwe_array_true, lwe_array_false,
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_condition),
|
||||
static_cast<const uint64_t *>(lwe_array_true),
|
||||
static_cast<const uint64_t *>(lwe_array_false),
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
|
||||
lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#define CUDA_INTEGER_CMUX_CUH
|
||||
|
||||
#include "integer.cuh"
|
||||
#include "radix_ciphertext.cuh"
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void zero_out_if(cudaStream_t const *streams,
|
||||
@@ -12,25 +11,25 @@ __host__ void zero_out_if(cudaStream_t const *streams,
|
||||
int_zero_out_if_buffer<Torus> *mem_ptr,
|
||||
int_radix_lut<Torus> *predicate, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
auto params = mem_ptr->params;
|
||||
|
||||
// We can't use integer_radix_apply_bivariate_lookup_table_kb since the
|
||||
// second operand is not an array
|
||||
auto tmp_lwe_array_input = mem_ptr->tmp;
|
||||
host_pack_bivariate_blocks_with_single_block<Torus>(
|
||||
pack_bivariate_blocks_with_single_block<Torus>(
|
||||
streams, gpu_indexes, gpu_count, tmp_lwe_array_input,
|
||||
predicate->lwe_indexes_in, lwe_array_input, lwe_condition,
|
||||
predicate->lwe_indexes_in, params.big_lwe_dimension,
|
||||
params.message_modulus, num_radix_blocks);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
|
||||
ksks, num_radix_blocks, predicate);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_cmux_kb(
|
||||
__host__ void host_integer_radix_cmux_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_condition,
|
||||
Torus const *lwe_array_true, Torus const *lwe_array_false,
|
||||
@@ -40,88 +39,34 @@ __host__ void legacy_host_integer_radix_cmux_kb(
|
||||
auto params = mem_ptr->params;
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
Torus radix_lwe_size = lwe_size * num_radix_blocks;
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in->ptr, lwe_array_true,
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in, lwe_array_true,
|
||||
radix_lwe_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
(Torus *)(mem_ptr->buffer_in->ptr) + radix_lwe_size, lwe_array_false,
|
||||
radix_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in + radix_lwe_size,
|
||||
lwe_array_false, radix_lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
(Torus *)(mem_ptr->condition_array->ptr) + i * lwe_size, lwe_condition,
|
||||
lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
}
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, (Torus *)(mem_ptr->buffer_out->ptr),
|
||||
(Torus *)(mem_ptr->buffer_in->ptr),
|
||||
(Torus *)(mem_ptr->condition_array->ptr), bsks, ksks,
|
||||
2 * num_radix_blocks, mem_ptr->predicate_lut, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
auto mem_true = (Torus *)(mem_ptr->buffer_out->ptr);
|
||||
auto ptr = (Torus *)mem_ptr->buffer_out->ptr;
|
||||
auto mem_false = &ptr[radix_lwe_size];
|
||||
auto added_cts = mem_true;
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
|
||||
mem_false, params.big_lwe_dimension,
|
||||
num_radix_blocks);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
num_radix_blocks, mem_ptr->message_extract_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_cmux_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false,
|
||||
int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {
|
||||
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_true->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_false->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
|
||||
auto num_radix_blocks = lwe_array_out->num_radix_blocks;
|
||||
auto params = mem_ptr->params;
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], mem_ptr->buffer_in, 0, num_radix_blocks,
|
||||
lwe_array_true, 0, num_radix_blocks);
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], mem_ptr->buffer_in, num_radix_blocks,
|
||||
2 * num_radix_blocks, lwe_array_false, 0, num_radix_blocks);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
mem_ptr->condition_array, i, i + 1,
|
||||
lwe_condition, 0, 1);
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->condition_array + i * lwe_size,
|
||||
lwe_condition, lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
|
||||
mem_ptr->condition_array, bsks, ksks, mem_ptr->predicate_lut,
|
||||
2 * num_radix_blocks, params.message_modulus);
|
||||
mem_ptr->condition_array, bsks, ksks, 2 * num_radix_blocks,
|
||||
mem_ptr->predicate_lut, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
CudaRadixCiphertextFFI mem_true;
|
||||
CudaRadixCiphertextFFI mem_false;
|
||||
as_radix_ciphertext_slice<Torus>(&mem_true, mem_ptr->buffer_out, 0,
|
||||
num_radix_blocks);
|
||||
as_radix_ciphertext_slice<Torus>(&mem_false, mem_ptr->buffer_out,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], &mem_true, &mem_true,
|
||||
&mem_false, num_radix_blocks);
|
||||
auto mem_true = mem_ptr->buffer_out;
|
||||
auto mem_false = &mem_ptr->buffer_out[radix_lwe_size];
|
||||
auto added_cts = mem_true;
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
|
||||
mem_false, params.big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, &mem_true, bsks, ksks,
|
||||
mem_ptr->message_extract_lut, num_radix_blocks);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
num_radix_blocks, mem_ptr->message_extract_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -38,7 +38,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t lwe_dimension,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = (lwe_dimension + 1);
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
@@ -122,9 +122,7 @@ __host__ void are_all_comparisons_block_true(
|
||||
};
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
|
||||
is_max_value_lut->get_degree(1),
|
||||
is_max_value_lut->get_max_degree(1), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
is_equal_to_num_blocks_lut_f);
|
||||
|
||||
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
|
||||
@@ -148,12 +146,12 @@ __host__ void are_all_comparisons_block_true(
|
||||
// Applies the LUT
|
||||
if (remaining_blocks == 1) {
|
||||
// In the last iteration we copy the output to the final address
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
|
||||
ksks, 1, lut);
|
||||
return;
|
||||
} else {
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
|
||||
num_chunks, lut);
|
||||
}
|
||||
@@ -219,12 +217,12 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
// Applies the LUT
|
||||
if (remaining_blocks == 1) {
|
||||
// In the last iteration we copy the output to the final address
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
|
||||
ksks, 1, lut);
|
||||
return;
|
||||
} else {
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->tmp_lwe_array_out,
|
||||
accumulator, bsks, ksks, num_chunks, lut);
|
||||
}
|
||||
@@ -305,7 +303,7 @@ __host__ void host_compare_with_zero_equality(
|
||||
}
|
||||
}
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, sum, sum, bsks, ksks, num_sum_blocks,
|
||||
zero_comparison);
|
||||
are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
|
||||
@@ -324,7 +322,7 @@ __host__ void host_integer_radix_equality_check_kb(
|
||||
|
||||
// Applies the LUT for the comparison operation
|
||||
auto comparisons = mem_ptr->tmp_block_comparisons;
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, comparisons, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, num_radix_blocks, eq_buffer->operator_lut,
|
||||
eq_buffer->operator_lut->params.message_modulus);
|
||||
@@ -371,14 +369,14 @@ __host__ void compare_radix_blocks_kb(
|
||||
|
||||
// Apply LUT to compare to 0
|
||||
auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_out, bsks, ksks,
|
||||
num_radix_blocks, is_non_zero_lut);
|
||||
|
||||
// Add one
|
||||
// Here Lhs can have the following values: (-1) % (message modulus * carry
|
||||
// modulus), 0, 1 So the output values after the addition will be: 0, 1, 2
|
||||
legacy_host_integer_radix_add_scalar_one_inplace<Torus>(
|
||||
host_integer_radix_add_scalar_one_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, big_lwe_dimension,
|
||||
num_radix_blocks, message_modulus, carry_modulus);
|
||||
}
|
||||
@@ -422,7 +420,7 @@ __host__ void tree_sign_reduction(
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, big_lwe_dimension,
|
||||
partial_block_count, 4);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
|
||||
partial_block_count >> 1, inner_tree_leaf);
|
||||
|
||||
@@ -462,13 +460,12 @@ __host__ void tree_sign_reduction(
|
||||
f = sign_handler_f;
|
||||
}
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0),
|
||||
last_lut->get_degree(0), last_lut->get_max_degree(0), glwe_dimension,
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
// Last leaf
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, y, bsks, ksks, 1,
|
||||
last_lut);
|
||||
}
|
||||
@@ -514,7 +511,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
|
||||
// Clean noise
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
|
||||
2 * packed_num_radix_blocks, identity_lut);
|
||||
|
||||
@@ -552,11 +549,11 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
Torus *last_right_block_before_sign_block =
|
||||
packed_right + packed_num_radix_blocks * big_lwe_size;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
|
||||
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
|
||||
identity_lut);
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, last_right_block_before_sign_block,
|
||||
lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks,
|
||||
1, identity_lut);
|
||||
@@ -566,7 +563,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
last_left_block_before_sign_block, last_right_block_before_sign_block,
|
||||
mem_ptr, bsks, ksks, 1);
|
||||
// Compare the sign block separately
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
|
||||
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
|
||||
@@ -579,7 +576,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
streams, gpu_indexes, gpu_count, comparisons, lwe_array_left,
|
||||
lwe_array_right, mem_ptr, bsks, ksks, num_radix_blocks - 1);
|
||||
// Compare the sign block separately
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
comparisons + (num_radix_blocks - 1) * big_lwe_size,
|
||||
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
|
||||
@@ -623,7 +620,7 @@ __host__ void host_integer_radix_maxmin_kb(
|
||||
ksks, total_num_radix_blocks);
|
||||
|
||||
// Selector
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
|
||||
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
|
||||
|
||||
@@ -50,39 +50,31 @@ __host__ void host_pack(cudaStream_t stream, uint32_t gpu_index,
|
||||
if (array_in == array_out)
|
||||
PANIC("Cuda error: Input and output must be different");
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
auto compression_params = mem_ptr->compression_params;
|
||||
|
||||
auto log_modulus = mem_ptr->storage_log_modulus;
|
||||
printf("pack num_lwes: %u\n", num_lwes);
|
||||
printf("pack log_modulus: %u\n", log_modulus);
|
||||
// [0..num_glwes-1) GLWEs
|
||||
auto in_len = num_glwes * compression_params.glwe_dimension *
|
||||
compression_params.polynomial_size + num_lwes;
|
||||
printf("pack compression_params.glwe_dimension: %u\n", compression_params.glwe_dimension);
|
||||
printf("pack compression_params.polynomial_size: %u\n", compression_params.polynomial_size);
|
||||
printf("pack in_len: %u\n", in_len);
|
||||
|
||||
auto in_len = (compression_params.glwe_dimension + 1) *
|
||||
compression_params.polynomial_size;
|
||||
auto number_bits_to_pack = in_len * log_modulus;
|
||||
printf("pack number_bits_to_pack: %u\n", number_bits_to_pack);
|
||||
|
||||
auto nbits = sizeof(Torus) * 8;
|
||||
printf("pack nbits: %lu\n", nbits);
|
||||
|
||||
// number_bits_to_pack.div_ceil(Scalar::BITS)
|
||||
auto out_len = (number_bits_to_pack + nbits - 1) / nbits;
|
||||
printf("pack out_len: %u\n", out_len);
|
||||
|
||||
// Last GLWE
|
||||
printf("pack num_glwes: %u\n", num_glwes);
|
||||
number_bits_to_pack = in_len * log_modulus;
|
||||
auto last_out_len = (number_bits_to_pack + nbits - 1) / nbits;
|
||||
|
||||
auto num_coeffs = (num_glwes - 1) * out_len + last_out_len;
|
||||
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
getNumBlocksAndThreads(out_len, 1024, num_blocks, num_threads);
|
||||
getNumBlocksAndThreads(num_coeffs, 1024, num_blocks, num_threads);
|
||||
|
||||
dim3 grid(num_blocks);
|
||||
dim3 threads(num_threads);
|
||||
pack<Torus><<<grid, threads, 0, stream>>>(array_out, array_in, log_modulus,
|
||||
out_len, in_len, out_len);
|
||||
num_coeffs, in_len, out_len);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
@@ -193,46 +185,32 @@ __host__ void host_extract(cudaStream_t stream, uint32_t gpu_index,
|
||||
if (array_in == glwe_array_out)
|
||||
PANIC("Cuda error: Input and output must be different");
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
auto compression_params = mem_ptr->compression_params;
|
||||
|
||||
auto log_modulus = mem_ptr->storage_log_modulus;
|
||||
|
||||
uint32_t body_count = mem_ptr->body_count;
|
||||
|
||||
|
||||
auto num_glwes = (body_count + compression_params.polynomial_size - 1) / compression_params.polynomial_size;
|
||||
printf("%u / %u\n", glwe_index, num_glwes);
|
||||
printf("extract body_count: %u\n", body_count);
|
||||
|
||||
if (glwe_index == num_glwes-1)
|
||||
body_count %= compression_params.polynomial_size;
|
||||
|
||||
uint32_t body_count =
|
||||
std::min(mem_ptr->body_count, compression_params.polynomial_size);
|
||||
auto initial_out_len =
|
||||
(compression_params.glwe_dimension+1) * compression_params.polynomial_size;
|
||||
printf("extract compression_params.glwe_dimension: %u\n", compression_params.glwe_dimension);
|
||||
printf("extract compression_params.polynomial_size: %u\n", compression_params.polynomial_size);
|
||||
printf("extract initial_out_len: %u\n", initial_out_len);
|
||||
|
||||
auto number_bits_to_unpack = initial_out_len * log_modulus;
|
||||
printf("extract log_modulus: %u\n", log_modulus);
|
||||
printf("extract number_bits_to_unpack: %u\n", number_bits_to_unpack);
|
||||
compression_params.glwe_dimension * compression_params.polynomial_size +
|
||||
body_count;
|
||||
|
||||
auto compressed_glwe_accumulator_size =
|
||||
(compression_params.glwe_dimension + 1) *
|
||||
compression_params.polynomial_size;
|
||||
auto number_bits_to_unpack = compressed_glwe_accumulator_size * log_modulus;
|
||||
auto nbits = sizeof(Torus) * 8;
|
||||
printf("extract nbits: %lu\n", nbits);
|
||||
|
||||
// number_bits_to_unpack.div_ceil(Scalar::BITS)
|
||||
auto input_len = (number_bits_to_unpack + nbits - 1) / nbits;
|
||||
printf("extract input_len: %u\n", input_len);
|
||||
|
||||
// We assure the tail of the glwe is zeroed
|
||||
auto zeroed_slice = glwe_array_out + initial_out_len;
|
||||
// cuda_memset_async(zeroed_slice, 0,
|
||||
// (compression_params.polynomial_size - body_count) *
|
||||
// sizeof(Torus),
|
||||
// stream, gpu_index);
|
||||
auto glwe_ciphertext_size = (compression_params.glwe_dimension + 1) * compression_params.polynomial_size;
|
||||
cuda_memset_async(glwe_array_out, 0, glwe_ciphertext_size * sizeof(Torus), stream, gpu_index);
|
||||
cuda_memset_async(zeroed_slice, 0,
|
||||
(compression_params.polynomial_size - body_count) *
|
||||
sizeof(Torus),
|
||||
stream, gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
getNumBlocksAndThreads(initial_out_len, 128, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks);
|
||||
@@ -257,6 +235,10 @@ __host__ void host_integer_decompress(
|
||||
|
||||
auto compression_params = h_mem_ptr->compression_params;
|
||||
auto lwe_per_glwe = compression_params.polynomial_size;
|
||||
if (indexes_array_size > lwe_per_glwe)
|
||||
PANIC("Cuda error: too many LWEs to decompress. The number of LWEs should "
|
||||
"be smaller than "
|
||||
"polynomial_size.")
|
||||
|
||||
auto num_radix_blocks = h_mem_ptr->num_radix_blocks;
|
||||
if (num_radix_blocks != indexes_array_size)
|
||||
@@ -287,7 +269,7 @@ __host__ void host_integer_decompress(
|
||||
glwe_vec.push_back(std::make_pair(i, extracted_glwe));
|
||||
} else {
|
||||
// Updates the index
|
||||
++glwe_vec.back().first;
|
||||
glwe_vec.back().first++;
|
||||
}
|
||||
}
|
||||
// Sample extract all LWEs
|
||||
@@ -297,7 +279,7 @@ __host__ void host_integer_decompress(
|
||||
uint32_t current_idx = 0;
|
||||
auto d_indexes_array_chunk = d_indexes_array;
|
||||
for (const auto &max_idx_and_glwe : glwe_vec) {
|
||||
const uint32_t last_idx = max_idx_and_glwe.first;
|
||||
uint32_t last_idx = max_idx_and_glwe.first;
|
||||
extracted_glwe = max_idx_and_glwe.second;
|
||||
|
||||
auto num_lwes = last_idx + 1 - current_idx;
|
||||
|
||||
@@ -285,7 +285,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// Shift the mask so that we will only keep bits we should
|
||||
uint32_t shifted_mask = full_message_mask >> shift_amount;
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, interesting_divisor.last_block(),
|
||||
interesting_divisor.last_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_1[shifted_mask]);
|
||||
@@ -314,7 +314,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// the estimated degree of the output is < msg_modulus
|
||||
shifted_mask = shifted_mask & full_message_mask;
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, divisor_ms_blocks.first_block(),
|
||||
divisor_ms_blocks.first_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_2[shifted_mask]);
|
||||
@@ -339,7 +339,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
interesting_remainder1.insert(0, numerator_block_1.first_block(),
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
legacy_host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder1.data, 1,
|
||||
mem_ptr->shift_mem_1, bsks, ksks, interesting_remainder1.len);
|
||||
|
||||
@@ -347,7 +347,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
interesting_remainder1.len - 1, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
legacy_host_radix_blocks_rotate_left<Torus>(
|
||||
host_radix_blocks_rotate_left<Torus>(
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder1.data,
|
||||
tmp_radix.data, 1, interesting_remainder1.len, big_lwe_size);
|
||||
|
||||
@@ -369,7 +369,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto left_shift_interesting_remainder2 = [&](cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder2.data, 1,
|
||||
mem_ptr->shift_mem_2, bsks, ksks, interesting_remainder2.len);
|
||||
}; // left_shift_interesting_remainder2
|
||||
@@ -402,7 +402,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// but in that position, interesting_remainder2 always has a 0
|
||||
auto &merged_interesting_remainder = interesting_remainder1;
|
||||
|
||||
legacy_host_addition<Torus>(
|
||||
host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], merged_interesting_remainder.data,
|
||||
merged_interesting_remainder.data, interesting_remainder2.data,
|
||||
radix_params.big_lwe_dimension, merged_interesting_remainder.len);
|
||||
@@ -437,7 +437,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
mem_ptr->overflow_sub_mem->update_lut_indexes(
|
||||
streams, gpu_indexes, first_indexes, second_indexes, scalar_indexes,
|
||||
merged_interesting_remainder.len);
|
||||
legacy_host_integer_overflowing_sub<uint64_t>(
|
||||
host_integer_overflowing_sub<uint64_t>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
(uint64_t *)merged_interesting_remainder.data,
|
||||
interesting_divisor.data, subtraction_overflowed.data,
|
||||
@@ -481,7 +481,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto create_clean_version_of_merged_remainder =
|
||||
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
cleaned_merged_interesting_remainder.data, bsks, ksks,
|
||||
@@ -507,10 +507,10 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
|
||||
int factor = (i) ? 3 : 2;
|
||||
int factor_lut_id = factor - 2;
|
||||
@@ -521,7 +521,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto conditionally_zero_out_merged_interesting_remainder =
|
||||
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
@@ -534,7 +534,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto conditionally_zero_out_merged_new_remainder =
|
||||
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
new_remainder.data, overflow_sum_radix.data, bsks, ksks,
|
||||
new_remainder.len,
|
||||
@@ -544,7 +544,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto set_quotient_bit = [&](cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, did_not_overflow.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data, bsks, ksks, 1,
|
||||
@@ -552,7 +552,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
mem_ptr->merge_overflow_flags_luts[pos_in_block]
|
||||
->params.message_modulus);
|
||||
|
||||
legacy_host_addition<Torus>(
|
||||
host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], "ient[block_of_bit * big_lwe_size],
|
||||
"ient[block_of_bit * big_lwe_size], did_not_overflow.data,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
@@ -588,17 +588,17 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
|
||||
// Clean the quotient and remainder
|
||||
// as even though they have no carries, they are not at nominal noise level
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], remainder,
|
||||
remainder1.data, remainder2.data,
|
||||
radix_params.big_lwe_dimension, remainder1.len);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], remainder, remainder1.data,
|
||||
remainder2.data, radix_params.big_lwe_dimension,
|
||||
remainder1.len);
|
||||
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem_ptr->sub_streams_1, gpu_indexes, gpu_count, remainder, remainder,
|
||||
bsks, ksks, num_blocks, mem_ptr->message_extract_lut_1);
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem_ptr->sub_streams_2, gpu_indexes, gpu_count, quotient, quotient, bsks,
|
||||
ksks, num_blocks, mem_ptr->message_extract_lut_2);
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
@@ -636,14 +636,12 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
legacy_host_integer_abs_kb_async<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
positive_numerator.data, bsks, ksks, int_mem_ptr->abs_mem_1, true,
|
||||
num_blocks);
|
||||
legacy_host_integer_abs_kb_async<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
positive_divisor.data, bsks, ksks, int_mem_ptr->abs_mem_2, true,
|
||||
num_blocks);
|
||||
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count, positive_numerator.data, bsks, ksks,
|
||||
int_mem_ptr->abs_mem_1, true, num_blocks);
|
||||
host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, positive_divisor.data, bsks, ksks,
|
||||
int_mem_ptr->abs_mem_2, true, num_blocks);
|
||||
for (uint j = 0; j < int_mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
@@ -654,7 +652,7 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
positive_numerator.data, positive_divisor.data, bsks, ksks,
|
||||
int_mem_ptr->unsigned_mem, num_blocks);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->sign_bits_are_different,
|
||||
&numerator[big_lwe_size * (num_blocks - 1)],
|
||||
@@ -667,36 +665,36 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
legacy_host_integer_radix_negation(
|
||||
host_integer_radix_negation(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_quotient, quotient, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_quotient, nullptr, nullptr, int_mem_ptr->scp_mem_1,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
|
||||
legacy_host_integer_radix_negation(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_remainder, remainder,
|
||||
radix_params.big_lwe_dimension, num_blocks,
|
||||
radix_params.message_modulus, radix_params.carry_modulus);
|
||||
host_integer_radix_negation(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_remainder,
|
||||
remainder, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus,
|
||||
radix_params.carry_modulus);
|
||||
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_remainder, nullptr, nullptr,
|
||||
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks, requested_flag,
|
||||
uses_carry);
|
||||
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient,
|
||||
int_mem_ptr->sign_bits_are_different, int_mem_ptr->negated_quotient,
|
||||
quotient, int_mem_ptr->cmux_quotient_mem, bsks, ksks, num_blocks);
|
||||
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count, remainder,
|
||||
&numerator[big_lwe_size * (num_blocks - 1)],
|
||||
int_mem_ptr->negated_remainder, remainder,
|
||||
|
||||
@@ -106,42 +106,49 @@ void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
|
||||
const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t requested_flag, uint32_t uses_carry) {
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array, carry_out,
|
||||
carry_in, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), requested_flag, uses_carry);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t requested_flag, uint32_t uses_carry) {
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_add_and_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lhs_array, rhs_array,
|
||||
carry_out, carry_in, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), requested_flag, uses_carry);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs_array),
|
||||
static_cast<const uint64_t *>(rhs_array),
|
||||
static_cast<uint64_t *>(carry_out),
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *overflow_block,
|
||||
const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t compute_overflow,
|
||||
void *lhs_array, const void *rhs_array, void *overflow_block,
|
||||
const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow) {
|
||||
|
||||
host_integer_overflowing_sub<uint64_t>(
|
||||
(cudaStream_t const *)streams, gpu_indexes, gpu_count, lhs_array,
|
||||
lhs_array, rhs_array, overflow_block, input_borrow,
|
||||
(cudaStream_t const *)streams, gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs_array), static_cast<uint64_t *>(lhs_array),
|
||||
static_cast<const uint64_t *>(rhs_array),
|
||||
static_cast<uint64_t *>(overflow_block),
|
||||
static_cast<const uint64_t *>(input_borrow),
|
||||
(int_borrow_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)ksks,
|
||||
compute_overflow, uses_input_borrow);
|
||||
num_blocks, compute_overflow, uses_input_borrow);
|
||||
}
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
@@ -177,7 +184,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -188,7 +195,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
lut_degree, allocate_gpu_memory);
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
@@ -198,7 +205,7 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -209,19 +216,22 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
num_many_lut, lut_degree, allocate_gpu_memory);
|
||||
num_many_lut, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks) {
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
void const *input_radix_lwe,
|
||||
int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks) {
|
||||
|
||||
host_apply_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
input_radix_lwe, (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
|
||||
bsks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks,
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
@@ -234,15 +244,16 @@ void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
|
||||
void cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
host_apply_many_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
input_radix_lwe, (int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks),
|
||||
bsks, num_many_lut, lut_stride);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
|
||||
num_many_lut, lut_stride);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
@@ -252,7 +263,7 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -263,21 +274,22 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
lut_degree, allocate_gpu_memory);
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_1,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
|
||||
uint32_t shift) {
|
||||
void *output_radix_lwe, void const *input_radix_lwe_1,
|
||||
void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks, uint32_t shift) {
|
||||
|
||||
host_apply_bivariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
input_radix_lwe_1, input_radix_lwe_2, (int_radix_lut<uint64_t> *)mem_ptr,
|
||||
(uint64_t **)(ksks), bsks, num_radix_blocks, shift);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe_1),
|
||||
static_cast<const uint64_t *>(input_radix_lwe_2),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
|
||||
shift);
|
||||
}
|
||||
|
||||
void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
||||
@@ -295,7 +307,7 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -306,21 +318,22 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
lut_degree, allocate_gpu_memory);
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_radix_blocks) {
|
||||
void *output_radix_lwe, void *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks, uint32_t shift) {
|
||||
|
||||
int_radix_params params = ((int_radix_lut<uint64_t> *)mem_ptr)->params;
|
||||
|
||||
host_compute_prefix_sum_hillis_steele<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, output_radix_lwe,
|
||||
generates_or_propagates, params, (int_radix_lut<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<uint64_t *>(generates_or_propagates), params,
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
@@ -332,9 +345,11 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
|
||||
void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array) {
|
||||
uint32_t gpu_count, void *lwe_array,
|
||||
uint32_t num_blocks,
|
||||
uint32_t lwe_size) {
|
||||
|
||||
host_radix_blocks_reverse_inplace<uint64_t>((cudaStream_t *)(streams),
|
||||
gpu_indexes, lwe_array);
|
||||
host_radix_blocks_reverse_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes,
|
||||
static_cast<uint64_t *>(lwe_array), num_blocks, lwe_size);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -228,9 +228,9 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
if (num_radix_in_vec == 2) {
|
||||
legacy_host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size], big_lwe_dimension, num_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size],
|
||||
big_lwe_dimension, num_blocks);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -280,13 +280,10 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
|
||||
// generate accumulators
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], message_acc,
|
||||
luts_message_carry->get_degree(0), luts_message_carry->get_max_degree(0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
lut_f_message);
|
||||
streams[0], gpu_indexes[0], message_acc, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_message);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc, luts_message_carry->get_degree(1),
|
||||
luts_message_carry->get_max_degree(1), glwe_dimension, polynomial_size,
|
||||
streams[0], gpu_indexes[0], carry_acc, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_carry);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
@@ -297,7 +294,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
ch_amount++;
|
||||
dim3 add_grid(ch_amount, num_blocks, 1);
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
tree_add_chunks<Torus><<<add_grid, 512, 0, streams[0]>>>(
|
||||
new_blocks, old_blocks, min(r, chunk_size), big_lwe_size, num_blocks);
|
||||
|
||||
@@ -448,9 +445,9 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
luts_message_carry->release(streams, gpu_indexes, gpu_count);
|
||||
delete (luts_message_carry);
|
||||
|
||||
legacy_host_addition<Torus>(
|
||||
streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size], big_lwe_dimension, num_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks[num_blocks * big_lwe_size],
|
||||
big_lwe_dimension, num_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
@@ -541,13 +538,13 @@ __host__ void host_integer_mult_radix_kb(
|
||||
dim3 grid(lsb_vector_block_count, 1, 1);
|
||||
dim3 thds(params::degree / params::opt, 1, 1);
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
all_shifted_lhs_rhs<Torus, params><<<grid, thds, 0, streams[0]>>>(
|
||||
radix_lwe_left, vector_result_lsb, vector_result_msb, radix_lwe_right,
|
||||
vector_lsb_rhs, vector_msb_rhs, num_blocks);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, block_mul_res, block_mul_res,
|
||||
vector_result_sb, bsks, ksks, total_block_count, luts_array,
|
||||
luts_array->params.message_modulus);
|
||||
@@ -556,7 +553,7 @@ __host__ void host_integer_mult_radix_kb(
|
||||
vector_result_msb = &block_mul_res[lsb_vector_block_count *
|
||||
(polynomial_size * glwe_dimension + 1)];
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
fill_radix_from_lsb_msb<Torus, params>
|
||||
<<<num_blocks * num_blocks, params::degree / params::opt, 0,
|
||||
streams[0]>>>(vector_result_sb, vector_result_lsb, vector_result_msb,
|
||||
@@ -587,7 +584,7 @@ __host__ void host_integer_mult_radix_kb(
|
||||
auto scp_mem_ptr = mem_ptr->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
@@ -2,11 +2,13 @@
|
||||
|
||||
void cuda_negate_integer_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
|
||||
void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
|
||||
host_integer_radix_negation<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
|
||||
gpu_count, lwe_array_out, lwe_array_in,
|
||||
message_modulus, carry_modulus);
|
||||
host_integer_radix_negation<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), lwe_dimension,
|
||||
lwe_ciphertext_count, message_modulus, carry_modulus);
|
||||
}
|
||||
|
||||
@@ -54,69 +54,12 @@ device_integer_radix_negation(Torus *output, Torus const *input,
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_radix_negation(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in,
|
||||
uint64_t message_modulus, uint64_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_in->num_radix_blocks)
|
||||
PANIC("Cuda error: lwe_array_in and lwe_array_out num radix blocks must be "
|
||||
"the same")
|
||||
|
||||
if (lwe_array_out->lwe_dimension != lwe_array_in->lwe_dimension)
|
||||
PANIC("Cuda error: lwe_array_in and lwe_array_out lwe_dimension must be "
|
||||
"the same")
|
||||
|
||||
auto num_radix_blocks = lwe_array_out->num_radix_blocks;
|
||||
auto lwe_dimension = lwe_array_out->lwe_dimension;
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
// Value of the shift we multiply our messages by
|
||||
// If message_modulus and carry_modulus are always powers of 2 we can simplify
|
||||
// this
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_integer_radix_negation<Torus><<<grid, thds, 0, streams[0]>>>(
|
||||
static_cast<Torus *>(lwe_array_out->ptr),
|
||||
static_cast<Torus *>(lwe_array_in->ptr), num_radix_blocks, lwe_dimension,
|
||||
message_modulus, delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
uint8_t zb = 0;
|
||||
for (uint i = 0; i < lwe_array_out->num_radix_blocks; i++) {
|
||||
auto input_degree = lwe_array_in->degrees[i];
|
||||
|
||||
if (zb != 0) {
|
||||
input_degree += static_cast<uint64_t>(zb);
|
||||
}
|
||||
Torus z =
|
||||
std::max(static_cast<Torus>(1),
|
||||
static_cast<Torus>(ceil(input_degree / message_modulus))) *
|
||||
message_modulus;
|
||||
|
||||
lwe_array_out->degrees[i] = z - static_cast<uint64_t>(zb);
|
||||
lwe_array_out->noise_levels[i] = lwe_array_in->noise_levels[i];
|
||||
zb = z / message_modulus;
|
||||
}
|
||||
}
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_negation(
|
||||
__host__ void host_integer_radix_negation(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *output, Torus const *input,
|
||||
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
|
||||
uint64_t message_modulus, uint64_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
@@ -172,7 +115,7 @@ __host__ void host_integer_overflowing_sub_kb(
|
||||
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_overflowing_sub(
|
||||
__host__ void host_integer_overflowing_sub(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_out_array, Torus *lhs_array,
|
||||
const Torus *rhs_array, Torus *overflow_block, const Torus *input_borrow,
|
||||
@@ -191,13 +134,13 @@ __host__ void legacy_host_integer_overflowing_sub(
|
||||
uint32_t num_groups = (num_blocks + grouping_size - 1) / grouping_size;
|
||||
|
||||
auto stream = (cudaStream_t *)streams;
|
||||
legacy_host_unchecked_sub_with_correcting_term<Torus>(
|
||||
host_unchecked_sub_with_correcting_term<Torus>(
|
||||
stream[0], gpu_indexes[0], static_cast<Torus *>(lwe_out_array),
|
||||
static_cast<Torus *>(lhs_array), static_cast<const Torus *>(rhs_array),
|
||||
radix_params.big_lwe_dimension, num_blocks, radix_params.message_modulus,
|
||||
radix_params.carry_modulus, radix_params.message_modulus - 1);
|
||||
|
||||
legacy_host_single_borrow_propagate<Torus>(
|
||||
host_single_borrow_propagate<Torus>(
|
||||
streams, gpu_indexes, gpu_count, static_cast<Torus *>(lwe_out_array),
|
||||
static_cast<Torus *>(overflow_block),
|
||||
static_cast<const Torus *>(input_borrow),
|
||||
@@ -205,47 +148,4 @@ __host__ void legacy_host_integer_overflowing_sub(
|
||||
num_blocks, num_groups, compute_overflow, uses_input_borrow);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_overflowing_sub(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI *input_left,
|
||||
const CudaRadixCiphertextFFI *input_right,
|
||||
CudaRadixCiphertextFFI *overflow_block,
|
||||
const CudaRadixCiphertextFFI *input_borrow,
|
||||
int_borrow_prop_memory<uint64_t> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t compute_overflow, uint32_t uses_input_borrow) {
|
||||
|
||||
if (output->num_radix_blocks != input_left->num_radix_blocks ||
|
||||
output->num_radix_blocks != input_right->num_radix_blocks)
|
||||
PANIC("Cuda error: lwe_array_in and output num radix blocks must be "
|
||||
"the same")
|
||||
|
||||
if (output->lwe_dimension != input_left->lwe_dimension ||
|
||||
output->lwe_dimension != input_right->lwe_dimension)
|
||||
PANIC("Cuda error: lwe_array_in and output lwe_dimension must be "
|
||||
"the same")
|
||||
|
||||
auto num_blocks = output->num_radix_blocks;
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
// We need to recalculate the num_groups, because on the division the number
|
||||
// of num_blocks changes
|
||||
uint32_t block_modulus =
|
||||
radix_params.message_modulus * radix_params.carry_modulus;
|
||||
uint32_t num_bits_in_block = log2_int(block_modulus);
|
||||
uint32_t grouping_size = num_bits_in_block;
|
||||
uint32_t num_groups = (num_blocks + grouping_size - 1) / grouping_size;
|
||||
|
||||
auto stream = (cudaStream_t *)streams;
|
||||
host_unchecked_sub_with_correcting_term<Torus>(
|
||||
stream[0], gpu_indexes[0], output, input_left, input_right, num_blocks,
|
||||
radix_params.message_modulus, radix_params.carry_modulus);
|
||||
|
||||
host_single_borrow_propagate<Torus>(
|
||||
streams, gpu_indexes, gpu_count, output, overflow_block, input_borrow,
|
||||
(int_borrow_prop_memory<Torus> *)mem_ptr, bsks, (Torus **)(ksks),
|
||||
num_groups, compute_overflow, uses_input_borrow);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
#include "radix_ciphertext.cuh"
|
||||
|
||||
void release_radix_ciphertext(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *data) {
|
||||
cuda_drop_async(data->ptr, stream, gpu_index);
|
||||
free(data->degrees);
|
||||
free(data->noise_levels);
|
||||
cuda_synchronize_stream(stream, gpu_index);
|
||||
}
|
||||
@@ -1,142 +0,0 @@
|
||||
#ifndef CUDA_INTEGER_RADIX_CIPHERTEXT_CUH
|
||||
#define CUDA_INTEGER_RADIX_CIPHERTEXT_CUH
|
||||
|
||||
#include "device.h"
|
||||
#include "integer/integer.h"
|
||||
|
||||
template <typename Torus>
|
||||
void create_zero_radix_ciphertext_async(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *radix,
|
||||
const uint32_t num_radix_blocks,
|
||||
const uint32_t lwe_dimension) {
|
||||
radix->lwe_dimension = lwe_dimension;
|
||||
radix->num_radix_blocks = num_radix_blocks;
|
||||
uint32_t size = (lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
|
||||
radix->ptr = (void *)cuda_malloc_async(size, stream, gpu_index);
|
||||
cuda_memset_async(radix->ptr, 0, size, stream, gpu_index);
|
||||
|
||||
radix->degrees = (uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
|
||||
radix->noise_levels =
|
||||
(uint64_t *)(calloc(num_radix_blocks, sizeof(uint64_t)));
|
||||
if (radix->degrees == NULL || radix->noise_levels == NULL) {
|
||||
PANIC("Cuda error: degrees / noise levels not allocated correctly")
|
||||
}
|
||||
}
|
||||
|
||||
// end_input_lwe_index is exclusive
|
||||
template <typename Torus>
|
||||
void as_radix_ciphertext_slice(CudaRadixCiphertextFFI *output_radix,
|
||||
const CudaRadixCiphertextFFI *input_radix,
|
||||
const uint32_t start_input_lwe_index,
|
||||
const uint32_t end_input_lwe_index) {
|
||||
if (input_radix->num_radix_blocks <
|
||||
end_input_lwe_index - start_input_lwe_index)
|
||||
PANIC("Cuda error: input radix should have more blocks than the specified "
|
||||
"range")
|
||||
if (start_input_lwe_index >= end_input_lwe_index)
|
||||
PANIC("Cuda error: slice range should be non negative")
|
||||
|
||||
auto lwe_size = input_radix->lwe_dimension + 1;
|
||||
output_radix->num_radix_blocks = end_input_lwe_index - start_input_lwe_index;
|
||||
output_radix->lwe_dimension = input_radix->lwe_dimension;
|
||||
Torus *in_ptr = (Torus *)input_radix->ptr;
|
||||
output_radix->ptr = (void *)(in_ptr + start_input_lwe_index * lwe_size);
|
||||
output_radix->degrees = input_radix->degrees + start_input_lwe_index;
|
||||
output_radix->noise_levels =
|
||||
input_radix->noise_levels + start_input_lwe_index;
|
||||
}
|
||||
|
||||
// end_lwe_index are exclusive
|
||||
template <typename Torus>
|
||||
void copy_radix_ciphertext_slice_async(
|
||||
cudaStream_t const stream, uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *output_radix, const uint32_t output_start_lwe_index,
|
||||
const uint32_t output_end_lwe_index,
|
||||
const CudaRadixCiphertextFFI *input_radix,
|
||||
const uint32_t input_start_lwe_index, const uint32_t input_end_lwe_index) {
|
||||
if (output_radix->lwe_dimension != input_radix->lwe_dimension)
|
||||
PANIC("Cuda error: input lwe dimension should be equal to output lwe "
|
||||
"dimension")
|
||||
if (output_end_lwe_index - output_start_lwe_index !=
|
||||
input_end_lwe_index - input_start_lwe_index)
|
||||
PANIC("Cuda error: output and input ranges should have the same size")
|
||||
if (output_end_lwe_index - output_start_lwe_index >
|
||||
output_radix->num_radix_blocks)
|
||||
PANIC("Cuda error: output range should be lower or equal to output num "
|
||||
"blocks")
|
||||
if (input_end_lwe_index - input_start_lwe_index >
|
||||
input_radix->num_radix_blocks)
|
||||
PANIC(
|
||||
"Cuda error: input range should be lower or equal to input num blocks")
|
||||
if (output_end_lwe_index - output_start_lwe_index <= 0)
|
||||
PANIC("Cuda error: output range should be greater than zero")
|
||||
if (input_end_lwe_index - input_start_lwe_index <= 0)
|
||||
PANIC("Cuda error: input range should be greater than zero")
|
||||
if (output_end_lwe_index <= output_start_lwe_index)
|
||||
PANIC("Cuda error: output end index should be greater or equal to output "
|
||||
"start index")
|
||||
if (input_end_lwe_index <= input_start_lwe_index)
|
||||
PANIC("Cuda error: input end index should be greater or equal to input "
|
||||
"start index")
|
||||
if (output_start_lwe_index > output_radix->num_radix_blocks)
|
||||
PANIC("Cuda error: output start index should be smaller than the number "
|
||||
"of blocks")
|
||||
if (input_start_lwe_index > input_radix->num_radix_blocks)
|
||||
PANIC("Cuda error: input start index should be smaller than the number "
|
||||
"of blocks")
|
||||
|
||||
auto lwe_size = input_radix->lwe_dimension + 1;
|
||||
Torus *out_ptr = (Torus *)output_radix->ptr;
|
||||
out_ptr = &out_ptr[output_start_lwe_index * lwe_size];
|
||||
Torus *in_ptr = (Torus *)input_radix->ptr;
|
||||
in_ptr = &in_ptr[input_start_lwe_index * lwe_size];
|
||||
auto num_blocks = input_end_lwe_index - input_start_lwe_index;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(out_ptr, in_ptr,
|
||||
num_blocks * lwe_size * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
for (uint i = 0; i < num_blocks; i++) {
|
||||
output_radix->degrees[i + output_start_lwe_index] =
|
||||
input_radix->degrees[i + input_start_lwe_index];
|
||||
output_radix->noise_levels[i + output_start_lwe_index] =
|
||||
input_radix->noise_levels[i + input_start_lwe_index];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void copy_radix_ciphertext_async(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *output_radix,
|
||||
const CudaRadixCiphertextFFI *input_radix) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
stream, gpu_index, output_radix, 0, output_radix->num_radix_blocks,
|
||||
input_radix, 0, input_radix->num_radix_blocks);
|
||||
}
|
||||
|
||||
// end_lwe_index is exclusive
|
||||
template <typename Torus>
|
||||
void set_zero_radix_ciphertext_slice_async(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *radix,
|
||||
const uint32_t start_lwe_index,
|
||||
const uint32_t end_lwe_index) {
|
||||
if (radix->num_radix_blocks < end_lwe_index - start_lwe_index)
|
||||
PANIC("Cuda error: input radix should have more blocks than the specified "
|
||||
"range")
|
||||
if (start_lwe_index >= end_lwe_index)
|
||||
PANIC("Cuda error: slice range should be non negative")
|
||||
|
||||
auto lwe_size = radix->lwe_dimension + 1;
|
||||
auto num_blocks_to_set = end_lwe_index - start_lwe_index;
|
||||
auto lwe_array_out_block = (Torus *)radix->ptr + start_lwe_index * lwe_size;
|
||||
cuda_memset_async(lwe_array_out_block, 0,
|
||||
num_blocks_to_set * lwe_size * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
memset(&radix->degrees[start_lwe_index], 0,
|
||||
num_blocks_to_set * sizeof(uint64_t));
|
||||
memset(&radix->noise_levels[start_lwe_index], 0,
|
||||
num_blocks_to_set * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -2,11 +2,13 @@
|
||||
|
||||
void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
|
||||
uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
|
||||
host_integer_radix_scalar_addition_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array,
|
||||
static_cast<const uint64_t *>(scalar_input), num_scalars, message_modulus,
|
||||
carry_modulus);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(scalar_input), lwe_dimension,
|
||||
lwe_ciphertext_count, message_modulus, carry_modulus);
|
||||
}
|
||||
|
||||
@@ -24,12 +24,12 @@ __global__ void device_integer_radix_scalar_addition_inplace(
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_scalar_addition_inplace(
|
||||
__host__ void host_integer_radix_scalar_addition_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array, Torus const *scalar_input,
|
||||
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
@@ -49,42 +49,6 @@ __host__ void legacy_host_integer_radix_scalar_addition_inplace(
|
||||
delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_scalar_addition_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array,
|
||||
Torus const *scalar_input, uint32_t num_scalars, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
if (lwe_array->num_radix_blocks < num_scalars)
|
||||
PANIC("Cuda error: num scalars should be smaller or equal to input num "
|
||||
"radix blocks")
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = num_scalars;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
// Value of the shift we multiply our messages by
|
||||
// If message_modulus and carry_modulus are always powers of 2 we can simplify
|
||||
// this
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_integer_radix_scalar_addition_inplace<Torus>
|
||||
<<<grid, thds, 0, streams[0]>>>((Torus *)lwe_array->ptr, scalar_input,
|
||||
num_scalars, lwe_array->lwe_dimension,
|
||||
delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
Torus scalar_input_cpu[num_scalars];
|
||||
cuda_memcpy_async_to_cpu(&scalar_input_cpu, scalar_input,
|
||||
num_scalars * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
for (uint i = 0; i < num_scalars; i++) {
|
||||
lwe_array->degrees[i] = lwe_array->degrees[i] + scalar_input_cpu[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void device_integer_radix_add_scalar_one_inplace(
|
||||
@@ -98,42 +62,17 @@ __global__ void device_integer_radix_add_scalar_one_inplace(
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_add_scalar_one_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array, uint32_t lwe_dimension,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = num_radix_blocks;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
// Value of the shift we multiply our messages by
|
||||
// If message_modulus and carry_modulus are always powers of 2 we can simplify
|
||||
// this
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_integer_radix_add_scalar_one_inplace<Torus>
|
||||
<<<grid, thds, 0, streams[0]>>>(lwe_array, num_radix_blocks,
|
||||
lwe_dimension, delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_add_scalar_one_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array,
|
||||
uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
uint32_t gpu_count, Torus *lwe_array, uint32_t lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus) {
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_array->num_radix_blocks;
|
||||
int num_entries = input_lwe_ciphertext_count;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
@@ -144,13 +83,9 @@ __host__ void host_integer_radix_add_scalar_one_inplace(
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_integer_radix_add_scalar_one_inplace<Torus>
|
||||
<<<grid, thds, 0, streams[0]>>>((Torus *)lwe_array->ptr,
|
||||
lwe_array->num_radix_blocks,
|
||||
lwe_array->lwe_dimension, delta);
|
||||
<<<grid, thds, 0, streams[0]>>>(lwe_array, input_lwe_ciphertext_count,
|
||||
lwe_dimension, delta);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
for (uint i = 0; i < lwe_array->num_radix_blocks; i++) {
|
||||
lwe_array->degrees[i] = lwe_array->degrees[i] + 1;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -173,7 +108,7 @@ __host__ void host_integer_radix_scalar_subtraction_inplace(
|
||||
uint32_t gpu_count, Torus *lwe_array, Torus *scalar_input,
|
||||
uint32_t lwe_dimension, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus) {
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
|
||||
@@ -2,58 +2,15 @@
|
||||
|
||||
void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
|
||||
void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op) {
|
||||
|
||||
host_integer_radix_scalar_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_input, static_cast<const uint64_t *>(clear_blocks),
|
||||
num_clear_blocks, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
}
|
||||
|
||||
void update_degrees_after_scalar_bitand(uint64_t *output_degrees,
|
||||
uint64_t *clear_degrees,
|
||||
uint64_t *input_degrees,
|
||||
uint32_t num_clear_blocks) {
|
||||
for (uint i = 0; i < num_clear_blocks; i++) {
|
||||
output_degrees[i] = std::min(clear_degrees[i], input_degrees[i]);
|
||||
}
|
||||
}
|
||||
void update_degrees_after_scalar_bitor(uint64_t *output_degrees,
|
||||
uint64_t *clear_degrees,
|
||||
uint64_t *input_degrees,
|
||||
uint32_t num_clear_blocks) {
|
||||
for (uint i = 0; i < num_clear_blocks; i++) {
|
||||
auto max = std::max(clear_degrees[i], input_degrees[i]);
|
||||
auto min = std::min(clear_degrees[i], input_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max | j > result) {
|
||||
result = max | j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
}
|
||||
void update_degrees_after_scalar_bitxor(uint64_t *output_degrees,
|
||||
uint64_t *clear_degrees,
|
||||
uint64_t *input_degrees,
|
||||
uint32_t num_clear_blocks) {
|
||||
for (uint i = 0; i < num_clear_blocks; i++) {
|
||||
auto max = std::max(clear_degrees[i], input_degrees[i]);
|
||||
auto min = std::min(clear_degrees[i], input_degrees[i]);
|
||||
auto result = max;
|
||||
|
||||
// Try every possibility to find the worst case
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max ^ j > result) {
|
||||
result = max ^ j;
|
||||
}
|
||||
}
|
||||
output_degrees[i] = result;
|
||||
}
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_input),
|
||||
static_cast<const uint64_t *>(clear_blocks), num_clear_blocks,
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
lwe_ciphertext_count, op);
|
||||
}
|
||||
|
||||
@@ -7,60 +7,45 @@
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_scalar_bitop_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input, Torus const *clear_blocks,
|
||||
uint32_t num_clear_blocks, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks) {
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_input,
|
||||
Torus const *clear_blocks, uint32_t num_clear_blocks,
|
||||
int_bitop_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
|
||||
uint32_t num_radix_blocks, BITOP_TYPE op) {
|
||||
|
||||
if (output->num_radix_blocks != input->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be equal")
|
||||
if (output->lwe_dimension != input->lwe_dimension)
|
||||
PANIC("Cuda error: input and output num radix blocks must be equal")
|
||||
auto lut = mem_ptr->lut;
|
||||
auto op = mem_ptr->op;
|
||||
auto num_radix_blocks = output->num_radix_blocks;
|
||||
auto params = lut->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
|
||||
uint32_t lwe_size = big_lwe_dimension + 1;
|
||||
|
||||
if (num_clear_blocks == 0) {
|
||||
if (op == SCALAR_BITAND) {
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
output, 0, num_radix_blocks);
|
||||
cuda_memset_async(lwe_array_out, 0,
|
||||
num_radix_blocks * lwe_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
} else {
|
||||
if (input != output)
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
input);
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array_out, lwe_array_input,
|
||||
num_radix_blocks * lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
} else {
|
||||
// We have all possible LUTs pre-computed and we use the decomposed scalar
|
||||
// as index to recover the right one
|
||||
uint64_t degrees[num_clear_blocks];
|
||||
uint64_t clear_degrees[num_clear_blocks];
|
||||
cuda_memcpy_async_to_cpu(&clear_degrees, clear_blocks,
|
||||
num_clear_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
if (mem_ptr->op == BITOP_TYPE::SCALAR_BITAND) {
|
||||
update_degrees_after_scalar_bitand(degrees, clear_degrees, input->degrees,
|
||||
num_clear_blocks);
|
||||
} else if (mem_ptr->op == BITOP_TYPE::SCALAR_BITOR) {
|
||||
update_degrees_after_scalar_bitor(degrees, clear_degrees, input->degrees,
|
||||
num_clear_blocks);
|
||||
} else if (mem_ptr->op == SCALAR_BITXOR) {
|
||||
update_degrees_after_scalar_bitxor(degrees, clear_degrees, input->degrees,
|
||||
num_clear_blocks);
|
||||
}
|
||||
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
|
||||
num_clear_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, output, input, bsks, ksks, lut,
|
||||
num_clear_blocks);
|
||||
memcpy(output->degrees, degrees, num_clear_blocks * sizeof(uint64_t));
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_input, bsks,
|
||||
ksks, num_clear_blocks, lut);
|
||||
|
||||
if (op == SCALAR_BITAND && num_clear_blocks < num_radix_blocks) {
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
output, num_clear_blocks,
|
||||
num_radix_blocks);
|
||||
auto lwe_array_out_block = lwe_array_out + num_clear_blocks * lwe_size;
|
||||
cuda_memset_async(lwe_array_out_block, 0,
|
||||
(num_radix_blocks - num_clear_blocks) * lwe_size *
|
||||
sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,14 +43,14 @@ __host__ void scalar_compare_radix_blocks_kb(
|
||||
|
||||
// Apply LUT to compare to 0
|
||||
auto sign_lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, subtracted_blocks, bsks,
|
||||
ksks, num_radix_blocks, sign_lut);
|
||||
|
||||
// Add one
|
||||
// Here Lhs can have the following values: (-1) % (message modulus * carry
|
||||
// modulus), 0, 1 So the output values after the addition will be: 0, 1, 2
|
||||
legacy_host_integer_radix_add_scalar_one_inplace<Torus>(
|
||||
host_integer_radix_add_scalar_one_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, big_lwe_dimension,
|
||||
num_radix_blocks, message_modulus, carry_modulus);
|
||||
}
|
||||
@@ -110,13 +110,13 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
};
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
|
||||
lut->get_max_degree(0), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, scalar_last_leaf_lut_f);
|
||||
generate_device_accumulator<Torus>(streams[0], gpu_indexes[0],
|
||||
lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus,
|
||||
carry_modulus, scalar_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
mem_ptr->tmp_lwe_array_out, bsks, ksks, 1, lut);
|
||||
|
||||
@@ -195,12 +195,12 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
|
||||
auto lut = diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
|
||||
lut->get_max_degree(0), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, scalar_bivariate_last_leaf_lut_f);
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_lsb_out,
|
||||
lwe_array_msb_out, bsks, ksks, 1, lut, lut->params.message_modulus);
|
||||
|
||||
@@ -331,12 +331,12 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), lut->get_degree(0),
|
||||
lut->get_max_degree(0), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, scalar_bivariate_last_leaf_lut_f);
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, are_all_msb_zeros,
|
||||
sign_block, bsks, ksks, 1, lut, lut->params.message_modulus);
|
||||
|
||||
@@ -426,13 +426,12 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
auto signed_msb_lut = mem_ptr->signed_msb_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
msb_streams[0], gpu_indexes[0], signed_msb_lut->get_lut(0, 0),
|
||||
signed_msb_lut->get_degree(0), signed_msb_lut->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, lut_f);
|
||||
signed_msb_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
Torus const *sign_block = msb + (num_msb_radix_blocks - 1) * big_lwe_size;
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
msb_streams, gpu_indexes, gpu_count, lwe_array_msb_out, sign_block,
|
||||
are_all_msb_zeros, bsks, ksks, 1, signed_msb_lut,
|
||||
signed_msb_lut->params.message_modulus);
|
||||
@@ -492,7 +491,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
msb_streams[0], gpu_indexes[0], trivial_sign_block, scalar_sign_block,
|
||||
big_lwe_dimension, 1, 1, message_modulus, carry_modulus);
|
||||
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
msb_streams, gpu_indexes, gpu_count, lwe_array_sign_out,
|
||||
encrypted_sign_block, trivial_sign_block, bsks, ksks, 1,
|
||||
mem_ptr->signed_lut, mem_ptr->signed_lut->params.message_modulus);
|
||||
@@ -541,10 +540,10 @@ __host__ void integer_radix_signed_scalar_maxmin_kb(
|
||||
|
||||
// Selector
|
||||
// CMUX for Max or Min
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, sign, lwe_array_left,
|
||||
lwe_array_right, mem_ptr->cmux_buffer, bsks, ksks,
|
||||
total_num_radix_blocks);
|
||||
host_integer_radix_cmux_kb<Torus>(streams, gpu_indexes, gpu_count,
|
||||
lwe_array_out, sign, lwe_array_left,
|
||||
lwe_array_right, mem_ptr->cmux_buffer, bsks,
|
||||
ksks, total_num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -622,7 +621,7 @@ __host__ void host_integer_radix_scalar_maxmin_kb(
|
||||
|
||||
// Selector
|
||||
// CMUX for Max or Min
|
||||
legacy_host_integer_radix_cmux_kb<Torus>(
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
|
||||
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
|
||||
@@ -686,7 +685,7 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
|
||||
lsb_streams[0], gpu_indexes[0]);
|
||||
scalar_comparison_luts->broadcast_lut(lsb_streams, gpu_indexes, 0);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
lsb_streams, gpu_indexes, gpu_count, lwe_array_lsb_out, packed_blocks,
|
||||
bsks, ksks, num_halved_lsb_radix_blocks, scalar_comparison_luts);
|
||||
}
|
||||
|
||||
@@ -47,6 +47,9 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
void *const *bsks, T *const *ksks, uint32_t input_lwe_dimension,
|
||||
uint32_t message_modulus, uint32_t num_radix_blocks, uint32_t num_scalars) {
|
||||
|
||||
if (num_radix_blocks == 0 | num_scalars == 0)
|
||||
return;
|
||||
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
uint32_t lwe_size = input_lwe_dimension + 1;
|
||||
@@ -55,7 +58,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
uint32_t num_ciphertext_bits = msg_bits * num_radix_blocks;
|
||||
|
||||
T *preshifted_buffer = mem->preshifted_buffer;
|
||||
T *all_shifted_buffer = (T *)mem->all_shifted_buffer->ptr;
|
||||
T *all_shifted_buffer = mem->all_shifted_buffer;
|
||||
|
||||
for (size_t shift_amount = 0; shift_amount < msg_bits; shift_amount++) {
|
||||
T *ptr = preshifted_buffer + shift_amount * lwe_size * num_radix_blocks;
|
||||
@@ -63,7 +66,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
cuda_memcpy_async_gpu_to_gpu(ptr, lwe_array,
|
||||
lwe_size_bytes * num_radix_blocks,
|
||||
streams[0], gpu_indexes[0]);
|
||||
legacy_host_integer_radix_logical_scalar_shift_kb_inplace<T>(
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace<T>(
|
||||
streams, gpu_indexes, gpu_count, ptr, shift_amount,
|
||||
mem->logical_scalar_shift_buffer, bsks, ksks, num_radix_blocks);
|
||||
} else {
|
||||
@@ -80,7 +83,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
preshifted_buffer + (i % msg_bits) * num_radix_blocks * lwe_size;
|
||||
T *block_shift_buffer =
|
||||
all_shifted_buffer + j * num_radix_blocks * lwe_size;
|
||||
legacy_host_radix_blocks_rotate_right<T>(
|
||||
host_radix_blocks_rotate_right<T>(
|
||||
streams, gpu_indexes, gpu_count, block_shift_buffer,
|
||||
preshifted_radix_ct, i / msg_bits, num_radix_blocks, lwe_size);
|
||||
// create trivial assign for value = 0
|
||||
@@ -114,7 +117,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
auto scp_mem_ptr = mem->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<T>(
|
||||
host_propagate_single_carry<T>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_radix_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
@@ -122,12 +125,12 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
|
||||
// Small scalar_mul is used in shift/rotate
|
||||
template <typename T>
|
||||
__host__ void host_legacy_integer_small_scalar_mul_radix(
|
||||
__host__ void host_integer_small_scalar_mul_radix(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, T *output_lwe_array, T *input_lwe_array, T scalar,
|
||||
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
@@ -143,42 +146,4 @@ __host__ void host_legacy_integer_small_scalar_mul_radix(
|
||||
input_lwe_ciphertext_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
// Small scalar_mul is used in shift/rotate
|
||||
template <typename T>
|
||||
__host__ void host_integer_small_scalar_mul_radix(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *output_lwe_array,
|
||||
CudaRadixCiphertextFFI *input_lwe_array, T scalar) {
|
||||
|
||||
if (output_lwe_array->num_radix_blocks != input_lwe_array->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
if (output_lwe_array->lwe_dimension != input_lwe_array->lwe_dimension)
|
||||
PANIC("Cuda error: input and output lwe_dimension must be the same")
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
auto lwe_dimension = input_lwe_array->lwe_dimension;
|
||||
auto num_radix_blocks = input_lwe_array->num_radix_blocks;
|
||||
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = num_radix_blocks * lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
device_small_scalar_radix_multiplication<<<grid, thds, 0, streams[0]>>>(
|
||||
(T *)output_lwe_array->ptr, (T *)input_lwe_array->ptr, scalar,
|
||||
lwe_dimension, num_radix_blocks);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
output_lwe_array->noise_levels[i] =
|
||||
input_lwe_array->noise_levels[i] * scalar;
|
||||
output_lwe_array->degrees[i] = input_lwe_array->degrees[i] * scalar;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -22,13 +22,14 @@ void scratch_cuda_integer_radix_scalar_rotate_kb_64(
|
||||
|
||||
void cuda_integer_radix_scalar_rotate_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint32_t n, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *lwe_array, uint32_t n, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
|
||||
host_integer_radix_scalar_rotate_kb_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array, n,
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), n,
|
||||
(int_logical_scalar_shift_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
(uint64_t **)(ksks), num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_scalar_rotate(void *const *streams,
|
||||
|
||||
@@ -26,14 +26,18 @@ __host__ void scratch_cuda_integer_radix_scalar_rotate_kb(
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array, uint32_t n,
|
||||
uint32_t gpu_count, Torus *lwe_array, uint32_t n,
|
||||
int_logical_scalar_shift_buffer<Torus> *mem, void *const *bsks,
|
||||
Torus *const *ksks) {
|
||||
Torus *const *ksks, uint32_t num_blocks) {
|
||||
|
||||
auto num_blocks = lwe_array->num_radix_blocks;
|
||||
auto params = mem->params;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
auto message_modulus = params.message_modulus;
|
||||
|
||||
size_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
size_t num_bits_in_message = (size_t)log2_int(message_modulus);
|
||||
size_t total_num_bits = num_bits_in_message * num_blocks;
|
||||
n = n % total_num_bits;
|
||||
@@ -44,7 +48,7 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
size_t rotations = n / num_bits_in_message;
|
||||
size_t shift_within_block = n % num_bits_in_message;
|
||||
|
||||
auto rotated_buffer = mem->tmp_rotated;
|
||||
Torus *rotated_buffer = mem->tmp_rotated;
|
||||
|
||||
// rotate right all the blocks in radix ciphertext
|
||||
// copy result in new buffer
|
||||
@@ -55,11 +59,11 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
// rotate right as the blocks are from LSB to MSB
|
||||
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_buffer, lwe_array, rotations,
|
||||
num_blocks);
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
lwe_array, 0, num_blocks,
|
||||
rotated_buffer, 0, num_blocks);
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
|
||||
num_blocks * big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
if (shift_within_block == 0) {
|
||||
return;
|
||||
@@ -69,24 +73,24 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
auto giver_blocks = rotated_buffer;
|
||||
host_radix_blocks_rotate_right<Torus>(streams, gpu_indexes, gpu_count,
|
||||
giver_blocks, lwe_array, 1,
|
||||
num_blocks);
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, receiver_blocks,
|
||||
giver_blocks, bsks, ksks, lut_bivariate, num_blocks,
|
||||
giver_blocks, bsks, ksks, num_blocks, lut_bivariate,
|
||||
lut_bivariate->params.message_modulus);
|
||||
|
||||
} else {
|
||||
// rotate left as the blocks are from LSB to MSB
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
rotated_buffer, lwe_array, rotations,
|
||||
num_blocks);
|
||||
num_blocks, big_lwe_size);
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
lwe_array, 0, num_blocks,
|
||||
rotated_buffer, 0, num_blocks);
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array, rotated_buffer,
|
||||
num_blocks * big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
if (shift_within_block == 0) {
|
||||
return;
|
||||
@@ -95,14 +99,14 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
auto receiver_blocks = lwe_array;
|
||||
auto giver_blocks = rotated_buffer;
|
||||
host_radix_blocks_rotate_left<Torus>(streams, gpu_indexes, gpu_count,
|
||||
giver_blocks, lwe_array, 1,
|
||||
num_blocks);
|
||||
giver_blocks, lwe_array, 1, num_blocks,
|
||||
big_lwe_size);
|
||||
|
||||
auto lut_bivariate = mem->lut_buffers_bivariate[shift_within_block - 1];
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, receiver_blocks,
|
||||
giver_blocks, bsks, ksks, lut_bivariate, num_blocks,
|
||||
giver_blocks, bsks, ksks, num_blocks, lut_bivariate,
|
||||
lut_bivariate->params.message_modulus);
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user