mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 07:38:08 -05:00
Compare commits
28 Commits
bb/fix/sum
...
al/debug_s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
071934532e | ||
|
|
87b41ebbe5 | ||
|
|
7ba7dd3e19 | ||
|
|
7f122bb435 | ||
|
|
56c0811751 | ||
|
|
580ac5aa99 | ||
|
|
cf34e9b7f0 | ||
|
|
856fc1a709 | ||
|
|
fe0a195630 | ||
|
|
aca7e79585 | ||
|
|
c0e89a53ef | ||
|
|
312952007f | ||
|
|
ff51ed3f34 | ||
|
|
9737bdcb98 | ||
|
|
87a43a4900 | ||
|
|
345bdbf17f | ||
|
|
cc54ba2236 | ||
|
|
11df6c69ee | ||
|
|
b76f4dbfe0 | ||
|
|
be21c15c80 | ||
|
|
aa51b25313 | ||
|
|
300c95fe3d | ||
|
|
524adda8f6 | ||
|
|
dedcf205b4 | ||
|
|
2c8d4c0fb0 | ||
|
|
3370fb5b7e | ||
|
|
cd77eac42b | ||
|
|
40f20b4ecb |
4
.github/actions/gpu_setup/action.yml
vendored
4
.github/actions/gpu_setup/action.yml
vendored
@@ -33,7 +33,9 @@ runs:
|
||||
if: inputs.github-instance == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
TOOLKIT_VERSION="$(echo ${CUDA_VERSION} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${env.CUDA_KEYRING_PACKAGE}
|
||||
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
|
||||
sha256sum -c checksum
|
||||
|
||||
@@ -67,7 +67,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -126,9 +126,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
6
.github/workflows/aws_tfhe_fast_tests.yml
vendored
6
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -174,7 +174,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -182,6 +182,7 @@ jobs:
|
||||
if: needs.should-run.outputs.csprng_test == 'true'
|
||||
run: |
|
||||
make test_tfhe_csprng
|
||||
make test_tfhe_csprng_big_endian
|
||||
|
||||
- name: Run tfhe-zk-pok tests
|
||||
if: needs.should-run.outputs.zk_pok_test == 'true'
|
||||
@@ -272,9 +273,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
|
||||
5
.github/workflows/aws_tfhe_integer_tests.yml
vendored
5
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -114,7 +114,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -142,9 +142,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -147,9 +147,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
5
.github/workflows/aws_tfhe_tests.yml
vendored
5
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -185,7 +185,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -254,9 +254,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
5
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
5
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -68,7 +68,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -123,9 +123,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
14
.github/workflows/benchmark_boolean.yml
vendored
14
.github/workflows/benchmark_boolean.yml
vendored
@@ -58,14 +58,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -114,8 +117,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
14
.github/workflows/benchmark_core_crypto.yml
vendored
14
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -58,14 +58,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -107,8 +110,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
14
.github/workflows/benchmark_dex.yml
vendored
14
.github/workflows/benchmark_dex.yml
vendored
@@ -58,14 +58,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -116,8 +119,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
14
.github/workflows/benchmark_erc20.yml
vendored
14
.github/workflows/benchmark_erc20.yml
vendored
@@ -59,14 +59,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -111,8 +114,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
30
.github/workflows/benchmark_gpu_4090.yml
vendored
30
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -46,15 +46,18 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
echo "FAST_BENCH=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -93,8 +96,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
@@ -124,14 +130,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -170,8 +179,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
42
.github/workflows/benchmark_gpu_common.yml
vendored
42
.github/workflows/benchmark_gpu_common.yml
vendored
@@ -120,26 +120,33 @@ jobs:
|
||||
env:
|
||||
INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
|
||||
|
||||
|
||||
- name: Set command output
|
||||
id: set_command
|
||||
run: |
|
||||
echo "command=${{ toJSON(env.COMMAND) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "command=${COMMAND_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
COMMAND_OUTPUT: ${{ toJSON(env.COMMAND) }}
|
||||
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
- name: Set parameters types output
|
||||
id: set_params_type
|
||||
run: |
|
||||
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "params_type=${PARAMS_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
PARAMS_TYPE_OUTPUT: ${{ toJSON(env.PARAMS_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
@@ -227,6 +234,8 @@ jobs:
|
||||
include:
|
||||
- cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -237,18 +246,20 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
# Re-export environment variables as dependencies setup perform this task in the previous job.
|
||||
# Local env variables are cleaned at the end of each job.
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
@@ -258,13 +269,15 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -317,8 +330,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
|
||||
14
.github/workflows/benchmark_gpu_dex_common.yml
vendored
14
.github/workflows/benchmark_gpu_dex_common.yml
vendored
@@ -119,14 +119,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -167,8 +170,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
|
||||
14
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
14
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -120,14 +120,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -168,8 +171,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
|
||||
14
.github/workflows/benchmark_hpu_integer.yml
vendored
14
.github/workflows/benchmark_hpu_integer.yml
vendored
@@ -37,14 +37,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -84,5 +87,8 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
22
.github/workflows/benchmark_integer.yml
vendored
22
.github/workflows/benchmark_integer.yml
vendored
@@ -79,12 +79,16 @@ jobs:
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (integer-benchmarks)
|
||||
@@ -128,14 +132,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -193,8 +200,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
18
.github/workflows/benchmark_shortint.yml
vendored
18
.github/workflows/benchmark_shortint.yml
vendored
@@ -48,7 +48,9 @@ jobs:
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (shortint-benchmarks)
|
||||
@@ -89,14 +91,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -150,8 +155,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
22
.github/workflows/benchmark_signed_integer.yml
vendored
22
.github/workflows/benchmark_signed_integer.yml
vendored
@@ -79,12 +79,16 @@ jobs:
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (signed-integer-benchmarks)
|
||||
@@ -128,14 +132,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -185,8 +192,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
12
.github/workflows/benchmark_tfhe_fft.yml
vendored
12
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -61,11 +61,14 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -107,8 +110,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
12
.github/workflows/benchmark_tfhe_ntt.yml
vendored
12
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -61,11 +61,14 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -107,8 +110,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
14
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
14
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -98,14 +98,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -155,8 +158,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
24
.github/workflows/benchmark_wasm_client.yml
vendored
24
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -96,14 +96,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -136,12 +139,16 @@ jobs:
|
||||
|
||||
- name: Install web resources
|
||||
run: |
|
||||
make install_${{ matrix.browser }}_browser
|
||||
make install_${{ matrix.browser }}_web_driver
|
||||
make install_"${BROWSER}"_browser
|
||||
make install_"${BROWSER}"_web_driver
|
||||
env:
|
||||
BROWSER: ${{ matrix.browser }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_web_js_api_parallel_${{ matrix.browser }}_ci
|
||||
make bench_web_js_api_parallel_"${BROWSER}"_ci
|
||||
env:
|
||||
BROWSER: ${{ matrix.browser }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -188,8 +195,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
18
.github/workflows/benchmark_zk_pke.yml
vendored
18
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -93,7 +93,9 @@ jobs:
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (pke-zk-benchmarks)
|
||||
@@ -140,14 +142,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -205,8 +210,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
6
.github/workflows/ci_lint.yml
vendored
6
.github/workflows/ci_lint.yml
vendored
@@ -25,10 +25,10 @@ jobs:
|
||||
|
||||
- name: Get actionlint
|
||||
run: |
|
||||
wget "https://github.com/rhysd/actionlint/releases/download/v${{ env.ACTIONLINT_VERSION }}/actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz"
|
||||
echo "${{ env.ACTIONLINT_CHECKSUM }} actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz" > checksum
|
||||
wget "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz"
|
||||
echo "${ACTIONLINT_CHECKSUM} actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz" > checksum
|
||||
sha256sum -c checksum
|
||||
tar -xf actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz actionlint
|
||||
tar -xf actionlint_"${ACTIONLINT_VERSION}"_linux_amd64.tar.gz actionlint
|
||||
ln -s "$(pwd)/actionlint" /usr/local/bin/
|
||||
|
||||
- name: Lint workflows
|
||||
|
||||
2
.github/workflows/code_coverage.yml
vendored
2
.github/workflows/code_coverage.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
13
.github/workflows/data_pr_close.yml
vendored
13
.github/workflows/data_pr_close.yml
vendored
@@ -59,7 +59,7 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${COMMENTS_URL}" \
|
||||
-d "${BODY}"
|
||||
@@ -71,6 +71,7 @@ jobs:
|
||||
REPO: ${{ github.repository }}
|
||||
EVENT_NUMBER: ${{ github.event.number }}
|
||||
COMMENTS_URL: ${{ fromJson(env.TARGET_REPO_PR).comments_url }}
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Merge the Pull Request in the data repo
|
||||
if: ${{ github.event.pull_request.merged }}
|
||||
@@ -81,7 +82,7 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X PUT \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${TARGET_REPO_PR_URL}"/merge \
|
||||
-d '{ "merge_method": "rebase" }'
|
||||
@@ -91,6 +92,7 @@ jobs:
|
||||
exit $RES
|
||||
env:
|
||||
TARGET_REPO_PR_URL: ${{ fromJson(env.TARGET_REPO_PR).url }}
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Close the Pull Request in the data repo
|
||||
if: ${{ !github.event.pull_request.merged }}
|
||||
@@ -101,7 +103,7 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X PATCH \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${TARGET_REPO_PR_URL}" \
|
||||
-d '{ "state": "closed" }'
|
||||
@@ -111,6 +113,7 @@ jobs:
|
||||
exit $RES
|
||||
env:
|
||||
TARGET_REPO_PR_URL: ${{ fromJson(env.TARGET_REPO_PR).url }}
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Delete the associated branch in the data repo
|
||||
run: |
|
||||
@@ -120,13 +123,15 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X DELETE \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${TARGET_REPO_API_URL}"/git/refs/heads/"${PR_BRANCH}"
|
||||
RES="$?"
|
||||
echo EOF
|
||||
} >> "${GITHUB_ENV}"
|
||||
exit $RES
|
||||
env:
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() && job.status == 'failure' }}
|
||||
|
||||
2
.github/workflows/gpu_4090_tests.yml
vendored
2
.github/workflows/gpu_4090_tests.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
5
.github/workflows/gpu_fast_h100_tests.yml
vendored
5
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -140,7 +140,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -172,9 +172,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
5
.github/workflows/gpu_fast_tests.yml
vendored
5
.github/workflows/gpu_fast_tests.yml
vendored
@@ -124,7 +124,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -156,9 +156,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
2
.github/workflows/gpu_full_h100_tests.yml
vendored
2
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -79,7 +79,7 @@ jobs:
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -161,9 +161,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
23
.github/workflows/gpu_pcc.yml
vendored
23
.github/workflows/gpu_pcc.yml
vendored
@@ -81,16 +81,20 @@ jobs:
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
shell: bash
|
||||
run: |
|
||||
TOOLKIT_VERSION="$(echo ${{ matrix.cuda }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/"${CUDA_KEYRING_PACKAGE}"
|
||||
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
|
||||
sha256sum -c checksum
|
||||
sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
|
||||
sudo apt update
|
||||
sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -100,17 +104,21 @@ jobs:
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Run fmt checks
|
||||
run: |
|
||||
@@ -127,9 +135,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -144,9 +144,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -158,9 +158,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -156,9 +156,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -144,9 +144,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -158,9 +158,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -156,9 +156,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
2
.github/workflows/hpu_hlapi_tests.yml
vendored
2
.github/workflows/hpu_hlapi_tests.yml
vendored
@@ -70,4 +70,4 @@ jobs:
|
||||
source setup_hpu.sh
|
||||
just -f mockups/tfhe-hpu-mockup/Justfile BUILD_PROFILE=release mockup &
|
||||
make HPU_CONFIG=sim test_high_level_api_hpu
|
||||
|
||||
make HPU_CONFIG=sim test_user_doc_hpu
|
||||
|
||||
2
.github/workflows/integer_long_run_tests.yml
vendored
2
.github/workflows/integer_long_run_tests.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
2
.github/workflows/m1_tests.yml
vendored
2
.github/workflows/m1_tests.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
26
.github/workflows/make_release_cuda.yml
vendored
26
.github/workflows/make_release_cuda.yml
vendored
@@ -67,7 +67,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -78,17 +78,19 @@ jobs:
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
- name: Prepare package
|
||||
@@ -129,7 +131,7 @@ jobs:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -140,19 +142,23 @@ jobs:
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
|
||||
26
Makefile
26
Makefile
@@ -170,9 +170,13 @@ install_typos_checker: install_rs_build_toolchain
|
||||
.PHONY: install_zizmor # Install zizmor workflow security checker
|
||||
install_zizmor: install_rs_build_toolchain
|
||||
@zizmor --version > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install zizmor || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install zizmor --version ~1.9 || \
|
||||
( echo "Unable to install zizmor, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_cargo_cross # Install custom tfhe-rs lints
|
||||
install_cargo_cross: install_rs_build_toolchain
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cross
|
||||
|
||||
.PHONY: setup_venv # Setup Python virtualenv for wasm tests
|
||||
setup_venv:
|
||||
python3 -m venv venv
|
||||
@@ -935,9 +939,21 @@ test_user_doc: install_rs_build_toolchain
|
||||
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
|
||||
test_user_doc_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
--features=internal-keycache,integer,zk-pok,gpu -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_user_doc_hpu # Run tests for HPU from the .md documentation
|
||||
test_user_doc_hpu: install_rs_build_toolchain
|
||||
ifeq ($(HPU_CONFIG), v80)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=internal-keycache,integer,hpu,hpu-v80 -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
else
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=internal-keycache,integer,hpu -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
endif
|
||||
|
||||
|
||||
|
||||
.PHONY: test_regex_engine # Run tests for regex_engine example
|
||||
@@ -968,6 +984,12 @@ test_tfhe_csprng: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-csprng
|
||||
|
||||
.PHONY: test_tfhe_csprng_big_endian # Run tfhe-csprng tests on an emulated big endian system
|
||||
test_tfhe_csprng_big_endian: install_rs_build_toolchain install_cargo_cross
|
||||
RUSTFLAGS="" cross $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-csprng --target=powerpc64-unknown-linux-gnu
|
||||
|
||||
|
||||
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
|
||||
test_zk_pok: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
|
||||
@@ -400,7 +400,8 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, bool reduce_degrees_for_single_carry_propagation,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
@@ -538,5 +539,13 @@ void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams, uint32_t const *gpu_indexes);
|
||||
|
||||
void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams,
|
||||
uint32_t const *gpu_indexes);
|
||||
} // extern C
|
||||
#endif // CUDA_INTEGER_H
|
||||
|
||||
@@ -1116,18 +1116,116 @@ template <typename Torus> struct int_overflowing_sub_memory {
|
||||
};
|
||||
|
||||
template <typename Torus> struct int_sum_ciphertexts_vec_memory {
|
||||
CudaRadixCiphertextFFI *new_blocks;
|
||||
CudaRadixCiphertextFFI *new_blocks_copy;
|
||||
CudaRadixCiphertextFFI *old_blocks;
|
||||
CudaRadixCiphertextFFI *small_lwe_vector;
|
||||
|
||||
int_radix_params params;
|
||||
|
||||
int32_t *d_smart_copy_in;
|
||||
int32_t *d_smart_copy_out;
|
||||
|
||||
bool mem_reuse = false;
|
||||
size_t max_total_blocks_in_vec;
|
||||
uint32_t num_blocks_in_radix;
|
||||
uint32_t max_num_radix_in_vec;
|
||||
uint64_t *size_tracker;
|
||||
bool gpu_memory_allocated;
|
||||
|
||||
// temporary buffers
|
||||
CudaRadixCiphertextFFI *current_blocks;
|
||||
CudaRadixCiphertextFFI *small_lwe_vector;
|
||||
|
||||
uint32_t *d_columns_data;
|
||||
uint32_t *d_columns_counter;
|
||||
uint32_t **d_columns;
|
||||
|
||||
uint32_t *d_new_columns_data;
|
||||
uint32_t *d_new_columns_counter;
|
||||
uint32_t **d_new_columns;
|
||||
|
||||
uint64_t *d_degrees;
|
||||
uint32_t *d_pbs_counters;
|
||||
|
||||
// lookup table for extracting message and carry
|
||||
int_radix_lut<Torus> *luts_message_carry;
|
||||
|
||||
bool mem_reuse = false;
|
||||
|
||||
void setup_index_buffers(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
|
||||
d_degrees = (uint64_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_total_blocks_in_vec * sizeof(uint64_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, gpu_memory_allocated);
|
||||
|
||||
d_pbs_counters = (uint32_t *)cuda_malloc_with_size_tracking_async(
|
||||
3 * sizeof(uint32_t), streams[0], gpu_indexes[0], size_tracker,
|
||||
gpu_memory_allocated);
|
||||
|
||||
auto num_blocks_in_radix = this->num_blocks_in_radix;
|
||||
auto max_num_radix_in_vec = this->max_num_radix_in_vec;
|
||||
auto setup_columns =
|
||||
[num_blocks_in_radix, max_num_radix_in_vec, streams,
|
||||
gpu_indexes](uint32_t **&columns, uint32_t *&columns_data,
|
||||
uint32_t *&columns_counter, uint64_t *size_tracker,
|
||||
bool gpu_memory_allocated) {
|
||||
columns_data = (uint32_t *)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_in_radix * max_num_radix_in_vec * sizeof(uint32_t),
|
||||
streams[0], gpu_indexes[0], size_tracker, gpu_memory_allocated);
|
||||
columns_counter = (uint32_t *)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_in_radix * sizeof(uint32_t), streams[0],
|
||||
gpu_indexes[0], size_tracker, gpu_memory_allocated);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
columns_counter, 0, num_blocks_in_radix * sizeof(uint32_t),
|
||||
streams[0], gpu_indexes[0], gpu_memory_allocated);
|
||||
uint32_t **h_columns = new uint32_t *[num_blocks_in_radix];
|
||||
for (int i = 0; i < num_blocks_in_radix; ++i) {
|
||||
h_columns[i] = columns_data + i * max_num_radix_in_vec;
|
||||
}
|
||||
columns = (uint32_t **)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_in_radix * sizeof(uint32_t *), streams[0],
|
||||
gpu_indexes[0], size_tracker, gpu_memory_allocated);
|
||||
cuda_memcpy_async_to_gpu(columns, h_columns,
|
||||
num_blocks_in_radix * sizeof(uint32_t *),
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
delete[] h_columns;
|
||||
};
|
||||
|
||||
setup_columns(d_columns, d_columns_data, d_columns_counter, size_tracker,
|
||||
gpu_memory_allocated);
|
||||
setup_columns(d_new_columns, d_new_columns_data, d_new_columns_counter,
|
||||
size_tracker, gpu_memory_allocated);
|
||||
}
|
||||
|
||||
void setup_lookup_tables(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count) {
|
||||
uint32_t message_modulus = params.message_modulus;
|
||||
|
||||
if (!mem_reuse) {
|
||||
luts_message_carry = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, 2, max_total_blocks_in_vec,
|
||||
gpu_memory_allocated, size_tracker);
|
||||
}
|
||||
auto message_acc = luts_message_carry->get_lut(0, 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(0, 1);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_message = [message_modulus](Torus x) -> Torus {
|
||||
return x % message_modulus;
|
||||
};
|
||||
auto lut_f_carry = [message_modulus](Torus x) -> Torus {
|
||||
return x / message_modulus;
|
||||
};
|
||||
|
||||
// generate accumulators
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], message_acc,
|
||||
luts_message_carry->get_degree(0),
|
||||
luts_message_carry->get_max_degree(0), params.glwe_dimension,
|
||||
params.polynomial_size, message_modulus, params.carry_modulus,
|
||||
lut_f_message, gpu_memory_allocated);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc,
|
||||
luts_message_carry->get_degree(1),
|
||||
luts_message_carry->get_max_degree(1), params.glwe_dimension,
|
||||
params.polynomial_size, message_modulus, params.carry_modulus,
|
||||
lut_f_carry, gpu_memory_allocated);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
}
|
||||
int_sum_ciphertexts_vec_memory(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params params,
|
||||
@@ -1136,103 +1234,84 @@ template <typename Torus> struct int_sum_ciphertexts_vec_memory {
|
||||
bool allocate_gpu_memory,
|
||||
uint64_t *size_tracker) {
|
||||
this->params = params;
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->mem_reuse = false;
|
||||
this->max_total_blocks_in_vec = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
this->num_blocks_in_radix = num_blocks_in_radix;
|
||||
this->max_num_radix_in_vec = max_num_radix_in_vec;
|
||||
this->gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->size_tracker = size_tracker;
|
||||
|
||||
int max_pbs_count = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
setup_index_buffers(streams, gpu_indexes);
|
||||
setup_lookup_tables(streams, gpu_indexes, gpu_count);
|
||||
|
||||
// allocate gpu memory for intermediate buffers
|
||||
new_blocks = new CudaRadixCiphertextFFI;
|
||||
// create and allocate intermediate buffers
|
||||
current_blocks = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks, max_pbs_count,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
new_blocks_copy = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks_copy, max_pbs_count,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
old_blocks = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], old_blocks, max_pbs_count,
|
||||
streams[0], gpu_indexes[0], current_blocks, max_total_blocks_in_vec,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
small_lwe_vector = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, max_pbs_count,
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, max_total_blocks_in_vec,
|
||||
params.small_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
d_smart_copy_in = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
d_smart_copy_out = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_in, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_out, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
}
|
||||
|
||||
int_sum_ciphertexts_vec_memory(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params params, uint32_t num_blocks_in_radix,
|
||||
uint32_t max_num_radix_in_vec, CudaRadixCiphertextFFI *new_blocks,
|
||||
CudaRadixCiphertextFFI *old_blocks,
|
||||
CudaRadixCiphertextFFI *small_lwe_vector, bool allocate_gpu_memory,
|
||||
uint32_t max_num_radix_in_vec, CudaRadixCiphertextFFI *current_blocks,
|
||||
CudaRadixCiphertextFFI *small_lwe_vector,
|
||||
int_radix_lut<Torus> *reused_lut, bool allocate_gpu_memory,
|
||||
uint64_t *size_tracker) {
|
||||
mem_reuse = true;
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->mem_reuse = true;
|
||||
this->params = params;
|
||||
this->max_total_blocks_in_vec = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
this->num_blocks_in_radix = num_blocks_in_radix;
|
||||
this->max_num_radix_in_vec = max_num_radix_in_vec;
|
||||
this->gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->size_tracker = size_tracker;
|
||||
|
||||
int max_pbs_count = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
|
||||
// assign gpu memory for intermediate buffers
|
||||
this->new_blocks = new_blocks;
|
||||
this->old_blocks = old_blocks;
|
||||
this->current_blocks = current_blocks;
|
||||
this->small_lwe_vector = small_lwe_vector;
|
||||
new_blocks_copy = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks_copy, max_pbs_count,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
d_smart_copy_in = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
d_smart_copy_out = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_in, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_out, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
this->luts_message_carry = reused_lut;
|
||||
setup_index_buffers(streams, gpu_indexes);
|
||||
}
|
||||
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
cuda_drop_with_size_tracking_async(d_smart_copy_in, streams[0],
|
||||
cuda_drop_with_size_tracking_async(d_degrees, streams[0], gpu_indexes[0],
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_pbs_counters, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_smart_copy_out, streams[0],
|
||||
|
||||
cuda_drop_with_size_tracking_async(d_columns_data, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_columns_counter, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_columns, streams[0], gpu_indexes[0],
|
||||
gpu_memory_allocated);
|
||||
|
||||
cuda_drop_with_size_tracking_async(d_new_columns_data, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_new_columns_counter, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_new_columns, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
|
||||
if (!mem_reuse) {
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], new_blocks,
|
||||
gpu_memory_allocated);
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], old_blocks,
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], current_blocks,
|
||||
gpu_memory_allocated);
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0],
|
||||
small_lwe_vector, gpu_memory_allocated);
|
||||
luts_message_carry->release(streams, gpu_indexes, gpu_count);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
delete new_blocks;
|
||||
delete old_blocks;
|
||||
|
||||
delete current_blocks;
|
||||
delete small_lwe_vector;
|
||||
delete luts_message_carry;
|
||||
}
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], new_blocks_copy,
|
||||
gpu_memory_allocated);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
delete new_blocks_copy;
|
||||
}
|
||||
};
|
||||
|
||||
// For sequential algorithm in group propagation
|
||||
template <typename Torus> struct int_seq_group_prop_memory {
|
||||
|
||||
@@ -2549,7 +2628,7 @@ template <typename Torus> struct int_mul_memory {
|
||||
// radix_lwe_left except the last blocks of each shift
|
||||
int msb_vector_block_count = num_radix_blocks * (num_radix_blocks - 1) / 2;
|
||||
|
||||
int total_block_count = lsb_vector_block_count + msb_vector_block_count;
|
||||
int total_block_count = num_radix_blocks * num_radix_blocks;
|
||||
|
||||
// allocate memory for intermediate buffers
|
||||
vector_result_sb = new CudaRadixCiphertextFFI;
|
||||
@@ -2562,13 +2641,13 @@ template <typename Torus> struct int_mul_memory {
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
small_lwe_vector = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, total_block_count,
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, 2 * total_block_count,
|
||||
params.small_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
// create int_radix_lut objects for lsb, msb, message, carry
|
||||
// luts_array -> lut = {lsb_acc, msb_acc}
|
||||
luts_array = new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count,
|
||||
params, 2, total_block_count,
|
||||
params, 2, 2 * total_block_count,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
auto lsb_acc = luts_array->get_lut(0, 0);
|
||||
auto msb_acc = luts_array->get_lut(0, 1);
|
||||
@@ -2602,9 +2681,10 @@ template <typename Torus> struct int_mul_memory {
|
||||
|
||||
luts_array->broadcast_lut(streams, gpu_indexes, 0);
|
||||
// create memory object for sum ciphertexts
|
||||
// create memory object for sum ciphertexts
|
||||
sum_ciphertexts_mem = new int_sum_ciphertexts_vec_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_radix_blocks,
|
||||
2 * num_radix_blocks, block_mul_res, vector_result_sb, small_lwe_vector,
|
||||
2 * num_radix_blocks, vector_result_sb, small_lwe_vector, luts_array,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
uint32_t uses_carry = 0;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
@@ -3918,7 +3998,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_did_not_happen[0]->get_degree(0),
|
||||
zero_out_if_overflow_did_not_happen[0]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, cur_lut_f, 2, gpu_memory_allocated);
|
||||
params.carry_modulus, cur_lut_f, params.message_modulus - 2,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_did_not_happen[0]->broadcast_lut(streams, gpu_indexes,
|
||||
0);
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
@@ -3927,7 +4008,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_did_not_happen[1]->get_degree(0),
|
||||
zero_out_if_overflow_did_not_happen[1]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, cur_lut_f, 3, gpu_memory_allocated);
|
||||
params.carry_modulus, cur_lut_f, params.message_modulus - 1,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_did_not_happen[1]->broadcast_lut(streams, gpu_indexes,
|
||||
0);
|
||||
|
||||
@@ -3954,7 +4036,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_happened[0]->get_degree(0),
|
||||
zero_out_if_overflow_happened[0]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, overflow_happened_f, 2, gpu_memory_allocated);
|
||||
params.carry_modulus, overflow_happened_f, params.message_modulus - 2,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_happened[0]->broadcast_lut(streams, gpu_indexes, 0);
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
@@ -3962,7 +4045,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_happened[1]->get_degree(0),
|
||||
zero_out_if_overflow_happened[1]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, overflow_happened_f, 3, gpu_memory_allocated);
|
||||
params.carry_modulus, overflow_happened_f, params.message_modulus - 1,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_happened[1]->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
// merge_overflow_flags_luts
|
||||
|
||||
@@ -63,7 +63,7 @@ void update_degrees_after_bitor(uint64_t *output_degrees,
|
||||
auto result = max;
|
||||
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max | j > result) {
|
||||
if ((max | j) > result) {
|
||||
result = max | j;
|
||||
}
|
||||
}
|
||||
@@ -82,7 +82,7 @@ void update_degrees_after_bitxor(uint64_t *output_degrees,
|
||||
|
||||
// Try every possibility to find the worst case
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max ^ j > result) {
|
||||
if ((max ^ j) > result) {
|
||||
result = max ^ j;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ __host__ void host_integer_radix_bitop_kb(
|
||||
update_degrees_after_bitor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITXOR) {
|
||||
} else if (mem_ptr->op == BITOP_TYPE::BITXOR) {
|
||||
update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
|
||||
17
backends/tfhe-cuda-backend/cuda/src/integer/cast.cu
Normal file
17
backends/tfhe-cuda-backend/cuda/src/integer/cast.cu
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "cast.cuh"
|
||||
|
||||
void extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams, uint32_t const *gpu_indexes) {
|
||||
host_extend_radix_with_trivial_zero_blocks_msb<uint64_t>(
|
||||
output, input, (cudaStream_t *)streams, gpu_indexes);
|
||||
}
|
||||
|
||||
void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
|
||||
host_trim_radix_blocks_lsb<uint64_t>(output, input, (cudaStream_t *)streams,
|
||||
gpu_indexes);
|
||||
}
|
||||
36
backends/tfhe-cuda-backend/cuda/src/integer/cast.cuh
Normal file
36
backends/tfhe-cuda-backend/cuda/src/integer/cast.cuh
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef CAST_CUH
|
||||
#define CAST_CUH
|
||||
|
||||
#include "device.h"
|
||||
#include "integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_extend_radix_with_trivial_zero_blocks_msb(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
0, input->num_radix_blocks, input, 0,
|
||||
input->num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_trim_radix_blocks_lsb(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
|
||||
const uint32_t input_start_lwe_index =
|
||||
input->num_radix_blocks - output->num_radix_blocks;
|
||||
|
||||
if (input->num_radix_blocks <= output->num_radix_blocks) {
|
||||
PANIC("Cuda error: input num blocks should be greater than output num "
|
||||
"blocks");
|
||||
}
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], output, 0, output->num_radix_blocks, input,
|
||||
input_start_lwe_index, input->num_radix_blocks);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -456,7 +456,7 @@ __host__ void tree_sign_reduction(
|
||||
auto inner_tree_leaf = tree_buffer->tree_inner_leaf_lut;
|
||||
while (partial_block_count > 2) {
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, partial_block_count,
|
||||
4);
|
||||
message_modulus);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
|
||||
@@ -477,16 +477,17 @@ __host__ void tree_sign_reduction(
|
||||
auto last_lut = tree_buffer->tree_last_leaf_lut;
|
||||
auto block_selector_f = tree_buffer->block_selector_f;
|
||||
std::function<Torus(Torus)> f;
|
||||
|
||||
auto num_bits_in_message = log2_int(params.message_modulus);
|
||||
if (partial_block_count == 2) {
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, partial_block_count,
|
||||
4);
|
||||
message_modulus);
|
||||
|
||||
f = [block_selector_f, sign_handler_f](Torus x) -> Torus {
|
||||
int msb = (x >> 2) & 3;
|
||||
int lsb = x & 3;
|
||||
f = [block_selector_f, sign_handler_f, num_bits_in_message,
|
||||
message_modulus](Torus x) -> Torus {
|
||||
Torus msb = (x >> num_bits_in_message) & (message_modulus - 1);
|
||||
Torus lsb = x & (message_modulus - 1);
|
||||
|
||||
int final_sign = block_selector_f(msb, lsb);
|
||||
Torus final_sign = block_selector_f(msb, lsb);
|
||||
return sign_handler_f(final_sign);
|
||||
};
|
||||
} else {
|
||||
|
||||
@@ -386,8 +386,9 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
subtraction_overflowed,
|
||||
at_least_one_upper_block_is_non_zero, 1);
|
||||
|
||||
int factor = (i) ? 3 : 2;
|
||||
int factor_lut_id = factor - 2;
|
||||
auto message_modulus = radix_params.message_modulus;
|
||||
int factor = (i) ? message_modulus - 1 : message_modulus - 2;
|
||||
int factor_lut_id = (i) ? 1 : 0;
|
||||
for (size_t k = 0;
|
||||
k < cleaned_merged_interesting_remainder->num_radix_blocks; k++) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
|
||||
@@ -520,8 +520,7 @@ __host__ void integer_radix_apply_univariate_lookup_table_kb(
|
||||
if (num_radix_blocks > lut->num_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of lut radix blocks")
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks ||
|
||||
num_radix_blocks > lwe_array_in->num_radix_blocks)
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of input & output radix blocks")
|
||||
|
||||
@@ -1616,10 +1615,12 @@ __host__ void reduce_signs(
|
||||
auto message_modulus = params.message_modulus;
|
||||
auto carry_modulus = params.carry_modulus;
|
||||
|
||||
auto num_bits_in_message = log2_int(message_modulus);
|
||||
std::function<Torus(Torus)> reduce_two_orderings_function =
|
||||
[diff_buffer, sign_handler_f](Torus x) -> Torus {
|
||||
int msb = (x >> 2) & 3;
|
||||
int lsb = x & 3;
|
||||
[diff_buffer, sign_handler_f, num_bits_in_message,
|
||||
message_modulus](Torus x) -> Torus {
|
||||
Torus msb = (x >> num_bits_in_message) & (message_modulus - 1);
|
||||
Torus lsb = x & (message_modulus - 1);
|
||||
|
||||
return diff_buffer->tree_buffer->block_selector_f(msb, lsb);
|
||||
};
|
||||
@@ -1640,7 +1641,7 @@ __host__ void reduce_signs(
|
||||
|
||||
while (num_sign_blocks > 2) {
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
|
||||
num_sign_blocks, 4);
|
||||
num_sign_blocks, message_modulus);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, signs_a, signs_b, bsks, ksks,
|
||||
ms_noise_reduction_key, lut, num_sign_blocks / 2);
|
||||
@@ -1669,7 +1670,8 @@ __host__ void reduce_signs(
|
||||
message_modulus, carry_modulus, final_lut_f, true);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a, 2, 4);
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
|
||||
num_sign_blocks, message_modulus);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, signs_array_out, signs_b, bsks, ksks,
|
||||
ms_noise_reduction_key, lut, 1);
|
||||
@@ -1677,8 +1679,8 @@ __host__ void reduce_signs(
|
||||
} else {
|
||||
|
||||
std::function<Torus(Torus)> final_lut_f =
|
||||
[mem_ptr, sign_handler_f](Torus x) -> Torus {
|
||||
return sign_handler_f(x & 3);
|
||||
[mem_ptr, sign_handler_f, message_modulus](Torus x) -> Torus {
|
||||
return sign_handler_f(x & (message_modulus - 1));
|
||||
};
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->reduce_signs_lut;
|
||||
|
||||
@@ -226,7 +226,8 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, bool reduce_degrees_for_single_carry_propagation,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
@@ -234,64 +235,59 @@ void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
if (radix_lwe_vec->num_radix_blocks % radix_lwe_out->num_radix_blocks != 0)
|
||||
PANIC("Cuda error: input vector length should be a multiple of the "
|
||||
"output's number of radix blocks")
|
||||
// FIXME: this should not be necessary, we should make sure sum_ctxt works in
|
||||
// the general case
|
||||
for (int i = 0; i < radix_lwe_vec->num_radix_blocks; i++) {
|
||||
radix_lwe_vec->degrees[i] = mem->params.message_modulus - 1;
|
||||
}
|
||||
switch (mem->params.polynomial_size) {
|
||||
case 512:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<512>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<1024>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<2048>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<4096>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<8192>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<16384>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
|
||||
|
||||
@@ -20,28 +20,11 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
#include <queue>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void smart_copy(Torus *dst, Torus *src, int32_t *id_out,
|
||||
int32_t *id_in, size_t lwe_size) {
|
||||
size_t tid = threadIdx.x;
|
||||
size_t b_id = blockIdx.x;
|
||||
size_t stride = blockDim.x;
|
||||
|
||||
auto input_id = id_in[b_id];
|
||||
auto output_id = id_out[b_id];
|
||||
|
||||
auto cur_src = (input_id >= 0) ? &src[input_id * lwe_size] : nullptr;
|
||||
auto cur_dst = &dst[output_id * lwe_size];
|
||||
|
||||
for (int i = tid; i < lwe_size; i += stride) {
|
||||
cur_dst[i] = (input_id >= 0) ? cur_src[i] : 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__global__ void
|
||||
all_shifted_lhs_rhs(Torus const *radix_lwe_left, Torus *lsb_ciphertext,
|
||||
@@ -94,33 +77,173 @@ all_shifted_lhs_rhs(Torus const *radix_lwe_left, Torus *lsb_ciphertext,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void tree_add_chunks(Torus *result_blocks, Torus *input_blocks,
|
||||
uint32_t chunk_size, uint32_t block_size,
|
||||
uint32_t num_blocks) {
|
||||
__global__ inline void radix_vec_to_columns(uint32_t *const *const columns,
|
||||
uint32_t *const columns_counter,
|
||||
const uint64_t *const degrees,
|
||||
const uint32_t num_radix_blocks,
|
||||
const uint32_t num_radix_in_vec) {
|
||||
|
||||
size_t stride = blockDim.x;
|
||||
size_t chunk_id = blockIdx.x;
|
||||
size_t chunk_elem_size = chunk_size * num_blocks * block_size;
|
||||
size_t radix_elem_size = num_blocks * block_size;
|
||||
auto src_chunk = &input_blocks[chunk_id * chunk_elem_size];
|
||||
auto dst_radix = &result_blocks[chunk_id * radix_elem_size];
|
||||
size_t block_stride = blockIdx.y * block_size;
|
||||
auto result = &dst_radix[block_stride];
|
||||
|
||||
// init shared mem with first radix of chunk
|
||||
size_t tid = threadIdx.x;
|
||||
for (int i = tid; i < block_size; i += stride) {
|
||||
result[i] = src_chunk[block_stride + i];
|
||||
}
|
||||
|
||||
// accumulate rest of the radixes
|
||||
for (int r_id = 1; r_id < chunk_size; r_id++) {
|
||||
auto cur_src_radix = &src_chunk[r_id * radix_elem_size];
|
||||
for (int i = tid; i < block_size; i += stride) {
|
||||
result[i] += cur_src_radix[block_stride + i];
|
||||
const uint32_t idx = threadIdx.x;
|
||||
size_t cnt = 0;
|
||||
for (int i = 0; i < num_radix_in_vec; i++) {
|
||||
size_t ct_id = i * num_radix_blocks + idx;
|
||||
if (degrees[ct_id] != 0) {
|
||||
columns[idx][cnt] = ct_id;
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
columns_counter[idx] = cnt;
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ inline void prepare_new_columns_and_pbs_indexes(
|
||||
uint32_t *const *const new_columns, uint32_t *const new_columns_counter,
|
||||
Torus *const pbs_indexes_in, Torus *const pbs_indexes_out,
|
||||
Torus *const lut_indexes, uint32_t *const pbs_counters,
|
||||
const uint32_t *const *const columns, const uint32_t *const columns_counter,
|
||||
const uint32_t chunk_size) {
|
||||
__shared__ uint32_t counter, sharedOr;
|
||||
|
||||
if (threadIdx.x == 0) {
|
||||
counter = 0;
|
||||
sharedOr = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
const uint32_t base_id = threadIdx.x;
|
||||
const uint32_t column_len = columns_counter[base_id];
|
||||
|
||||
uint32_t ct_count = 0;
|
||||
for (uint32_t i = 0; i + chunk_size <= column_len; i += chunk_size) {
|
||||
// those indexes are for message ciphertexts
|
||||
// for message ciphertexts in and out index should be same
|
||||
const uint32_t in_index = columns[base_id][i];
|
||||
new_columns[base_id][ct_count] = in_index;
|
||||
const uint32_t pbs_index = atomicAdd(&counter, 1);
|
||||
pbs_indexes_in[pbs_index] = in_index;
|
||||
pbs_indexes_out[pbs_index] = in_index;
|
||||
lut_indexes[pbs_index] = 0;
|
||||
++ct_count;
|
||||
}
|
||||
// ct1 ct2 ct3
|
||||
// pbs_indexes: 0, 1, 2
|
||||
// pbs_indexes: 2, 1, 0
|
||||
|
||||
__syncthreads();
|
||||
uint32_t message_count = counter;
|
||||
|
||||
if (base_id > 0) {
|
||||
const uint32_t prev_base_id = base_id - 1;
|
||||
const uint32_t prev_column_len = columns_counter[prev_base_id];
|
||||
|
||||
for (uint32_t i = 0; i + chunk_size <= prev_column_len; i += chunk_size) {
|
||||
// those indexes are for carry ciphertexts
|
||||
// for carry ciphertexts input is same as for message
|
||||
// output will be placed to next block in the column
|
||||
const uint32_t in_index = columns[prev_base_id][i];
|
||||
const uint32_t out_index = columns[prev_base_id][i + 1];
|
||||
new_columns[base_id][ct_count] = out_index;
|
||||
const uint32_t pbs_index = atomicAdd(&counter, 1);
|
||||
pbs_indexes_in[pbs_index] = in_index;
|
||||
pbs_indexes_out[pbs_index] = out_index;
|
||||
lut_indexes[pbs_index] = 1;
|
||||
++ct_count;
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t start_index = column_len - column_len % chunk_size;
|
||||
for (uint32_t i = start_index; i < column_len; ++i) {
|
||||
new_columns[base_id][ct_count] = columns[base_id][i];
|
||||
++ct_count;
|
||||
}
|
||||
|
||||
new_columns_counter[base_id] = ct_count;
|
||||
|
||||
if (ct_count > chunk_size) {
|
||||
atomicOr(&sharedOr, 1);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x == 0) {
|
||||
pbs_counters[0] = counter;
|
||||
pbs_counters[1] = message_count;
|
||||
pbs_counters[2] = sharedOr;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ inline void prepare_final_pbs_indexes(
|
||||
Torus *const pbs_indexes_in, Torus *const pbs_indexes_out,
|
||||
Torus *const lut_indexes, const uint32_t num_radix_blocks) {
|
||||
int idx = threadIdx.x;
|
||||
pbs_indexes_in[idx] = idx % num_radix_blocks;
|
||||
pbs_indexes_out[idx] = idx + idx / num_radix_blocks;
|
||||
lut_indexes[idx] = idx / num_radix_blocks;
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void calculate_chunks(Torus *const input_blocks,
|
||||
const uint32_t *const *const columns,
|
||||
const uint32_t *const columns_counter,
|
||||
const uint32_t chunk_size,
|
||||
const uint32_t block_size) {
|
||||
|
||||
const uint32_t part_size = blockDim.x;
|
||||
const uint32_t base_id = blockIdx.x;
|
||||
const uint32_t part_id = blockIdx.y;
|
||||
const uint32_t coef_id = part_id * part_size + threadIdx.x;
|
||||
|
||||
if (coef_id >= block_size)
|
||||
return;
|
||||
|
||||
const uint32_t column_len = columns_counter[base_id];
|
||||
|
||||
if (column_len >= chunk_size) {
|
||||
const uint32_t num_chunks = column_len / chunk_size;
|
||||
Torus result = 0;
|
||||
|
||||
for (uint32_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
|
||||
const uint32_t first_ct_id = columns[base_id][chunk_id * chunk_size];
|
||||
result = input_blocks[first_ct_id * block_size + coef_id];
|
||||
|
||||
for (uint32_t ct_id = 1; ct_id < chunk_size; ++ct_id) {
|
||||
const uint32_t cur_ct_id =
|
||||
columns[base_id][chunk_id * chunk_size + ct_id];
|
||||
result += input_blocks[cur_ct_id * block_size + coef_id];
|
||||
}
|
||||
|
||||
input_blocks[first_ct_id * block_size + coef_id] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void calculate_final_chunk_into_radix(
|
||||
Torus *const out_radix, const Torus *const input_blocks,
|
||||
const uint32_t *const *const columns, const uint32_t *const columns_counter,
|
||||
const uint32_t chunk_size, const uint32_t block_size) {
|
||||
|
||||
const uint32_t part_size = blockDim.x;
|
||||
const uint32_t base_id = blockIdx.x;
|
||||
const uint32_t part_id = blockIdx.y;
|
||||
const uint32_t coef_id = part_id * part_size + threadIdx.x;
|
||||
|
||||
if (coef_id >= block_size)
|
||||
return;
|
||||
|
||||
const uint32_t column_len = columns_counter[base_id];
|
||||
|
||||
Torus result = 0;
|
||||
if (column_len) {
|
||||
const uint32_t first_ct_id = columns[base_id][0];
|
||||
result = input_blocks[first_ct_id * block_size + coef_id];
|
||||
|
||||
for (uint32_t i = 1; i < column_len; ++i) {
|
||||
const uint32_t cur_ct_it = columns[base_id][i];
|
||||
result += input_blocks[cur_ct_it * block_size + coef_id];
|
||||
}
|
||||
}
|
||||
out_radix[base_id * block_size + coef_id] = result;
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
@@ -167,6 +290,65 @@ __global__ void fill_radix_from_lsb_msb(Torus *result_blocks, Torus *lsb_blocks,
|
||||
(process_msb) ? cur_msb_ct[params::degree] : 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool at_least_one_column_needs_processing(
|
||||
const uint64_t *const degrees, const uint32_t num_radix_blocks,
|
||||
const uint32_t num_radix_in_vec, const uint32_t chunk_size) {
|
||||
std::vector<uint32_t> columns_count(num_radix_blocks, 0);
|
||||
|
||||
for (size_t column = 0; column < num_radix_blocks; ++column) {
|
||||
for (size_t block = 0; block < num_radix_in_vec; ++block) {
|
||||
const size_t block_index = block * num_radix_blocks + column;
|
||||
if (degrees[block_index]) {
|
||||
columns_count[column]++;
|
||||
if (columns_count[column] > chunk_size) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void calculate_final_degrees(uint64_t *const out_degrees,
|
||||
const uint64_t *const input_degrees,
|
||||
size_t num_blocks, size_t num_radix_in_vec,
|
||||
size_t chunk_size,
|
||||
uint64_t message_modulus) {
|
||||
|
||||
auto get_degree = [message_modulus](uint64_t degree) -> uint64_t {
|
||||
return std::min(message_modulus - 1, degree);
|
||||
};
|
||||
std::vector<std::queue<uint64_t>> columns(num_blocks);
|
||||
for (size_t i = 0; i < num_radix_in_vec; ++i) {
|
||||
for (size_t j = 0; j < num_blocks; ++j) {
|
||||
if (input_degrees[i * num_blocks + j])
|
||||
columns[j].push(input_degrees[i * num_blocks + j]);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
auto &col = columns[i];
|
||||
while (col.size() > 1) {
|
||||
uint32_t cur_degree = 0;
|
||||
size_t mn = std::min(chunk_size, col.size());
|
||||
for (int j = 0; j < mn; ++j) {
|
||||
cur_degree += col.front();
|
||||
col.pop();
|
||||
}
|
||||
const uint64_t new_degree = get_degree(cur_degree);
|
||||
col.push(new_degree);
|
||||
if ((i + 1) < num_blocks) {
|
||||
columns[i + 1].push(new_degree);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_blocks; i++) {
|
||||
out_degrees[i] = (columns[i].empty()) ? 0 : columns[i].front();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ uint64_t scratch_cuda_integer_partial_sum_ciphertexts_vec_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
@@ -181,15 +363,107 @@ __host__ uint64_t scratch_cuda_integer_partial_sum_ciphertexts_vec_kb(
|
||||
return size_tracker;
|
||||
}
|
||||
|
||||
__global__ inline void DEBUG_PRINT_COLUMNS(uint32_t *const *const columns,
|
||||
uint32_t *const columns_counter,
|
||||
const uint32_t num_radix_blocks) {
|
||||
printf("cuda_columns_counter:\n");
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
printf("%d ", columns_counter[i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("cuda_columns:\n");
|
||||
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
printf("column[%d]: ", i);
|
||||
for (int j = 0; j < columns_counter[i]; j++)
|
||||
{
|
||||
printf("%d ", columns[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
|
||||
__global__ inline void DEBUG_PRINT_COLUMNS_DATA(uint32_t *const *const columns,
|
||||
uint32_t *const columns_counter,
|
||||
uint64_t* data,
|
||||
const uint32_t num_radix_blocks, size_t lwe_size) {
|
||||
|
||||
uint64_t delta = 576460752303423488ULL;
|
||||
__syncthreads();
|
||||
printf("cuda_new_columns:\n");
|
||||
__syncthreads();
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
__syncthreads();
|
||||
printf("column[%d]: ", i);
|
||||
__syncthreads();
|
||||
for (int j = 0; j < columns_counter[i]; j++)
|
||||
{
|
||||
__syncthreads();
|
||||
auto cur_data =data[ columns[i][j] * lwe_size + lwe_size - 1];
|
||||
cur_data /= delta;
|
||||
printf("%llu ", cur_data);
|
||||
__syncthreads();
|
||||
}
|
||||
__syncthreads();
|
||||
printf("\n");
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
printf("\n");
|
||||
__syncthreads();
|
||||
|
||||
}
|
||||
|
||||
template<typename Torus, bool input, bool clear>
|
||||
__global__ inline void DEBUG_PRINT_PBS_DATA(Torus * data, Torus* input_indexes, Torus*
|
||||
output_indexes, Torus *lut_indexes, size_t lwe_size, int num) {
|
||||
printf("input_output_indexes: \n");
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
auto input_val = data[input_indexes[i] * lwe_size + lwe_size -1];
|
||||
auto output_val = data[output_indexes[i] * lwe_size + lwe_size -1];
|
||||
|
||||
auto val = input ? input_val : output_val;
|
||||
auto val_clear = clear ? val / 576460752303423488ULL : val;
|
||||
|
||||
printf("%d %lu %lu %lu %lu %lu\n", i, input_indexes[i], output_indexes[i], lut_indexes[i],
|
||||
val_clear, val);
|
||||
}
|
||||
}
|
||||
|
||||
//template<typename Torus>
|
||||
//__global__ inline void DEBUG_PRINT_RADIX(Torus * data, size_t num_blocks, size_t lwe_size) {
|
||||
// for (int i = 0; i < num_blocks; i++) {
|
||||
// auto val = data[i * lwe_size + lwe_size - 1];
|
||||
// auto val_clear = val / 576460752303423488ULL;
|
||||
// printf("cuda_partial_sum_result: %lu %lu\n", val, val_clear);
|
||||
// }
|
||||
//}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *terms, void *const *bsks, uint64_t *const *ksks,
|
||||
CudaRadixCiphertextFFI *terms, bool reduce_degrees_for_single_carry_propagation, void *const
|
||||
*bsks, uint64_t *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
int_sum_ciphertexts_vec_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_radix_blocks, uint32_t num_radix_in_vec,
|
||||
int_radix_lut<Torus> *reused_lut) {
|
||||
uint32_t num_radix_blocks, uint32_t num_radix_in_vec) {
|
||||
// cudaDeviceSynchronize();
|
||||
// print_body<Torus>("cuda_input_partial_sum", (Torus*)terms->ptr, num_radix_blocks * num_radix_in_vec,
|
||||
// 2048,
|
||||
// 576460752303423488ULL);
|
||||
|
||||
// for (int i = 0; i <num_radix_blocks * num_radix_in_vec; i++ ) {
|
||||
// printf("cuda_input_degrees: %d\n", terms->degrees[i]);
|
||||
// }
|
||||
// cudaDeviceSynchronize();
|
||||
auto big_lwe_dimension = mem_ptr->params.big_lwe_dimension;
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
|
||||
if (terms->lwe_dimension != radix_lwe_out->lwe_dimension)
|
||||
PANIC("Cuda error: output and input radix ciphertexts should have the same "
|
||||
@@ -199,22 +473,28 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
PANIC("Cuda error: input vector does not have enough blocks")
|
||||
if (num_radix_blocks > radix_lwe_out->num_radix_blocks)
|
||||
PANIC("Cuda error: output does not have enough blocks")
|
||||
auto new_blocks = mem_ptr->new_blocks;
|
||||
auto new_blocks_copy = mem_ptr->new_blocks_copy;
|
||||
auto old_blocks = mem_ptr->old_blocks;
|
||||
|
||||
auto current_blocks = mem_ptr->current_blocks;
|
||||
auto small_lwe_vector = mem_ptr->small_lwe_vector;
|
||||
auto d_degrees = mem_ptr->d_degrees;
|
||||
auto d_columns = mem_ptr->d_columns;
|
||||
auto d_columns_counter = mem_ptr->d_columns_counter;
|
||||
auto d_new_columns = mem_ptr->d_new_columns;
|
||||
auto d_new_columns_counter = mem_ptr->d_new_columns_counter;
|
||||
auto d_pbs_indexes_in = mem_ptr->luts_message_carry->lwe_indexes_in;
|
||||
auto d_pbs_indexes_out = mem_ptr->luts_message_carry->lwe_indexes_out;
|
||||
auto d_pbs_counters = mem_ptr->d_pbs_counters;
|
||||
|
||||
auto d_smart_copy_in = mem_ptr->d_smart_copy_in;
|
||||
auto d_smart_copy_out = mem_ptr->d_smart_copy_out;
|
||||
auto luts_message_carry = mem_ptr->luts_message_carry;
|
||||
|
||||
auto message_modulus = mem_ptr->params.message_modulus;
|
||||
auto carry_modulus = mem_ptr->params.carry_modulus;
|
||||
auto big_lwe_dimension = mem_ptr->params.big_lwe_dimension;
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
auto glwe_dimension = mem_ptr->params.glwe_dimension;
|
||||
auto polynomial_size = mem_ptr->params.polynomial_size;
|
||||
auto small_lwe_dimension = mem_ptr->params.small_lwe_dimension;
|
||||
auto small_lwe_size = small_lwe_dimension + 1;
|
||||
auto chunk_size =
|
||||
(mem_ptr->params.message_modulus * mem_ptr->params.carry_modulus - 1) /
|
||||
(mem_ptr->params.message_modulus - 1);
|
||||
|
||||
size_t total_blocks_in_vec = num_radix_blocks * num_radix_in_vec;
|
||||
|
||||
// In the case of extracting a single LWE this parameters are dummy
|
||||
uint32_t num_many_lut = 1;
|
||||
@@ -228,244 +508,202 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
terms, 0, num_radix_blocks);
|
||||
return;
|
||||
}
|
||||
if (old_blocks != terms) {
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], old_blocks,
|
||||
terms);
|
||||
}
|
||||
|
||||
if (num_radix_in_vec == 2) {
|
||||
CudaRadixCiphertextFFI old_blocks_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&old_blocks_slice, old_blocks,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks_slice, num_radix_blocks);
|
||||
CudaRadixCiphertextFFI terms_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&terms_slice, terms, num_radix_blocks,
|
||||
2 * num_radix_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, terms,
|
||||
&terms_slice, num_radix_blocks);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t r = num_radix_in_vec;
|
||||
size_t total_modulus = message_modulus * carry_modulus;
|
||||
size_t message_max = message_modulus - 1;
|
||||
size_t chunk_size = (total_modulus - 1) / message_max;
|
||||
|
||||
size_t h_lwe_idx_in[terms->num_radix_blocks];
|
||||
size_t h_lwe_idx_out[terms->num_radix_blocks];
|
||||
int32_t h_smart_copy_in[terms->num_radix_blocks];
|
||||
int32_t h_smart_copy_out[terms->num_radix_blocks];
|
||||
|
||||
/// Here it is important to query the default max shared memory on device 0
|
||||
/// instead of cuda_get_max_shared_memory,
|
||||
/// to avoid bugs with tree_add_chunks trying to use too much shared memory
|
||||
auto max_shared_memory = 0;
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock, 0));
|
||||
|
||||
// create lut object for message and carry
|
||||
// we allocate luts_message_carry in the host function (instead of scratch)
|
||||
// to reduce average memory consumption
|
||||
int_radix_lut<Torus> *luts_message_carry;
|
||||
size_t ch_amount = r / chunk_size;
|
||||
if (!ch_amount)
|
||||
ch_amount++;
|
||||
if (reused_lut == nullptr) {
|
||||
luts_message_carry = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->params, 2,
|
||||
2 * ch_amount * num_radix_blocks, true, nullptr);
|
||||
} else {
|
||||
luts_message_carry = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->params, 2,
|
||||
2 * ch_amount * num_radix_blocks, reused_lut, true, nullptr);
|
||||
if (current_blocks != terms) {
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
|
||||
current_blocks, terms);
|
||||
}
|
||||
auto message_acc = luts_message_carry->get_lut(0, 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(0, 1);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_message = [message_modulus](Torus x) -> Torus {
|
||||
return x % message_modulus;
|
||||
};
|
||||
auto lut_f_carry = [message_modulus](Torus x) -> Torus {
|
||||
return x / message_modulus;
|
||||
};
|
||||
cuda_memcpy_async_to_gpu(d_degrees, current_blocks->degrees,
|
||||
total_blocks_in_vec * sizeof(uint64_t), streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
// generate accumulators
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], message_acc,
|
||||
luts_message_carry->get_degree(0), luts_message_carry->get_max_degree(0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
lut_f_message, true);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc, luts_message_carry->get_degree(1),
|
||||
luts_message_carry->get_max_degree(1), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_carry, true);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
radix_vec_to_columns<<<1, num_radix_blocks, 0, streams[0]>>>(
|
||||
d_columns, d_columns_counter, d_degrees, num_radix_blocks,
|
||||
num_radix_in_vec);
|
||||
|
||||
while (r > 2) {
|
||||
size_t cur_total_blocks = r * num_radix_blocks;
|
||||
size_t ch_amount = r / chunk_size;
|
||||
if (!ch_amount)
|
||||
ch_amount++;
|
||||
dim3 add_grid(ch_amount, num_radix_blocks, 1);
|
||||
bool needs_processing = at_least_one_column_needs_processing(
|
||||
current_blocks->degrees, num_radix_blocks, num_radix_in_vec, chunk_size);
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
tree_add_chunks<Torus><<<add_grid, 512, 0, streams[0]>>>(
|
||||
(Torus *)new_blocks->ptr, (Torus *)old_blocks->ptr,
|
||||
std::min(r, chunk_size), big_lwe_size, num_radix_blocks);
|
||||
int number_of_threads = min(256, params::degree);
|
||||
int part_count = (big_lwe_size + number_of_threads - 1) / number_of_threads;
|
||||
const dim3 number_of_blocks_2d(num_radix_blocks, part_count, 1);
|
||||
|
||||
check_cuda_error(cudaGetLastError());
|
||||
// h_pbs_counters[0] - total ciphertexts
|
||||
// h_pbs_counters[1] - message ciphertexts
|
||||
// h_pbs_counters[2] - at_least_one_column_needs_processing
|
||||
uint32_t *h_pbs_counters;
|
||||
cudaMallocHost((void **)&h_pbs_counters, 3 * sizeof(uint32_t));
|
||||
if (mem_ptr->mem_reuse) {
|
||||
mem_ptr->setup_lookup_tables(streams, gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
size_t total_count = 0;
|
||||
size_t message_count = 0;
|
||||
size_t carry_count = 0;
|
||||
size_t sm_copy_count = 0;
|
||||
|
||||
generate_ids_update_degrees(
|
||||
terms->degrees, h_lwe_idx_in, h_lwe_idx_out, h_smart_copy_in,
|
||||
h_smart_copy_out, ch_amount, r, num_radix_blocks, chunk_size,
|
||||
message_max, total_count, message_count, carry_count, sm_copy_count);
|
||||
auto lwe_indexes_in = luts_message_carry->lwe_indexes_in;
|
||||
auto lwe_indexes_out = luts_message_carry->lwe_indexes_out;
|
||||
luts_message_carry->set_lwe_indexes(streams[0], gpu_indexes[0],
|
||||
h_lwe_idx_in, h_lwe_idx_out);
|
||||
while (needs_processing) {
|
||||
// cudaDeviceSynchronize();
|
||||
// DEBUG_PRINT_COLUMNS<<<1, 1, 0, streams[0]>>>(d_columns, d_columns_counter, num_radix_blocks);
|
||||
// DEBUG_PRINT_COLUMNS_DATA<<<1, 1, 0, streams[0]>>>(d_columns, d_columns_counter, (uint64_t *)
|
||||
// (current_blocks->ptr), num_radix_blocks, big_lwe_size);
|
||||
calculate_chunks<Torus>
|
||||
<<<number_of_blocks_2d, number_of_threads, 0, streams[0]>>>(
|
||||
(Torus *)(current_blocks->ptr), d_columns, d_columns_counter,
|
||||
chunk_size, big_lwe_size);
|
||||
|
||||
size_t copy_size = sm_copy_count * sizeof(int32_t);
|
||||
cuda_memcpy_async_to_gpu(d_smart_copy_in, h_smart_copy_in, copy_size,
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_to_gpu(d_smart_copy_out, h_smart_copy_out, copy_size,
|
||||
streams[0], gpu_indexes[0]);
|
||||
prepare_new_columns_and_pbs_indexes<<<1, num_radix_blocks, 0, streams[0]>>>(
|
||||
d_new_columns, d_new_columns_counter, d_pbs_indexes_in,
|
||||
d_pbs_indexes_out, luts_message_carry->get_lut_indexes(0, 0),
|
||||
d_pbs_counters, d_columns, d_columns_counter, chunk_size);
|
||||
|
||||
// inside d_smart_copy_in there are only -1 values
|
||||
// it's fine to call smart_copy with same pointer
|
||||
// as source and destination
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks_copy, 0, r * num_radix_blocks,
|
||||
new_blocks, 0, r * num_radix_blocks);
|
||||
smart_copy<Torus><<<sm_copy_count, 1024, 0, streams[0]>>>(
|
||||
(Torus *)new_blocks->ptr, (Torus *)new_blocks_copy->ptr,
|
||||
d_smart_copy_out, d_smart_copy_in, big_lwe_size);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
cuda_memcpy_async_to_cpu(h_pbs_counters, d_pbs_counters,
|
||||
3 * sizeof(uint32_t), streams[0], gpu_indexes[0]);
|
||||
|
||||
if (carry_count > 0)
|
||||
cuda_set_value_async<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
luts_message_carry->get_lut_indexes(0, message_count), 1,
|
||||
carry_count);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
const uint32_t total_ciphertexts = h_pbs_counters[0];
|
||||
const uint32_t total_messages = h_pbs_counters[1];
|
||||
needs_processing = (h_pbs_counters[2] != 0);
|
||||
|
||||
auto active_gpu_count = get_active_gpu_count(total_ciphertexts, gpu_count);
|
||||
|
||||
// DEBUG_PRINT_PBS_DATA<Torus, true, true><<<1, 1, 0, streams[0]>>>(
|
||||
// (Torus *)(current_blocks->ptr), d_pbs_indexes_in, d_pbs_indexes_out,
|
||||
// luts_message_carry->get_lut_indexes(0, 0), big_lwe_size, total_ciphertexts
|
||||
// );
|
||||
|
||||
/// For multi GPU execution we create vectors of pointers for inputs and
|
||||
/// outputs
|
||||
std::vector<Torus *> new_blocks_vec = luts_message_carry->lwe_array_in_vec;
|
||||
std::vector<Torus *> small_lwe_vector_vec =
|
||||
luts_message_carry->lwe_after_ks_vec;
|
||||
std::vector<Torus *> lwe_after_pbs_vec =
|
||||
luts_message_carry->lwe_after_pbs_vec;
|
||||
std::vector<Torus *> lwe_trivial_indexes_vec =
|
||||
luts_message_carry->lwe_trivial_indexes_vec;
|
||||
|
||||
auto active_gpu_count = get_active_gpu_count(total_count, gpu_count);
|
||||
if (active_gpu_count == 1) {
|
||||
/// Apply KS to go from a big LWE dimension to a small LWE dimension
|
||||
/// After this keyswitch execution, we need to synchronize the streams
|
||||
/// because the keyswitch and PBS do not operate on the same number of
|
||||
/// inputs
|
||||
execute_keyswitch_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)small_lwe_vector->ptr,
|
||||
lwe_indexes_in, (Torus *)new_blocks->ptr, lwe_indexes_in, ksks,
|
||||
polynomial_size * glwe_dimension, small_lwe_dimension,
|
||||
mem_ptr->params.ks_base_log, mem_ptr->params.ks_level, message_count);
|
||||
|
||||
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
|
||||
/// dimension to a big LWE dimension
|
||||
execute_pbs_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)new_blocks->ptr, lwe_indexes_out,
|
||||
luts_message_carry->lut_vec, luts_message_carry->lut_indexes_vec,
|
||||
(Torus *)small_lwe_vector->ptr, lwe_indexes_in, bsks,
|
||||
ms_noise_reduction_key, luts_message_carry->buffer, glwe_dimension,
|
||||
small_lwe_dimension, polynomial_size, mem_ptr->params.pbs_base_log,
|
||||
mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
|
||||
total_count, mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
} else {
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
multi_gpu_scatter_lwe_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, new_blocks_vec,
|
||||
(Torus *)new_blocks->ptr, luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->using_trivial_lwe_indexes, message_count,
|
||||
big_lwe_size);
|
||||
|
||||
/// Apply KS to go from a big LWE dimension to a small LWE dimension
|
||||
/// After this keyswitch execution, we need to synchronize the streams
|
||||
/// because the keyswitch and PBS do not operate on the same number of
|
||||
/// inputs
|
||||
execute_keyswitch_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, small_lwe_vector_vec,
|
||||
lwe_trivial_indexes_vec, new_blocks_vec, lwe_trivial_indexes_vec,
|
||||
d_pbs_indexes_in, (Torus *)current_blocks->ptr, d_pbs_indexes_in,
|
||||
ksks, big_lwe_dimension, small_lwe_dimension,
|
||||
mem_ptr->params.ks_base_log, mem_ptr->params.ks_level, total_count);
|
||||
mem_ptr->params.ks_base_log, mem_ptr->params.ks_level,
|
||||
total_messages);
|
||||
|
||||
/// Copy data back to GPU 0, rebuild the lwe array, and scatter again on a
|
||||
/// different configuration
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
streams, gpu_indexes, gpu_count, (Torus *)small_lwe_vector->ptr,
|
||||
small_lwe_vector_vec, luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->using_trivial_lwe_indexes, message_count,
|
||||
small_lwe_size);
|
||||
/// Synchronize all GPUs
|
||||
for (uint i = 0; i < active_gpu_count; i++) {
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
|
||||
multi_gpu_scatter_lwe_async<Torus>(
|
||||
streams, gpu_indexes, gpu_count, small_lwe_vector_vec,
|
||||
(Torus *)small_lwe_vector->ptr, luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->using_trivial_lwe_indexes, total_count,
|
||||
small_lwe_size);
|
||||
|
||||
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
|
||||
/// dimension to a big LWE dimension
|
||||
execute_pbs_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, lwe_after_pbs_vec,
|
||||
lwe_trivial_indexes_vec, luts_message_carry->lut_vec,
|
||||
luts_message_carry->lut_indexes_vec, small_lwe_vector_vec,
|
||||
lwe_trivial_indexes_vec, bsks, ms_noise_reduction_key,
|
||||
streams, gpu_indexes, 1, (Torus *)current_blocks->ptr,
|
||||
d_pbs_indexes_out, luts_message_carry->lut_vec,
|
||||
luts_message_carry->lut_indexes_vec, (Torus *)small_lwe_vector->ptr,
|
||||
d_pbs_indexes_in, bsks, ms_noise_reduction_key,
|
||||
luts_message_carry->buffer, glwe_dimension, small_lwe_dimension,
|
||||
polynomial_size, mem_ptr->params.pbs_base_log,
|
||||
mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
|
||||
total_count, mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
total_ciphertexts, mem_ptr->params.pbs_type, num_many_lut,
|
||||
lut_stride);
|
||||
} else {
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->lwe_indexes_in,
|
||||
total_ciphertexts * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_out,
|
||||
luts_message_carry->lwe_indexes_out,
|
||||
total_ciphertexts * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, (Torus *)new_blocks->ptr,
|
||||
lwe_after_pbs_vec, luts_message_carry->h_lwe_indexes_out,
|
||||
luts_message_carry->using_trivial_lwe_indexes, total_count,
|
||||
big_lwe_size);
|
||||
/// Synchronize all GPUs
|
||||
for (uint i = 0; i < active_gpu_count; i++) {
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
}
|
||||
for (uint i = 0; i < total_count; i++) {
|
||||
auto degrees_index = luts_message_carry->h_lut_indexes[i];
|
||||
new_blocks->degrees[i] = luts_message_carry->degrees[degrees_index];
|
||||
new_blocks->noise_levels[i] = NoiseLevel::NOMINAL;
|
||||
luts_message_carry->using_trivial_lwe_indexes = false;
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, current_blocks,
|
||||
current_blocks, bsks, ksks, ms_noise_reduction_key,
|
||||
luts_message_carry, total_ciphertexts);
|
||||
}
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
// DEBUG_PRINT_PBS_DATA<Torus, false, true><<<1, 1, 0, streams[0]>>>(
|
||||
// (Torus *)(current_blocks->ptr), d_pbs_indexes_in, d_pbs_indexes_out,
|
||||
// luts_message_carry->get_lut_indexes(0, 0), big_lwe_size, total_ciphertexts
|
||||
// );
|
||||
|
||||
int rem_blocks = (r > chunk_size) ? r % chunk_size * num_radix_blocks : 0;
|
||||
int new_blocks_created = 2 * ch_amount * num_radix_blocks;
|
||||
|
||||
if (rem_blocks > 0)
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks, new_blocks_created,
|
||||
new_blocks_created + rem_blocks, old_blocks,
|
||||
cur_total_blocks - rem_blocks, cur_total_blocks);
|
||||
std::swap(new_blocks, old_blocks);
|
||||
r = (new_blocks_created + rem_blocks) / num_radix_blocks;
|
||||
std::swap(d_columns, d_new_columns);
|
||||
std::swap(d_columns_counter, d_new_columns_counter);
|
||||
}
|
||||
luts_message_carry->release(streams, gpu_indexes, gpu_count);
|
||||
delete (luts_message_carry);
|
||||
|
||||
CudaRadixCiphertextFFI old_blocks_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&old_blocks_slice, old_blocks,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks_slice, num_radix_blocks);
|
||||
cudaFreeHost(h_pbs_counters);
|
||||
calculate_final_chunk_into_radix<Torus>
|
||||
<<<number_of_blocks_2d, number_of_threads, 0, streams[0]>>>(
|
||||
(Torus *)(radix_lwe_out->ptr), (Torus *)(current_blocks->ptr),
|
||||
d_columns, d_columns_counter, chunk_size, big_lwe_size);
|
||||
|
||||
if (reduce_degrees_for_single_carry_propagation) {
|
||||
prepare_final_pbs_indexes<Torus><<<1, 2 * num_radix_blocks, 0, streams[0]>>>(
|
||||
d_pbs_indexes_in, d_pbs_indexes_out,
|
||||
luts_message_carry->get_lut_indexes(0, 0), num_radix_blocks);
|
||||
|
||||
cuda_memset_async(
|
||||
(Torus *)(current_blocks->ptr) + big_lwe_size * num_radix_blocks, 0,
|
||||
big_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
auto active_gpu_count = get_active_gpu_count(2 * num_radix_blocks, gpu_count);
|
||||
|
||||
if (active_gpu_count == 1) {
|
||||
execute_keyswitch_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)small_lwe_vector->ptr,
|
||||
d_pbs_indexes_in, (Torus *)radix_lwe_out->ptr, d_pbs_indexes_in, ksks,
|
||||
big_lwe_dimension, small_lwe_dimension, mem_ptr->params.ks_base_log,
|
||||
mem_ptr->params.ks_level, num_radix_blocks);
|
||||
|
||||
execute_pbs_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)current_blocks->ptr,
|
||||
d_pbs_indexes_out, luts_message_carry->lut_vec,
|
||||
luts_message_carry->lut_indexes_vec, (Torus *)small_lwe_vector->ptr,
|
||||
d_pbs_indexes_in, bsks, ms_noise_reduction_key,
|
||||
luts_message_carry->buffer, glwe_dimension, small_lwe_dimension,
|
||||
polynomial_size, mem_ptr->params.pbs_base_log,
|
||||
mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
|
||||
2 * num_radix_blocks, mem_ptr->params.pbs_type, num_many_lut,
|
||||
lut_stride);
|
||||
} else {
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->lwe_indexes_in,
|
||||
2 * num_radix_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_out,
|
||||
luts_message_carry->lwe_indexes_out,
|
||||
2 * num_radix_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
luts_message_carry->using_trivial_lwe_indexes = false;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, current_blocks, radix_lwe_out,
|
||||
bsks, ksks, ms_noise_reduction_key, luts_message_carry,
|
||||
2 * num_radix_blocks);
|
||||
}
|
||||
// cudaDeviceSynchronize();
|
||||
// print_body<Torus>("cuda_before_add", (Torus*)radix_lwe_out->ptr, num_radix_blocks, 2048,
|
||||
// 576460752303423488ULL);
|
||||
// cudaDeviceSynchronize();
|
||||
calculate_final_degrees(radix_lwe_out->degrees, terms->degrees,
|
||||
num_radix_blocks, num_radix_in_vec, chunk_size,
|
||||
mem_ptr->params.message_modulus);
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
CudaRadixCiphertextFFI current_blocks_slice;
|
||||
as_radix_ciphertext_slice<Torus>(¤t_blocks_slice, current_blocks,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out,
|
||||
current_blocks, ¤t_blocks_slice, num_radix_blocks);
|
||||
// printf("add_happened\n");
|
||||
}
|
||||
|
||||
|
||||
// cudaDeviceSynchronize();
|
||||
//
|
||||
// print_body<Torus>("cuda_out_after_add", (Torus*)radix_lwe_out->ptr, num_radix_blocks, 2048,
|
||||
// 576460752303423488ULL);
|
||||
// cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
@@ -599,14 +837,28 @@ __host__ void host_integer_mult_radix_kb(
|
||||
size_t b_id = i % num_blocks;
|
||||
terms_degree_msb[i] = (b_id > r_id) ? message_modulus - 2 : 0;
|
||||
}
|
||||
|
||||
|
||||
for (int i = 0; i < num_blocks * 2 * num_blocks; i++)
|
||||
{
|
||||
auto cur_ptr = (Torus*)vector_result_sb->ptr;
|
||||
cur_ptr += i * 2049 + 2048;
|
||||
print_debug<Torus>("", cur_ptr, 1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_blocks * 2 * num_blocks; i++) {
|
||||
printf("%d\n", vector_result_sb->degrees[i]);
|
||||
}
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<Torus, params>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, vector_result_sb, bsks,
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, vector_result_sb, true, bsks,
|
||||
ksks, ms_noise_reduction_key, mem_ptr->sum_ciphertexts_mem, num_blocks,
|
||||
2 * num_blocks, mem_ptr->luts_array);
|
||||
2 * num_blocks);
|
||||
|
||||
auto scp_mem_ptr = mem_ptr->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
|
||||
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, ms_noise_reduction_key, requested_flag,
|
||||
|
||||
@@ -115,13 +115,10 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
set_zero_radix_ciphertext_slice_async<T>(streams[0], gpu_indexes[0],
|
||||
lwe_array, 0, num_radix_blocks);
|
||||
} else {
|
||||
for (int i = 0; i < j * num_radix_blocks; i++) {
|
||||
all_shifted_buffer->degrees[i] = message_modulus - 1;
|
||||
}
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<T, params>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, all_shifted_buffer, bsks,
|
||||
streams, gpu_indexes, gpu_count, lwe_array, all_shifted_buffer, true, bsks,
|
||||
ksks, ms_noise_reduction_key, mem->sum_ciphertexts_vec_mem,
|
||||
num_radix_blocks, j, nullptr);
|
||||
num_radix_blocks, j);
|
||||
|
||||
auto scp_mem_ptr = mem->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
|
||||
@@ -398,20 +398,32 @@ uint64_t scratch_cuda_multi_bit_programmable_bootstrap_64(
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory) {
|
||||
|
||||
bool supports_cg =
|
||||
supports_cooperative_groups_on_multibit_programmable_bootstrap<uint64_t>(
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
input_lwe_ciphertext_count, cuda_get_max_shared_memory(gpu_index));
|
||||
#if (CUDA_ARCH >= 900)
|
||||
if (has_support_to_cuda_programmable_bootstrap_tbc_multi_bit<uint64_t>(
|
||||
// On H100s we should be using TBC until num_samples < num_sms / 2.
|
||||
// After that we switch to CG until not supported anymore.
|
||||
// At this point we return to TBC.
|
||||
int num_sms = 0;
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&num_sms, cudaDevAttrMultiProcessorCount, gpu_index));
|
||||
|
||||
bool supports_tbc =
|
||||
has_support_to_cuda_programmable_bootstrap_tbc_multi_bit<uint64_t>(
|
||||
input_lwe_ciphertext_count, glwe_dimension, polynomial_size,
|
||||
level_count, cuda_get_max_shared_memory(gpu_index)))
|
||||
level_count, cuda_get_max_shared_memory(gpu_index));
|
||||
|
||||
if (supports_tbc &&
|
||||
!(input_lwe_ciphertext_count > num_sms / 2 && supports_cg))
|
||||
return scratch_cuda_tbc_multi_bit_programmable_bootstrap<uint64_t>(
|
||||
stream, gpu_index, (pbs_buffer<uint64_t, MULTI_BIT> **)buffer,
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
input_lwe_ciphertext_count, allocate_gpu_memory);
|
||||
else
|
||||
#endif
|
||||
if (supports_cooperative_groups_on_multibit_programmable_bootstrap<
|
||||
uint64_t>(glwe_dimension, polynomial_size, level_count,
|
||||
input_lwe_ciphertext_count,
|
||||
cuda_get_max_shared_memory(gpu_index)))
|
||||
if (supports_cg)
|
||||
return scratch_cuda_cg_multi_bit_programmable_bootstrap<uint64_t>(
|
||||
stream, gpu_index, (pbs_buffer<uint64_t, MULTI_BIT> **)buffer,
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
|
||||
@@ -37,18 +37,21 @@ template <typename T> void print_debug(const char *name, const T *src, int N) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void print_body_kernel(T *src, int N, int lwe_dimension) {
|
||||
__global__ void print_body_kernel(T *src, int N, int lwe_dimension, T delta) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
printf("%lu, ", src[i * (lwe_dimension + 1) + lwe_dimension]);
|
||||
T body = src[i * (lwe_dimension + 1) + lwe_dimension];
|
||||
T clear = body / delta;
|
||||
printf("(%lu, %lu), ", body, clear);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void print_body(const char *name, T *src, int n, int lwe_dimension) {
|
||||
void print_body(const char *name, T *src, int n, int lwe_dimension, T delta) {
|
||||
printf("%s: ", name);
|
||||
cudaDeviceSynchronize();
|
||||
print_body_kernel<<<1, 1>>>(src, n, lwe_dimension);
|
||||
print_body_kernel<<<1, 1>>>(src, n, lwe_dimension, delta);
|
||||
cudaDeviceSynchronize();
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
@@ -1018,6 +1018,7 @@ unsafe extern "C" {
|
||||
gpu_count: u32,
|
||||
radix_lwe_out: *mut CudaRadixCiphertextFFI,
|
||||
radix_lwe_vec: *mut CudaRadixCiphertextFFI,
|
||||
reduce_degrees_for_single_carry_propagation: bool,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
@@ -1316,6 +1317,22 @@ unsafe extern "C" {
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn trim_radix_blocks_lsb_64(
|
||||
output: *mut CudaRadixCiphertextFFI,
|
||||
input: *const CudaRadixCiphertextFFI,
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
output: *mut CudaRadixCiphertextFFI,
|
||||
input: *const CudaRadixCiphertextFFI,
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
);
|
||||
}
|
||||
pub const KS_TYPE_BIG_TO_SMALL: KS_TYPE = 0;
|
||||
pub const KS_TYPE_SMALL_TO_BIG: KS_TYPE = 1;
|
||||
pub type KS_TYPE = ffi::c_uint;
|
||||
|
||||
@@ -13,7 +13,7 @@ rust-version = "1.72"
|
||||
|
||||
[dependencies]
|
||||
aes = "0.8.2"
|
||||
rayon = { workspace = true , optional = true }
|
||||
rayon = { workspace = true, optional = true }
|
||||
|
||||
[target.'cfg(target_os = "macos")'.dependencies]
|
||||
libc = "0.2.133"
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use crate::generators::aes_ctr::index::AesIndex;
|
||||
use crate::generators::aes_ctr::BYTES_PER_BATCH;
|
||||
use crate::generators::aes_ctr::{AES_CALLS_PER_BATCH, BYTES_PER_AES_CALL, BYTES_PER_BATCH};
|
||||
|
||||
/// Represents a key used in the AES block cipher.
|
||||
#[derive(Clone, Copy)]
|
||||
@@ -16,5 +15,7 @@ pub trait AesBlockCipher: Clone + Send + Sync {
|
||||
/// Instantiate a new generator from a secret key.
|
||||
fn new(key: AesKey) -> Self;
|
||||
/// Generates the batch corresponding to the given index.
|
||||
fn generate_batch(&mut self, index: AesIndex) -> [u8; BYTES_PER_BATCH];
|
||||
fn generate_batch(&mut self, data: [u128; AES_CALLS_PER_BATCH]) -> [u8; BYTES_PER_BATCH];
|
||||
/// Generate next bytes
|
||||
fn generate_next(&mut self, data: u128) -> [u8; BYTES_PER_AES_CALL];
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use crate::generators::aes_ctr::index::TableIndex;
|
||||
use crate::generators::aes_ctr::states::{BufferPointer, ShiftAction, State};
|
||||
use crate::generators::aes_ctr::BYTES_PER_BATCH;
|
||||
use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError};
|
||||
use crate::seeders::SeedKind;
|
||||
|
||||
// Usually, to work with iterators and parallel iterators, we would use opaque types such as
|
||||
// `impl Iterator<..>`. Unfortunately, it is not yet possible to return existential types in
|
||||
@@ -82,6 +83,26 @@ impl<BlockCipher: AesBlockCipher> AesCtrGenerator<BlockCipher> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn from_seed(seed: impl Into<SeedKind>) -> Self {
|
||||
match seed.into() {
|
||||
SeedKind::Ctr(seed) => Self::new(AesKey(seed.0), None, None),
|
||||
SeedKind::Xof(seed) => {
|
||||
let (key, init_index) = super::xof_init(seed);
|
||||
let last_index = TableIndex::LAST.decremented();
|
||||
let state = State::with_offset(TableIndex::SECOND, init_index);
|
||||
let block_cipher = Box::new(BlockCipher::new(key));
|
||||
let buffer = [0u8; BYTES_PER_BATCH];
|
||||
|
||||
Self {
|
||||
block_cipher,
|
||||
state,
|
||||
last: last_index,
|
||||
buffer,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the table index related to the previous random byte.
|
||||
pub fn table_index(&self) -> TableIndex {
|
||||
self.state.table_index()
|
||||
@@ -189,7 +210,9 @@ impl<BlockCipher: AesBlockCipher> Iterator for AesCtrGenerator<BlockCipher> {
|
||||
match self.state.increment() {
|
||||
ShiftAction::OutputByte(BufferPointer(ptr)) => Some(self.buffer[ptr]),
|
||||
ShiftAction::RefreshBatchAndOutputByte(aes_index, BufferPointer(ptr)) => {
|
||||
self.buffer = self.block_cipher.generate_batch(aes_index);
|
||||
let aes_inputs =
|
||||
core::array::from_fn(|i| aes_index.0.wrapping_add(i as u128).to_le());
|
||||
self.buffer = self.block_cipher.generate_batch(aes_inputs);
|
||||
Some(self.buffer[ptr])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ impl TableIndex {
|
||||
let new_byte_index = self.byte_index.0 + shift_remainder;
|
||||
let full_aes_shifts = full_aes_shifts + new_byte_index / BYTES_PER_AES_CALL;
|
||||
|
||||
// Store the reaminder in the byte index
|
||||
// Store the remainder in the byte index
|
||||
self.byte_index.0 = new_byte_index % BYTES_PER_AES_CALL;
|
||||
|
||||
self.aes_index.0 = self.aes_index.0.wrapping_add(full_aes_shifts as u128);
|
||||
|
||||
@@ -202,6 +202,7 @@ pub const BYTES_PER_BATCH: usize = BYTES_PER_AES_CALL * AES_CALLS_PER_BATCH;
|
||||
|
||||
/// A module containing structures to manage table indices.
|
||||
mod index;
|
||||
|
||||
pub use index::*;
|
||||
|
||||
/// A module containing structures to manage table indices and buffer pointers together properly.
|
||||
@@ -218,5 +219,28 @@ pub use generic::*;
|
||||
/// A module extending `generic` to the `rayon` paradigm.
|
||||
#[cfg(feature = "parallel")]
|
||||
mod parallel;
|
||||
|
||||
use crate::seeders::XofSeed;
|
||||
#[cfg(feature = "parallel")]
|
||||
pub use parallel::*;
|
||||
|
||||
pub(crate) fn xof_init(seed: XofSeed) -> (AesKey, AesIndex) {
|
||||
let init_key = AesKey(0);
|
||||
let mut aes = crate::generators::default::DefaultBlockCipher::new(init_key);
|
||||
|
||||
let blocks = seed
|
||||
.iter_u128_blocks()
|
||||
.chain(std::iter::once(seed.bit_len().to_le()));
|
||||
|
||||
let mut prev_c = 0;
|
||||
let mut c = 0;
|
||||
for mi in blocks {
|
||||
prev_c = c;
|
||||
c = u128::from_ne_bytes(aes.generate_next(prev_c ^ mi));
|
||||
}
|
||||
|
||||
let init = AesIndex(prev_c.to_le());
|
||||
let key = AesKey(c);
|
||||
|
||||
(key, init)
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ pub struct BufferPointer(pub usize);
|
||||
pub struct State {
|
||||
table_index: TableIndex,
|
||||
buffer_pointer: BufferPointer,
|
||||
offset: AesIndex,
|
||||
}
|
||||
|
||||
/// A structure representing the action to be taken by the generator after shifting its state.
|
||||
@@ -25,24 +26,42 @@ pub enum ShiftAction {
|
||||
impl State {
|
||||
/// Creates a new state from the initial table index.
|
||||
///
|
||||
/// Note :
|
||||
/// Note:
|
||||
/// ------
|
||||
///
|
||||
/// The `table_index` input, is the __first__ table index that will be outputted on the next
|
||||
/// The `table_index` input is the __first__ table index that will be outputted on the next
|
||||
/// call to `increment`. Put differently, the current table index of the newly created state
|
||||
/// is the predecessor of this one.
|
||||
pub fn new(table_index: TableIndex) -> Self {
|
||||
Self::with_offset(table_index, AesIndex(0))
|
||||
}
|
||||
|
||||
/// Creates a new state from the initial table index and offset
|
||||
///
|
||||
/// The `offset` AesIndex will be applied to all AES encryption.
|
||||
/// AES(Key, counter + offset).
|
||||
/// This is to be used when one wants to start the AES
|
||||
/// counter at a specific value but still output all the (2^128-1) values
|
||||
///
|
||||
/// Note:
|
||||
/// ------
|
||||
///
|
||||
/// The `table_index` input is the __first__ table index that will be outputted on the next
|
||||
/// call to `increment`. Put differently, the current table index of the newly created state
|
||||
/// is the predecessor of this one.
|
||||
pub fn with_offset(table_index: TableIndex, offset: AesIndex) -> Self {
|
||||
// We ensure that the table index is not the first one, to prevent wrapping on `decrement`,
|
||||
// and outputting `RefreshBatchAndOutputByte(AesIndex::MAX, ...)` on the first increment
|
||||
// (which would lead to loading a non continuous batch).
|
||||
// (which would lead to loading a non-continuous batch).
|
||||
assert_ne!(table_index, TableIndex::FIRST);
|
||||
State {
|
||||
Self {
|
||||
// To ensure that the first outputted table index is the proper one, we decrement the
|
||||
// table index.
|
||||
table_index: table_index.decremented(),
|
||||
// To ensure that the first `ShiftAction` will be a `RefreshBatchAndOutputByte`, we set
|
||||
// the buffer to the last allowed value.
|
||||
buffer_pointer: BufferPointer(BYTES_PER_BATCH - 1),
|
||||
offset,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,7 +71,8 @@ impl State {
|
||||
let total_batch_index = self.buffer_pointer.0 + shift;
|
||||
if total_batch_index > BYTES_PER_BATCH - 1 {
|
||||
self.buffer_pointer.0 = self.table_index.byte_index.0;
|
||||
ShiftAction::RefreshBatchAndOutputByte(self.table_index.aes_index, self.buffer_pointer)
|
||||
let index = AesIndex(self.table_index.aes_index.0.wrapping_add(self.offset.0));
|
||||
ShiftAction::RefreshBatchAndOutputByte(index, self.buffer_pointer)
|
||||
} else {
|
||||
self.buffer_pointer.0 = total_batch_index;
|
||||
ShiftAction::OutputByte(self.buffer_pointer)
|
||||
|
||||
@@ -7,3 +7,13 @@ pub type DefaultRandomGenerator = super::NeonAesRandomGenerator;
|
||||
not(any(target_arch = "x86_64", target_arch = "aarch64"))
|
||||
))]
|
||||
pub type DefaultRandomGenerator = super::SoftwareRandomGenerator;
|
||||
|
||||
#[cfg(all(target_arch = "x86_64", not(feature = "software-prng")))]
|
||||
pub type DefaultBlockCipher = super::implem::AesniBlockCipher;
|
||||
#[cfg(all(target_arch = "aarch64", not(feature = "software-prng")))]
|
||||
pub type DefaultBlockCipher = super::implem::ArmAesBlockCipher;
|
||||
#[cfg(any(
|
||||
feature = "software-prng",
|
||||
not(any(target_arch = "x86_64", target_arch = "aarch64"))
|
||||
))]
|
||||
pub type DefaultBlockCipher = super::SoftwareBlockCipher;
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use crate::generators::aes_ctr::{AesBlockCipher, AesIndex, AesKey, BYTES_PER_BATCH};
|
||||
use crate::generators::aes_ctr::{
|
||||
AesBlockCipher, AesKey, AES_CALLS_PER_BATCH, BYTES_PER_AES_CALL, BYTES_PER_BATCH,
|
||||
};
|
||||
use core::arch::aarch64::{
|
||||
uint8x16_t, vaeseq_u8, vaesmcq_u8, vdupq_n_u32, vdupq_n_u8, veorq_u8, vgetq_lane_u32,
|
||||
vreinterpretq_u32_u8, vreinterpretq_u8_u32,
|
||||
@@ -34,24 +36,28 @@ impl AesBlockCipher for ArmAesBlockCipher {
|
||||
ArmAesBlockCipher { round_keys }
|
||||
}
|
||||
|
||||
fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
|
||||
fn generate_batch(&mut self, data: [u128; AES_CALLS_PER_BATCH]) -> [u8; BYTES_PER_BATCH] {
|
||||
#[target_feature(enable = "aes,neon")]
|
||||
unsafe fn implementation(
|
||||
this: &ArmAesBlockCipher,
|
||||
AesIndex(aes_ctr): AesIndex,
|
||||
data: [u128; AES_CALLS_PER_BATCH],
|
||||
) -> [u8; BYTES_PER_BATCH] {
|
||||
let mut output = [0u8; BYTES_PER_BATCH];
|
||||
// We want 128 bytes of output, the ctr gives 128 bit message (16 bytes)
|
||||
for (i, out) in output.chunks_exact_mut(16).enumerate() {
|
||||
for (input, out) in data.iter().copied().zip(output.chunks_exact_mut(16)) {
|
||||
// Safe because we prevent the user from creating the Generator
|
||||
// on non-supported hardware
|
||||
let encrypted = encrypt(aes_ctr + (i as u128), &this.round_keys);
|
||||
let encrypted = encrypt(input, &this.round_keys);
|
||||
out.copy_from_slice(&encrypted.to_ne_bytes());
|
||||
}
|
||||
output
|
||||
}
|
||||
// SAFETY: we checked for aes and neon availability in `Self::new`
|
||||
unsafe { implementation(self, AesIndex(aes_ctr)) }
|
||||
unsafe { implementation(self, data) }
|
||||
}
|
||||
|
||||
fn generate_next(&mut self, data: u128) -> [u8; BYTES_PER_AES_CALL] {
|
||||
unsafe { encrypt(data, &self.round_keys) }.to_ne_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use crate::generators::aes_ctr::{AesCtrGenerator, AesKey, ChildrenIterator};
|
||||
use crate::generators::aes_ctr::{AesCtrGenerator, ChildrenIterator};
|
||||
use crate::generators::implem::aarch64::block_cipher::ArmAesBlockCipher;
|
||||
use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError, RandomGenerator};
|
||||
use crate::seeders::Seed;
|
||||
use crate::seeders::SeedKind;
|
||||
|
||||
/// A random number generator using the `aesni` instructions.
|
||||
/// A random number generator using the arm `neon` instructions.
|
||||
pub struct NeonAesRandomGenerator(pub(super) AesCtrGenerator<ArmAesBlockCipher>);
|
||||
|
||||
/// The children iterator used by [`NeonAesRandomGenerator`].
|
||||
@@ -21,8 +21,8 @@ impl Iterator for ArmAesChildrenIterator {
|
||||
|
||||
impl RandomGenerator for NeonAesRandomGenerator {
|
||||
type ChildrenIter = ArmAesChildrenIterator;
|
||||
fn new(seed: Seed) -> Self {
|
||||
NeonAesRandomGenerator(AesCtrGenerator::new(AesKey(seed.0), None, None))
|
||||
fn new(seed: impl Into<SeedKind>) -> Self {
|
||||
NeonAesRandomGenerator(AesCtrGenerator::from_seed(seed))
|
||||
}
|
||||
fn remaining_bytes(&self) -> ByteCount {
|
||||
self.0.remaining_bytes()
|
||||
@@ -107,4 +107,14 @@ mod test {
|
||||
fn test_bounded_panic() {
|
||||
generator_generic_test::test_bounded_none_should_panic::<NeonAesRandomGenerator>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector() {
|
||||
generator_generic_test::test_vectors::<NeonAesRandomGenerator>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector_xof_seed() {
|
||||
generator_generic_test::test_vectors_xof_seed::<NeonAesRandomGenerator>();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
//! [intel aesni white paper 323641-001 revision 3.0](https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf).
|
||||
|
||||
mod block_cipher;
|
||||
pub use block_cipher::ArmAesBlockCipher;
|
||||
|
||||
mod generator;
|
||||
pub use generator::*;
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use crate::generators::aes_ctr::{AesBlockCipher, AesIndex, AesKey, BYTES_PER_BATCH};
|
||||
use crate::generators::aes_ctr::{
|
||||
AesBlockCipher, AesKey, AES_CALLS_PER_BATCH, BYTES_PER_AES_CALL, BYTES_PER_BATCH,
|
||||
};
|
||||
use std::arch::x86_64::{
|
||||
__m128i, _mm_aesenc_si128, _mm_aesenclast_si128, _mm_aeskeygenassist_si128, _mm_shuffle_epi32,
|
||||
_mm_slli_si128, _mm_store_si128, _mm_xor_si128,
|
||||
@@ -30,26 +32,30 @@ impl AesBlockCipher for AesniBlockCipher {
|
||||
AesniBlockCipher { round_keys }
|
||||
}
|
||||
|
||||
fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
|
||||
fn generate_batch(&mut self, data: [u128; AES_CALLS_PER_BATCH]) -> [u8; BYTES_PER_BATCH] {
|
||||
#[target_feature(enable = "sse2,aes")]
|
||||
unsafe fn implementation(
|
||||
this: &AesniBlockCipher,
|
||||
AesIndex(aes_ctr): AesIndex,
|
||||
data: [u128; AES_CALLS_PER_BATCH],
|
||||
) -> [u8; BYTES_PER_BATCH] {
|
||||
si128arr_to_u8arr(aes_encrypt_many(
|
||||
u128_to_si128(aes_ctr),
|
||||
u128_to_si128(aes_ctr + 1),
|
||||
u128_to_si128(aes_ctr + 2),
|
||||
u128_to_si128(aes_ctr + 3),
|
||||
u128_to_si128(aes_ctr + 4),
|
||||
u128_to_si128(aes_ctr + 5),
|
||||
u128_to_si128(aes_ctr + 6),
|
||||
u128_to_si128(aes_ctr + 7),
|
||||
u128_to_si128(data[0]),
|
||||
u128_to_si128(data[1]),
|
||||
u128_to_si128(data[2]),
|
||||
u128_to_si128(data[3]),
|
||||
u128_to_si128(data[4]),
|
||||
u128_to_si128(data[5]),
|
||||
u128_to_si128(data[6]),
|
||||
u128_to_si128(data[7]),
|
||||
&this.round_keys,
|
||||
))
|
||||
}
|
||||
// SAFETY: we checked for aes and sse2 availability in `Self::new`
|
||||
unsafe { implementation(self, AesIndex(aes_ctr)) }
|
||||
unsafe { implementation(self, data) }
|
||||
}
|
||||
|
||||
fn generate_next(&mut self, data: u128) -> [u8; BYTES_PER_AES_CALL] {
|
||||
unsafe { transmute(aes_encrypt_one(u128_to_si128(data), &self.round_keys)) }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,6 +67,19 @@ unsafe fn generate_round_keys(key: AesKey) -> [__m128i; 11] {
|
||||
keys
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn aes_encrypt_one(message: __m128i, keys: &[__m128i; 11]) -> __m128i {
|
||||
unsafe {
|
||||
let mut tmp_1 = _mm_xor_si128(message, keys[0]);
|
||||
|
||||
for key in keys.iter().take(10).skip(1) {
|
||||
tmp_1 = _mm_aesenc_si128(tmp_1, *key);
|
||||
}
|
||||
|
||||
_mm_aesenclast_si128(tmp_1, keys[10])
|
||||
}
|
||||
}
|
||||
|
||||
// Uses aes to encrypt many values at once. This allows a substantial speedup (around 30%)
|
||||
// compared to the naive approach.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
@@ -228,4 +247,14 @@ mod test {
|
||||
assert_eq!(CIPHERTEXT, si128_to_u128(*ct));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encrypt_one_message() {
|
||||
let message = u128_to_si128(PLAINTEXT);
|
||||
let key = u128_to_si128(CIPHER_KEY);
|
||||
let mut keys: [__m128i; 11] = [u128_to_si128(0); 11];
|
||||
aes_128_key_expansion(key, &mut keys);
|
||||
let ciphertext = aes_encrypt_one(message, &keys);
|
||||
assert_eq!(CIPHERTEXT, si128_to_u128(ciphertext));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::generators::aes_ctr::{AesCtrGenerator, AesKey, ChildrenIterator};
|
||||
use crate::generators::aes_ctr::{AesCtrGenerator, ChildrenIterator};
|
||||
use crate::generators::implem::aesni::block_cipher::AesniBlockCipher;
|
||||
use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError, RandomGenerator};
|
||||
use crate::seeders::Seed;
|
||||
use crate::seeders::SeedKind;
|
||||
|
||||
/// A random number generator using the `aesni` instructions.
|
||||
pub struct AesniRandomGenerator(pub(super) AesCtrGenerator<AesniBlockCipher>);
|
||||
@@ -21,8 +21,8 @@ impl Iterator for AesniChildrenIterator {
|
||||
|
||||
impl RandomGenerator for AesniRandomGenerator {
|
||||
type ChildrenIter = AesniChildrenIterator;
|
||||
fn new(seed: Seed) -> Self {
|
||||
AesniRandomGenerator(AesCtrGenerator::new(AesKey(seed.0), None, None))
|
||||
fn new(seed: impl Into<SeedKind>) -> Self {
|
||||
AesniRandomGenerator(AesCtrGenerator::from_seed(seed))
|
||||
}
|
||||
fn remaining_bytes(&self) -> ByteCount {
|
||||
self.0.remaining_bytes()
|
||||
@@ -107,4 +107,14 @@ mod test {
|
||||
fn test_bounded_panic() {
|
||||
generator_generic_test::test_bounded_none_should_panic::<AesniRandomGenerator>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector() {
|
||||
generator_generic_test::test_vectors::<AesniRandomGenerator>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector_xof_seed() {
|
||||
generator_generic_test::test_vectors_xof_seed::<AesniRandomGenerator>();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
//! [intel aesni white paper 323641-001 revision 3.0](https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf).
|
||||
|
||||
mod block_cipher;
|
||||
pub use block_cipher::AesniBlockCipher;
|
||||
|
||||
mod generator;
|
||||
pub use generator::*;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::generators::aes_ctr::{
|
||||
AesBlockCipher, AesIndex, AesKey, AES_CALLS_PER_BATCH, BYTES_PER_AES_CALL, BYTES_PER_BATCH,
|
||||
AesBlockCipher, AesKey, AES_CALLS_PER_BATCH, BYTES_PER_AES_CALL, BYTES_PER_BATCH,
|
||||
};
|
||||
use aes::cipher::generic_array::GenericArray;
|
||||
use aes::cipher::{BlockEncrypt, KeyInit};
|
||||
@@ -19,19 +19,23 @@ impl AesBlockCipher for SoftwareBlockCipher {
|
||||
SoftwareBlockCipher { aes }
|
||||
}
|
||||
|
||||
fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
|
||||
fn generate_batch(&mut self, data: [u128; AES_CALLS_PER_BATCH]) -> [u8; BYTES_PER_BATCH] {
|
||||
aes_encrypt_many(
|
||||
aes_ctr,
|
||||
aes_ctr + 1,
|
||||
aes_ctr + 2,
|
||||
aes_ctr + 3,
|
||||
aes_ctr + 4,
|
||||
aes_ctr + 5,
|
||||
aes_ctr + 6,
|
||||
aes_ctr + 7,
|
||||
&self.aes,
|
||||
data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7], &self.aes,
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_next(&mut self, data: u128) -> [u8; BYTES_PER_AES_CALL] {
|
||||
aes_encrypt_one(data, &self.aes)
|
||||
}
|
||||
}
|
||||
|
||||
fn aes_encrypt_one(message: u128, cipher: &Aes128) -> [u8; BYTES_PER_AES_CALL] {
|
||||
let mut b1 = GenericArray::clone_from_slice(&message.to_ne_bytes()[..]);
|
||||
|
||||
cipher.encrypt_block(&mut b1);
|
||||
|
||||
b1.into()
|
||||
}
|
||||
|
||||
// Uses aes to encrypt many values at once. This allows a substantial speedup (around 30%)
|
||||
@@ -110,4 +114,12 @@ mod test {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encrypt_one_message() {
|
||||
let key: [u8; BYTES_PER_AES_CALL] = CIPHER_KEY.to_ne_bytes();
|
||||
let aes = Aes128::new(&GenericArray::from(key));
|
||||
let ciphertext = aes_encrypt_one(PLAINTEXT, &aes);
|
||||
assert_eq!(u128::from_ne_bytes(ciphertext), CIPHERTEXT);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::generators::aes_ctr::{AesCtrGenerator, AesKey, ChildrenIterator};
|
||||
use crate::generators::aes_ctr::{AesCtrGenerator, ChildrenIterator};
|
||||
use crate::generators::implem::soft::block_cipher::SoftwareBlockCipher;
|
||||
use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError, RandomGenerator};
|
||||
use crate::seeders::Seed;
|
||||
use crate::seeders::SeedKind;
|
||||
|
||||
/// A random number generator using a software implementation.
|
||||
pub struct SoftwareRandomGenerator(pub(super) AesCtrGenerator<SoftwareBlockCipher>);
|
||||
@@ -21,8 +21,8 @@ impl Iterator for SoftwareChildrenIterator {
|
||||
|
||||
impl RandomGenerator for SoftwareRandomGenerator {
|
||||
type ChildrenIter = SoftwareChildrenIterator;
|
||||
fn new(seed: Seed) -> Self {
|
||||
SoftwareRandomGenerator(AesCtrGenerator::new(AesKey(seed.0), None, None))
|
||||
fn new(seed: impl Into<SeedKind>) -> Self {
|
||||
SoftwareRandomGenerator(AesCtrGenerator::from_seed(seed))
|
||||
}
|
||||
fn remaining_bytes(&self) -> ByteCount {
|
||||
self.0.remaining_bytes()
|
||||
@@ -52,49 +52,57 @@ mod test {
|
||||
use crate::generators::aes_ctr::aes_ctr_generic_test;
|
||||
use crate::generators::generator_generic_test;
|
||||
|
||||
#[test]
|
||||
fn prop_fork_first_state_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_first_state_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
// We use powerpc64 as the target to test behavior on big-endian
|
||||
// However, we run these tests using an emulator. Thus, these get really slow
|
||||
// so we skip them
|
||||
#[cfg(not(target_arch = "powerpc64"))]
|
||||
mod fork_tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn prop_fork_last_bound_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_last_bound_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
#[test]
|
||||
fn prop_fork_first_state_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_first_state_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prop_fork_parent_bound_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_parent_bound_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
#[test]
|
||||
fn prop_fork_last_bound_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_last_bound_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prop_fork_parent_state_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_parent_state_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
#[test]
|
||||
fn prop_fork_parent_bound_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_parent_bound_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prop_fork() {
|
||||
aes_ctr_generic_test::prop_fork::<SoftwareBlockCipher>();
|
||||
}
|
||||
#[test]
|
||||
fn prop_fork_parent_state_table_index() {
|
||||
aes_ctr_generic_test::prop_fork_parent_state_table_index::<SoftwareBlockCipher>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prop_fork_children_remaining_bytes() {
|
||||
aes_ctr_generic_test::prop_fork_children_remaining_bytes::<SoftwareBlockCipher>();
|
||||
}
|
||||
#[test]
|
||||
fn prop_fork() {
|
||||
aes_ctr_generic_test::prop_fork::<SoftwareBlockCipher>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prop_fork_parent_remaining_bytes() {
|
||||
aes_ctr_generic_test::prop_fork_parent_remaining_bytes::<SoftwareBlockCipher>();
|
||||
}
|
||||
#[test]
|
||||
fn prop_fork_children_remaining_bytes() {
|
||||
aes_ctr_generic_test::prop_fork_children_remaining_bytes::<SoftwareBlockCipher>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_roughly_uniform() {
|
||||
generator_generic_test::test_roughly_uniform::<SoftwareRandomGenerator>();
|
||||
}
|
||||
#[test]
|
||||
fn prop_fork_parent_remaining_bytes() {
|
||||
aes_ctr_generic_test::prop_fork_parent_remaining_bytes::<SoftwareBlockCipher>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fork() {
|
||||
generator_generic_test::test_fork_children::<SoftwareRandomGenerator>();
|
||||
#[test]
|
||||
fn test_fork() {
|
||||
generator_generic_test::test_fork_children::<SoftwareRandomGenerator>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_roughly_uniform() {
|
||||
generator_generic_test::test_roughly_uniform::<SoftwareRandomGenerator>();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -107,4 +115,14 @@ mod test {
|
||||
fn test_bounded_panic() {
|
||||
generator_generic_test::test_bounded_none_should_panic::<SoftwareRandomGenerator>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector() {
|
||||
generator_generic_test::test_vectors::<SoftwareRandomGenerator>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector_xof_seed() {
|
||||
generator_generic_test::test_vectors_xof_seed::<SoftwareRandomGenerator>();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
//! A module using a software fallback implementation of random number generator.
|
||||
|
||||
mod block_cipher;
|
||||
pub use block_cipher::SoftwareBlockCipher;
|
||||
|
||||
mod generator;
|
||||
pub use generator::*;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! A module containing random generators objects.
|
||||
//!
|
||||
//! See [crate-level](`crate`) explanations.
|
||||
use crate::seeders::Seed;
|
||||
use crate::seeders::SeedKind;
|
||||
use std::error::Error;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
@@ -62,7 +62,7 @@ pub trait RandomGenerator: Iterator<Item = u8> {
|
||||
///
|
||||
/// This operation is usually costly to perform, as the aes round keys need to be generated from
|
||||
/// the seed.
|
||||
fn new(seed: Seed) -> Self;
|
||||
fn new(seed: impl Into<SeedKind>) -> Self;
|
||||
|
||||
/// Returns the number of bytes that can still be outputted by the generator before reaching its
|
||||
/// bound.
|
||||
@@ -131,6 +131,7 @@ pub use default::DefaultRandomGenerator;
|
||||
#[allow(unused)] // to please clippy when tests are not activated
|
||||
pub mod generator_generic_test {
|
||||
use super::*;
|
||||
use crate::seeders::{Seed, XofSeed};
|
||||
use rand::Rng;
|
||||
|
||||
const REPEATS: usize = 1_000;
|
||||
@@ -236,4 +237,65 @@ pub mod generator_generic_test {
|
||||
// One call too many, should panic
|
||||
bounded.next().ok_or("expected test panic").unwrap();
|
||||
}
|
||||
|
||||
pub fn test_vectors<G: RandomGenerator>() {
|
||||
// Number of random bytes to generate,
|
||||
// this should be 2 batch worth of aes calls (where a batch is 8 aes)
|
||||
const N_BYTES: usize = 16 * 2 * 8;
|
||||
|
||||
const EXPECTED_BYTE: [u8; N_BYTES] = [
|
||||
14, 216, 93, 249, 97, 26, 187, 114, 73, 205, 209, 104, 197, 70, 126, 250, 235, 1, 136,
|
||||
141, 46, 146, 174, 231, 14, 204, 28, 99, 139, 246, 214, 112, 253, 151, 34, 114, 235, 7,
|
||||
76, 37, 36, 154, 226, 148, 68, 238, 117, 87, 212, 183, 174, 200, 222, 153, 62, 48, 166,
|
||||
134, 27, 97, 230, 206, 78, 128, 151, 166, 15, 156, 120, 158, 35, 41, 121, 55, 180, 184,
|
||||
108, 160, 33, 208, 255, 147, 246, 159, 10, 239, 6, 103, 124, 123, 83, 72, 189, 237,
|
||||
225, 36, 30, 151, 134, 94, 211, 181, 108, 239, 137, 18, 246, 237, 233, 59, 61, 24, 111,
|
||||
198, 76, 92, 86, 129, 171, 50, 124, 2, 72, 143, 160, 223, 32, 187, 175, 239, 111, 51,
|
||||
85, 110, 134, 45, 193, 113, 247, 249, 78, 230, 103, 123, 66, 48, 31, 169, 228, 140,
|
||||
202, 168, 202, 199, 147, 89, 135, 104, 254, 198, 72, 31, 103, 236, 207, 138, 24, 100,
|
||||
230, 168, 233, 214, 130, 195, 0, 25, 220, 136, 128, 173, 40, 154, 116, 87, 114, 187,
|
||||
170, 150, 131, 163, 155, 98, 217, 198, 238, 178, 165, 214, 168, 252, 107, 123, 214, 33,
|
||||
17, 114, 35, 23, 172, 145, 5, 39, 16, 33, 92, 163, 132, 240, 167, 128, 226, 165, 80, 9,
|
||||
153, 252, 139, 0, 139, 0, 54, 188, 253, 141, 2, 78, 97, 53, 214, 173, 155, 84, 98, 51,
|
||||
70, 110, 91, 181, 229, 231, 27, 225, 185, 143, 63, 238,
|
||||
];
|
||||
|
||||
let seed_bytes: [u8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let seed = Seed(u128::from_ne_bytes(seed_bytes));
|
||||
|
||||
let mut rng = G::new(seed);
|
||||
let bytes = rng.take(N_BYTES).collect::<Vec<_>>();
|
||||
assert_eq!(bytes, EXPECTED_BYTE);
|
||||
}
|
||||
|
||||
pub fn test_vectors_xof_seed<G: RandomGenerator>() {
|
||||
// Number of random bytes to generate,
|
||||
// this should be 2 batch worth of aes calls (where a batch is 8 aes)
|
||||
const N_BYTES: usize = 16 * 2 * 8;
|
||||
|
||||
const EXPECTED_BYTE: [u8; N_BYTES] = [
|
||||
134, 231, 117, 200, 60, 174, 158, 95, 80, 64, 236, 147, 204, 196, 251, 198, 110, 155,
|
||||
74, 69, 162, 251, 224, 46, 46, 83, 209, 224, 89, 108, 68, 240, 37, 16, 109, 194, 92, 3,
|
||||
164, 21, 167, 224, 205, 31, 90, 178, 59, 150, 142, 238, 113, 144, 181, 118, 160, 72,
|
||||
187, 38, 29, 61, 189, 229, 66, 22, 4, 38, 210, 63, 232, 182, 115, 49, 96, 6, 120, 226,
|
||||
40, 51, 144, 59, 136, 224, 252, 195, 50, 250, 134, 45, 149, 220, 32, 27, 35, 225, 190,
|
||||
73, 161, 182, 250, 149, 153, 131, 220, 143, 181, 152, 187, 25, 62, 197, 24, 10, 142,
|
||||
57, 172, 15, 17, 244, 242, 232, 51, 50, 244, 85, 58, 69, 28, 113, 151, 143, 138, 166,
|
||||
198, 16, 210, 46, 234, 138, 32, 124, 98, 167, 141, 251, 60, 13, 158, 106, 29, 86, 63,
|
||||
73, 42, 138, 174, 195, 192, 72, 122, 74, 54, 134, 107, 144, 241, 12, 33, 70, 27, 116,
|
||||
154, 123, 1, 252, 141, 73, 79, 30, 162, 43, 57, 8, 99, 62, 222, 117, 232, 147, 81, 189,
|
||||
54, 17, 233, 33, 41, 132, 155, 246, 185, 189, 17, 77, 32, 107, 134, 61, 174, 64, 174,
|
||||
80, 229, 239, 243, 143, 152, 249, 254, 125, 42, 0, 170, 253, 34, 57, 100, 82, 244, 9,
|
||||
101, 126, 138, 218, 215, 55, 58, 177, 154, 5, 28, 113, 89, 123, 129, 254, 212, 191,
|
||||
162, 44, 120, 67, 241, 157, 31, 162, 113, 91,
|
||||
];
|
||||
|
||||
let seed_bytes: [u8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
|
||||
let seed = u128::from_ne_bytes(seed_bytes);
|
||||
let xof_seed = XofSeed::new_u128(seed, [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']);
|
||||
|
||||
let mut rng = G::new(xof_seed);
|
||||
let bytes = rng.take(N_BYTES).collect::<Vec<_>>();
|
||||
assert_eq!(bytes, EXPECTED_BYTE);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::seeders::{Seed, Seeder};
|
||||
|
||||
/// There is no `rseed` equivalent in the ARM specification until `ARMv8.5-A`.
|
||||
/// However it seems that these instructions are not exposed in `core::arch::aarch64`.
|
||||
/// However, it seems that these instructions are not exposed in `core::arch::aarch64`.
|
||||
///
|
||||
/// Our primary interest for supporting aarch64 targets is AppleSilicon support
|
||||
/// which for the M1 macs available, they are based on the `ARMv8.4-A` set.
|
||||
|
||||
@@ -9,6 +9,90 @@
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
pub struct Seed(pub u128);
|
||||
|
||||
/// A Seed as described in the [NIST document]
|
||||
///
|
||||
/// This seed contains 2 information:
|
||||
/// * The domain separator bytes (ASCII string)
|
||||
/// * The seed bytes
|
||||
///
|
||||
/// [NIST document]: https://eprint.iacr.org/2025/699
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct XofSeed {
|
||||
// We store the domain separator concatenated with the seed bytes (str||seed)
|
||||
// as it makes it easier to create the iterator of u128 blocks
|
||||
data: Vec<u8>,
|
||||
}
|
||||
|
||||
impl XofSeed {
|
||||
const DOMAIN_SEP_LEN: usize = 8;
|
||||
|
||||
// Creates a new seed of 128 bits
|
||||
pub fn new_u128(seed: u128, domain_separator: [u8; Self::DOMAIN_SEP_LEN]) -> Self {
|
||||
let mut data = vec![0u8; size_of::<u128>() + domain_separator.len()];
|
||||
data[..Self::DOMAIN_SEP_LEN].copy_from_slice(domain_separator.as_slice());
|
||||
data[Self::DOMAIN_SEP_LEN..].copy_from_slice(seed.to_ne_bytes().as_slice());
|
||||
|
||||
Self { data }
|
||||
}
|
||||
|
||||
pub fn new(mut seed: Vec<u8>, domain_separator: [u8; Self::DOMAIN_SEP_LEN]) -> Self {
|
||||
seed.resize(domain_separator.len() + seed.len(), 0);
|
||||
seed.rotate_right(domain_separator.len());
|
||||
seed[..Self::DOMAIN_SEP_LEN].copy_from_slice(domain_separator.as_slice());
|
||||
Self { data: seed }
|
||||
}
|
||||
|
||||
/// Returns the seed part
|
||||
pub fn seed(&self) -> &[u8] {
|
||||
&self.data[Self::DOMAIN_SEP_LEN..]
|
||||
}
|
||||
|
||||
/// Returns the domain separator
|
||||
pub fn domain_separator(&self) -> [u8; Self::DOMAIN_SEP_LEN] {
|
||||
let mut sep = [0u8; Self::DOMAIN_SEP_LEN];
|
||||
sep.copy_from_slice(&self.data[..Self::DOMAIN_SEP_LEN]);
|
||||
sep
|
||||
}
|
||||
|
||||
/// Total len (seed bytes + domain separator) in bits
|
||||
pub fn bit_len(&self) -> u128 {
|
||||
(self.data.len()) as u128 * 8
|
||||
}
|
||||
|
||||
/// Returns an iterator that iterates over the concatenated seed||domain_separator
|
||||
/// as blocks of u128 bits
|
||||
pub(crate) fn iter_u128_blocks(&self) -> impl Iterator<Item = u128> + '_ {
|
||||
self.data.chunks(size_of::<u128>()).map(move |chunk| {
|
||||
let mut buf = [0u8; size_of::<u128>()];
|
||||
buf[..chunk.len()].copy_from_slice(chunk);
|
||||
u128::from_ne_bytes(buf)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub enum SeedKind {
|
||||
/// Initializes the Aes-Ctr with a counter starting at 0
|
||||
/// and uses the seed as the Aes key.
|
||||
Ctr(Seed),
|
||||
/// Seed that initialized the Aes-Ctr following the NIST document (see [XofSeed]).
|
||||
///
|
||||
/// An Aes-Key and starting counter will be derived from the XofSeed, to
|
||||
/// then initialize the Aes-Ctr random generator
|
||||
Xof(XofSeed),
|
||||
}
|
||||
|
||||
impl From<Seed> for SeedKind {
|
||||
fn from(value: Seed) -> Self {
|
||||
Self::Ctr(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<XofSeed> for SeedKind {
|
||||
fn from(value: XofSeed) -> Self {
|
||||
Self::Xof(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait representing a seeding strategy.
|
||||
pub trait Seeder {
|
||||
/// Generates a new seed.
|
||||
@@ -29,7 +113,7 @@ pub use implem::*;
|
||||
|
||||
#[cfg(test)]
|
||||
mod generic_tests {
|
||||
use crate::seeders::Seeder;
|
||||
use crate::seeders::{Seeder, XofSeed};
|
||||
|
||||
/// Naively verifies that two fixed-size sequences generated by repeatedly calling the seeder
|
||||
/// are different.
|
||||
@@ -47,4 +131,31 @@ mod generic_tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_xof_seed_getters() {
|
||||
let bits = u128::from_ne_bytes([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
|
||||
let dsep = [b't', b'f', b'h', b'e', b'k', b's', b'p', b's'];
|
||||
let seed = XofSeed::new_u128(bits, dsep);
|
||||
|
||||
let s = u128::from_ne_bytes(seed.seed().try_into().unwrap());
|
||||
assert_eq!(s, bits);
|
||||
assert_eq!(seed.domain_separator(), dsep);
|
||||
assert_eq!(seed.bit_len(), 192);
|
||||
|
||||
let collected_u128s = seed.iter_u128_blocks().collect::<Vec<_>>();
|
||||
assert_eq!(
|
||||
collected_u128s,
|
||||
vec![
|
||||
u128::from_ne_bytes([
|
||||
b't', b'f', b'h', b'e', b'k', b's', b'p', b's', 1, 2, 3, 4, 5, 6, 7, 8
|
||||
]),
|
||||
u128::from_ne_bytes([9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]),
|
||||
]
|
||||
);
|
||||
|
||||
// To make sure both constructors yield the same results
|
||||
let seed2 = XofSeed::new(bits.to_ne_bytes().to_vec(), dsep);
|
||||
assert_eq!(seed.data, seed2.data);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,9 +43,9 @@ Comparing to the [CPU example](../../getting_started/quick_start.md), HPU set up
|
||||
Here is a full example (combining the client and server parts):
|
||||
|
||||
```rust
|
||||
use tfhe::{ConfigBuilder, set_server_key, FheUint8, ClientKey, CompressedServerKey};
|
||||
use tfhe::{Config, set_server_key, FheUint8, ClientKey, CompressedServerKey};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe_hpu_backend::prelude::*;
|
||||
use tfhe::tfhe_hpu_backend::prelude::*;
|
||||
|
||||
fn main() {
|
||||
|
||||
@@ -53,7 +53,7 @@ fn main() {
|
||||
// HPU configuration knobs are retrieved from a TOML configuration file. Prebuilt configurations could be find in `backends/tfhe-hpu-backend/config_store`
|
||||
// For ease of use a setup_hpu.sh script is available in repository root folder and it handle the required environment variables setup and driver initialisation
|
||||
// More details are available in `backends/tfhe-hpu-backend/README.md`
|
||||
let hpu_device = HpuDevice::from_config(ShellString::new("${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_config.toml".to_string()));
|
||||
let hpu_device = HpuDevice::from_config(ShellString::new("${HPU_BACKEND_DIR}/config_store/${HPU_CONFIG}/hpu_config.toml".to_string()).expand().as_str());
|
||||
|
||||
// Generate keys ----------------------------------------------------------
|
||||
let config = Config::from_hpu_device(&hpu_device);
|
||||
@@ -106,7 +106,7 @@ The server first needs to set up its keys with `set_server_key((hpu_device, comp
|
||||
|
||||
Then, homomorphic computations are performed using the same approach as the [CPU operations](../../fhe-computation/operations/README.md).
|
||||
|
||||
``` rust
|
||||
``` Rust
|
||||
// Server-side
|
||||
let result = a + b;
|
||||
|
||||
|
||||
@@ -260,16 +260,19 @@ pub fn main() {
|
||||
let roi_start = Instant::now();
|
||||
|
||||
let res_hpu = (0..args.iter)
|
||||
.map(|_i| {
|
||||
.filter_map(|i| {
|
||||
let res = HpuRadixCiphertext::exec(&proto, iop.opcode(), &srcs_enc, &imms);
|
||||
std::hint::black_box(&res);
|
||||
res
|
||||
if i == (args.iter - 1) {
|
||||
Some(res)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next_back()
|
||||
.expect("Iteration must be greater than 0");
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// let res_fhe = $fhe_type::from(res_hpu);
|
||||
let res_fhe = res_hpu
|
||||
.last()
|
||||
.expect("Iteration must be greater than 0")
|
||||
.iter()
|
||||
.map(|x| x.to_radix_ciphertext())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
@@ -25,6 +25,8 @@ use tfhe_versionable::Versionize;
|
||||
|
||||
#[cfg(feature = "hpu")]
|
||||
use crate::integer::hpu::ciphertext::HpuRadixCiphertext;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::prelude::IfThenElseSizeOnGpu;
|
||||
#[cfg(feature = "hpu")]
|
||||
use tfhe_hpu_backend::prelude::*;
|
||||
|
||||
@@ -1769,3 +1771,47 @@ impl std::ops::Not for &FheBool {
|
||||
FheBool::new(ciphertext, tag)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
impl<Id> IfThenElseSizeOnGpu<FheUint<Id>> for FheBool
|
||||
where
|
||||
Id: FheUintId,
|
||||
{
|
||||
fn get_if_then_else_size_on_gpu(&self, ct_then: &FheUint<Id>, ct_else: &FheUint<Id>) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_if_then_else_size_on_gpu(
|
||||
&CudaBooleanBlock(self.ciphertext.on_gpu(streams).duplicate(streams)),
|
||||
&*ct_then.ciphertext.on_gpu(streams),
|
||||
&*ct_else.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
impl<Id> IfThenElseSizeOnGpu<FheInt<Id>> for FheBool
|
||||
where
|
||||
Id: FheIntId,
|
||||
{
|
||||
fn get_if_then_else_size_on_gpu(&self, ct_then: &FheInt<Id>, ct_else: &FheInt<Id>) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_if_then_else_size_on_gpu(
|
||||
&CudaBooleanBlock(self.ciphertext.on_gpu(streams).duplicate(streams)),
|
||||
&*ct_then.ciphertext.on_gpu(streams),
|
||||
&*ct_else.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::details::MaybeCloned;
|
||||
use crate::high_level_api::global_state;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::global_state::with_cuda_internal_keys;
|
||||
use crate::high_level_api::integers::{FheIntId, FheUintId};
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
#[cfg(feature = "gpu")]
|
||||
@@ -2256,14 +2254,12 @@ where
|
||||
fn get_gt_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_gt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_gt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2272,14 +2268,12 @@ where
|
||||
fn get_ge_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_ge_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_ge_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2288,14 +2282,12 @@ where
|
||||
fn get_lt_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_lt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_lt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2304,14 +2296,12 @@ where
|
||||
fn get_le_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_le_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_le_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2326,14 +2316,12 @@ where
|
||||
fn get_min_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_min_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_min_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2349,14 +2337,12 @@ where
|
||||
fn get_max_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_max_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_max_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2373,14 +2359,12 @@ where
|
||||
fn get_left_shift_size_on_gpu(&self, rhs: &FheUint<Id2>) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_left_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_left_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2396,14 +2380,12 @@ where
|
||||
fn get_right_shift_size_on_gpu(&self, rhs: &FheUint<Id2>) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_right_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_right_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2419,14 +2401,12 @@ where
|
||||
fn get_rotate_left_size_on_gpu(&self, rhs: &FheUint<Id2>) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_rotate_left_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_rotate_left_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2442,14 +2422,12 @@ where
|
||||
fn get_rotate_right_size_on_gpu(&self, rhs: &FheUint<Id2>) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_rotate_right_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_rotate_right_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use crate::high_level_api::global_state;
|
||||
use crate::high_level_api::integers::FheIntId;
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
use crate::integer::block_decomposition::DecomposableInto;
|
||||
use crate::prelude::{OverflowingAdd, OverflowingMul, OverflowingSub};
|
||||
use crate::prelude::{OverflowingAdd, OverflowingMul, OverflowingNeg, OverflowingSub};
|
||||
use crate::{FheBool, FheInt};
|
||||
|
||||
impl<Id> OverflowingAdd<Self> for &FheInt<Id>
|
||||
@@ -537,3 +537,73 @@ where
|
||||
<&Self as OverflowingMul<&Self>>::overflowing_mul(&self, other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Id> OverflowingNeg for &FheInt<Id>
|
||||
where
|
||||
Id: FheIntId,
|
||||
{
|
||||
type Output = FheInt<Id>;
|
||||
|
||||
/// Negates self, overflowing if this is equal to the minimum value.
|
||||
///
|
||||
/// * The operation is modular, i.e. on overflow the result wraps around.
|
||||
/// * On overflow the [FheBool] is true (if self encrypts the minimum value), otherwise false
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```rust
|
||||
/// use tfhe::prelude::*;
|
||||
/// use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheInt16};
|
||||
///
|
||||
/// let (client_key, server_key) = generate_keys(ConfigBuilder::default());
|
||||
/// set_server_key(server_key);
|
||||
///
|
||||
/// let a = FheInt16::encrypt(i16::MIN, &client_key);
|
||||
///
|
||||
/// let (result, overflowed) = a.overflowing_neg();
|
||||
/// let (expected_result, expected_overflowed) = i16::MIN.overflowing_neg();
|
||||
/// let result: i16 = result.decrypt(&client_key);
|
||||
/// assert_eq!(result, expected_result);
|
||||
/// assert_eq!(overflowed.decrypt(&client_key), expected_overflowed);
|
||||
/// assert!(overflowed.decrypt(&client_key));
|
||||
/// ```
|
||||
fn overflowing_neg(self) -> (Self::Output, FheBool) {
|
||||
global_state::with_internal_keys(|key| match key {
|
||||
InternalServerKey::Cpu(cpu_key) => {
|
||||
let (result, overflow) = cpu_key
|
||||
.pbs_key()
|
||||
.overflowing_neg_parallelized(&*self.ciphertext.on_cpu());
|
||||
(
|
||||
FheInt::new(result, cpu_key.tag.clone()),
|
||||
FheBool::new(overflow, cpu_key.tag.clone()),
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(cuda_key) => {
|
||||
let (result, overflow) = cuda_key.pbs_key().overflowing_neg(
|
||||
&*self.ciphertext.on_gpu(&cuda_key.streams),
|
||||
&cuda_key.streams,
|
||||
);
|
||||
(
|
||||
FheInt::new(result, cuda_key.tag.clone()),
|
||||
FheBool::new(overflow, cuda_key.tag.clone()),
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "hpu")]
|
||||
InternalServerKey::Hpu(_device) => {
|
||||
panic!("Hpu does not support this overflowing_neg yet.")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<Id> OverflowingNeg for FheInt<Id>
|
||||
where
|
||||
Id: FheIntId,
|
||||
{
|
||||
type Output = Self;
|
||||
|
||||
fn overflowing_neg(self) -> (Self::Output, FheBool) {
|
||||
<&Self as OverflowingNeg>::overflowing_neg(&self)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
use crate::core_crypto::commons::numeric::CastFrom;
|
||||
use crate::high_level_api::errors::UnwrapResultExt;
|
||||
use crate::high_level_api::global_state;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::global_state::with_cuda_internal_keys;
|
||||
use crate::high_level_api::integers::signed::inner::SignedRadixCiphertext;
|
||||
use crate::high_level_api::integers::FheIntId;
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
@@ -409,13 +407,11 @@ where
|
||||
fn get_gt_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -424,13 +420,11 @@ where
|
||||
fn get_ge_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -439,13 +433,11 @@ where
|
||||
fn get_lt_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -454,13 +446,11 @@ where
|
||||
fn get_le_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -477,13 +467,11 @@ where
|
||||
fn get_min_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_min_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_min_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -499,13 +487,11 @@ where
|
||||
fn get_max_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_max_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_max_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -634,13 +620,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheInt<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_left_shift_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
@@ -692,13 +676,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheInt<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_right_shift_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
@@ -750,13 +732,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheInt<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_rotate_left_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
@@ -808,13 +788,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheInt<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_rotate_right_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
|
||||
@@ -4,11 +4,12 @@ use crate::high_level_api::integers::signed::tests::{
|
||||
use crate::high_level_api::integers::unsigned::tests::gpu::setup_gpu;
|
||||
use crate::prelude::{
|
||||
check_valid_cuda_malloc, AddSizeOnGpu, BitAndSizeOnGpu, BitNotSizeOnGpu, BitOrSizeOnGpu,
|
||||
BitXorSizeOnGpu, FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, FheTryEncrypt,
|
||||
RotateLeftSizeOnGpu, RotateRightSizeOnGpu, ShlSizeOnGpu, ShrSizeOnGpu, SubSizeOnGpu,
|
||||
BitXorSizeOnGpu, FheEncrypt, FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, FheTryEncrypt,
|
||||
IfThenElseSizeOnGpu, RotateLeftSizeOnGpu, RotateRightSizeOnGpu, ShlSizeOnGpu, ShrSizeOnGpu,
|
||||
SubSizeOnGpu,
|
||||
};
|
||||
use crate::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS;
|
||||
use crate::{FheInt32, FheUint32, GpuIndex};
|
||||
use crate::{FheBool, FheInt32, FheUint32, GpuIndex};
|
||||
use rand::Rng;
|
||||
|
||||
#[test]
|
||||
@@ -291,3 +292,35 @@ fn test_gpu_get_shift_rotate_size_on_gpu() {
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_gpu_get_if_then_else_size_on_gpu() {
|
||||
let cks = setup_gpu(Some(PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS));
|
||||
let mut rng = rand::thread_rng();
|
||||
let clear_a = rng.gen_range(1..=i32::MAX);
|
||||
let clear_b = rng.gen_range(1..=i32::MAX);
|
||||
let clear_c = rng.gen_range(0..=1);
|
||||
let mut a = FheInt32::try_encrypt(clear_a, &cks).unwrap();
|
||||
let mut b = FheInt32::try_encrypt(clear_b, &cks).unwrap();
|
||||
let c = FheBool::encrypt(clear_c != 0, &cks);
|
||||
a.move_to_current_device();
|
||||
b.move_to_current_device();
|
||||
let a = &a;
|
||||
let b = &b;
|
||||
|
||||
let if_then_else_tmp_buffer_size = c.get_if_then_else_size_on_gpu(a, b);
|
||||
assert!(check_valid_cuda_malloc(
|
||||
if_then_else_tmp_buffer_size,
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
let select_tmp_buffer_size = c.get_select_size_on_gpu(a, b);
|
||||
assert!(check_valid_cuda_malloc(
|
||||
select_tmp_buffer_size,
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
let cmux_tmp_buffer_size = c.get_cmux_size_on_gpu(a, b);
|
||||
assert!(check_valid_cuda_malloc(
|
||||
cmux_tmp_buffer_size,
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
}
|
||||
|
||||
@@ -6,8 +6,6 @@ use super::inner::RadixCiphertext;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::details::MaybeCloned;
|
||||
use crate::high_level_api::global_state;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::global_state::with_cuda_internal_keys;
|
||||
use crate::high_level_api::integers::FheUintId;
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
#[cfg(feature = "gpu")]
|
||||
@@ -2496,14 +2494,12 @@ where
|
||||
fn get_gt_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_gt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_gt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2512,14 +2508,12 @@ where
|
||||
fn get_ge_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_ge_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_ge_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2528,14 +2522,12 @@ where
|
||||
fn get_lt_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_lt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_lt_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2544,14 +2536,12 @@ where
|
||||
fn get_le_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_le_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_le_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2566,14 +2556,12 @@ where
|
||||
fn get_min_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_min_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_min_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2589,14 +2577,12 @@ where
|
||||
fn get_max_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_max_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_max_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2611,14 +2597,12 @@ where
|
||||
fn get_left_shift_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_left_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_left_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2633,14 +2617,12 @@ where
|
||||
fn get_right_shift_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_right_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_right_shift_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2655,14 +2637,12 @@ where
|
||||
fn get_rotate_left_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_rotate_left_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_rotate_left_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -2677,14 +2657,12 @@ where
|
||||
fn get_rotate_right_size_on_gpu(&self, rhs: &Self) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key.key.key.get_rotate_right_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_rotate_right_size_on_gpu(
|
||||
&*self.ciphertext.on_gpu(streams),
|
||||
&rhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use crate::high_level_api::global_state;
|
||||
use crate::high_level_api::integers::FheUintId;
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
use crate::integer::block_decomposition::DecomposableInto;
|
||||
use crate::prelude::{CastInto, OverflowingAdd, OverflowingMul, OverflowingSub};
|
||||
use crate::prelude::{CastInto, OverflowingAdd, OverflowingMul, OverflowingNeg, OverflowingSub};
|
||||
use crate::{FheBool, FheUint};
|
||||
|
||||
impl<Id> OverflowingAdd<Self> for &FheUint<Id>
|
||||
@@ -530,3 +530,50 @@ where
|
||||
<&Self as OverflowingMul<&Self>>::overflowing_mul(&self, other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Id> OverflowingNeg for &FheUint<Id>
|
||||
where
|
||||
Id: FheUintId,
|
||||
{
|
||||
type Output = FheUint<Id>;
|
||||
|
||||
fn overflowing_neg(self) -> (Self::Output, FheBool) {
|
||||
global_state::with_internal_keys(|key| match key {
|
||||
InternalServerKey::Cpu(cpu_key) => {
|
||||
let (result, overflow) = cpu_key
|
||||
.pbs_key()
|
||||
.overflowing_neg_parallelized(&*self.ciphertext.on_cpu());
|
||||
(
|
||||
FheUint::new(result, cpu_key.tag.clone()),
|
||||
FheBool::new(overflow, cpu_key.tag.clone()),
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "gpu")]
|
||||
InternalServerKey::Cuda(cuda_key) => {
|
||||
let (result, overflow) = cuda_key.pbs_key().overflowing_neg(
|
||||
&*self.ciphertext.on_gpu(&cuda_key.streams),
|
||||
&cuda_key.streams,
|
||||
);
|
||||
(
|
||||
FheUint::new(result, cuda_key.tag.clone()),
|
||||
FheBool::new(overflow, cuda_key.tag.clone()),
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "hpu")]
|
||||
InternalServerKey::Hpu(_device) => {
|
||||
panic!("Hpu does not support this overflowing_neg yet.")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<Id> OverflowingNeg for FheUint<Id>
|
||||
where
|
||||
Id: FheUintId,
|
||||
{
|
||||
type Output = Self;
|
||||
|
||||
fn overflowing_neg(self) -> (Self::Output, FheBool) {
|
||||
<&Self as OverflowingNeg>::overflowing_neg(&self)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,8 +7,6 @@ use super::inner::RadixCiphertext;
|
||||
use crate::error::InvalidRangeError;
|
||||
use crate::high_level_api::errors::UnwrapResultExt;
|
||||
use crate::high_level_api::global_state;
|
||||
#[cfg(feature = "gpu")]
|
||||
use crate::high_level_api::global_state::with_cuda_internal_keys;
|
||||
use crate::high_level_api::integers::FheUintId;
|
||||
use crate::high_level_api::keys::InternalServerKey;
|
||||
#[cfg(feature = "gpu")]
|
||||
@@ -318,13 +316,11 @@ where
|
||||
fn get_gt_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_gt_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_gt_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -333,13 +329,11 @@ where
|
||||
fn get_ge_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_ge_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_ge_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -348,13 +342,11 @@ where
|
||||
fn get_lt_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_lt_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_lt_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -363,13 +355,11 @@ where
|
||||
fn get_le_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_le_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -386,13 +376,11 @@ where
|
||||
fn get_min_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_min_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_min_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -408,13 +396,11 @@ where
|
||||
fn get_max_size_on_gpu(&self, _rhs: Clear) -> u64 {
|
||||
global_state::with_internal_keys(|key| {
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_max_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
})
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key
|
||||
.key
|
||||
.key
|
||||
.get_scalar_max_size_on_gpu(&*self.ciphertext.on_gpu(streams), streams)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
@@ -937,13 +923,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheUint<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_left_shift_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
@@ -996,13 +980,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheUint<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_right_shift_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
@@ -1055,13 +1037,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheUint<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_rotate_left_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
@@ -1114,13 +1094,11 @@ macro_rules! define_scalar_rotate_shifts {
|
||||
|lhs: &FheUint<_>, _rhs| {
|
||||
global_state::with_internal_keys(|key|
|
||||
if let InternalServerKey::Cuda(cuda_key) = key {
|
||||
with_cuda_internal_keys(|keys| {
|
||||
let streams = &keys.streams;
|
||||
let streams = &cuda_key.streams;
|
||||
cuda_key.key.key.get_scalar_rotate_right_size_on_gpu(
|
||||
&*lhs.ciphertext.on_gpu(streams),
|
||||
streams,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
0
|
||||
})
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
use crate::high_level_api::traits::AddSizeOnGpu;
|
||||
use crate::prelude::{
|
||||
check_valid_cuda_malloc, BitAndSizeOnGpu, BitNotSizeOnGpu, BitOrSizeOnGpu, BitXorSizeOnGpu,
|
||||
FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, FheTryEncrypt, RotateLeftSizeOnGpu,
|
||||
RotateRightSizeOnGpu, ShlSizeOnGpu, ShrSizeOnGpu, SubSizeOnGpu,
|
||||
FheEncrypt, FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, FheTryEncrypt,
|
||||
IfThenElseSizeOnGpu, RotateLeftSizeOnGpu, RotateRightSizeOnGpu, ShlSizeOnGpu, ShrSizeOnGpu,
|
||||
SubSizeOnGpu,
|
||||
};
|
||||
use crate::shortint::parameters::{
|
||||
TestParameters, PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS,
|
||||
};
|
||||
use crate::shortint::ClassicPBSParameters;
|
||||
use crate::{set_server_key, ClientKey, ConfigBuilder, FheUint32, GpuIndex};
|
||||
use crate::{set_server_key, ClientKey, ConfigBuilder, FheBool, FheUint32, GpuIndex};
|
||||
use rand::Rng;
|
||||
|
||||
/// GPU setup for tests
|
||||
@@ -384,3 +385,35 @@ fn test_gpu_get_shift_rotate_size_on_gpu() {
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_gpu_get_if_then_else_size_on_gpu() {
|
||||
let cks = setup_gpu(Some(PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS));
|
||||
let mut rng = rand::thread_rng();
|
||||
let clear_a = rng.gen_range(1..=u32::MAX);
|
||||
let clear_b = rng.gen_range(1..=u32::MAX);
|
||||
let clear_c = rng.gen_range(0..=1);
|
||||
let mut a = FheUint32::try_encrypt(clear_a, &cks).unwrap();
|
||||
let mut b = FheUint32::try_encrypt(clear_b, &cks).unwrap();
|
||||
let c = FheBool::encrypt(clear_c != 0, &cks);
|
||||
a.move_to_current_device();
|
||||
b.move_to_current_device();
|
||||
let a = &a;
|
||||
let b = &b;
|
||||
|
||||
let if_then_else_tmp_buffer_size = c.get_if_then_else_size_on_gpu(a, b);
|
||||
assert!(check_valid_cuda_malloc(
|
||||
if_then_else_tmp_buffer_size,
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
let select_tmp_buffer_size = c.get_select_size_on_gpu(a, b);
|
||||
assert!(check_valid_cuda_malloc(
|
||||
select_tmp_buffer_size,
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
let cmux_tmp_buffer_size = c.get_cmux_size_on_gpu(a, b);
|
||||
assert!(check_valid_cuda_malloc(
|
||||
cmux_tmp_buffer_size,
|
||||
GpuIndex::new(0)
|
||||
));
|
||||
}
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
pub use crate::high_level_api::traits::{
|
||||
BitSlice, CiphertextList, DivRem, FheDecrypt, FheEncrypt, FheEq, FheKeyswitch, FheMax, FheMin,
|
||||
FheOrd, FheTrivialEncrypt, FheTryEncrypt, FheTryTrivialEncrypt, FheWait, IfThenElse,
|
||||
OverflowingAdd, OverflowingMul, OverflowingSub, RotateLeft, RotateLeftAssign, RotateRight,
|
||||
RotateRightAssign, ScalarIfThenElse, SquashNoise, Tagged,
|
||||
OverflowingAdd, OverflowingMul, OverflowingNeg, OverflowingSub, RotateLeft, RotateLeftAssign,
|
||||
RotateRight, RotateRightAssign, ScalarIfThenElse, SquashNoise, Tagged,
|
||||
};
|
||||
#[cfg(feature = "hpu")]
|
||||
pub use crate::high_level_api::traits::{FheHpu, HpuHandle};
|
||||
@@ -27,6 +27,6 @@ pub use crate::high_level_api::strings::traits::*;
|
||||
#[cfg(feature = "gpu")]
|
||||
pub use crate::high_level_api::traits::{
|
||||
AddSizeOnGpu, BitAndSizeOnGpu, BitNotSizeOnGpu, BitOrSizeOnGpu, BitXorSizeOnGpu,
|
||||
FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, RotateLeftSizeOnGpu, RotateRightSizeOnGpu,
|
||||
ShlSizeOnGpu, ShrSizeOnGpu, SizeOnGpu, SubSizeOnGpu,
|
||||
FheMaxSizeOnGpu, FheMinSizeOnGpu, FheOrdSizeOnGpu, IfThenElseSizeOnGpu, RotateLeftSizeOnGpu,
|
||||
RotateRightSizeOnGpu, ShlSizeOnGpu, ShrSizeOnGpu, SizeOnGpu, SubSizeOnGpu,
|
||||
};
|
||||
|
||||
@@ -180,6 +180,12 @@ pub trait OverflowingMul<Rhs> {
|
||||
fn overflowing_mul(self, rhs: Rhs) -> (Self::Output, FheBool);
|
||||
}
|
||||
|
||||
pub trait OverflowingNeg {
|
||||
type Output;
|
||||
|
||||
fn overflowing_neg(self) -> (Self::Output, FheBool);
|
||||
}
|
||||
|
||||
pub trait BitSlice<Bounds> {
|
||||
type Output;
|
||||
|
||||
@@ -302,3 +308,14 @@ pub trait RotateLeftSizeOnGpu<Rhs = Self> {
|
||||
pub trait RotateRightSizeOnGpu<Rhs = Self> {
|
||||
fn get_rotate_right_size_on_gpu(&self, other: Rhs) -> u64;
|
||||
}
|
||||
|
||||
#[cfg(feature = "gpu")]
|
||||
pub trait IfThenElseSizeOnGpu<Ciphertext> {
|
||||
fn get_if_then_else_size_on_gpu(&self, ct_then: &Ciphertext, ct_else: &Ciphertext) -> u64;
|
||||
fn get_select_size_on_gpu(&self, ct_when_true: &Ciphertext, ct_when_false: &Ciphertext) -> u64 {
|
||||
self.get_if_then_else_size_on_gpu(ct_when_true, ct_when_false)
|
||||
}
|
||||
fn get_cmux_size_on_gpu(&self, ct_then: &Ciphertext, ct_else: &Ciphertext) -> u64 {
|
||||
self.get_if_then_else_size_on_gpu(ct_then, ct_else)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,40 +77,6 @@ impl CudaRadixCiphertextInfo {
|
||||
new_block_info
|
||||
}
|
||||
|
||||
pub(crate) fn after_extend_radix_with_trivial_zero_blocks_msb(
|
||||
&self,
|
||||
num_blocks: usize,
|
||||
) -> Self {
|
||||
assert!(num_blocks > 0);
|
||||
|
||||
let mut new_block_info = Self {
|
||||
blocks: Vec::with_capacity(self.blocks.len() + num_blocks),
|
||||
};
|
||||
for &b in self.blocks.iter() {
|
||||
new_block_info.blocks.push(b);
|
||||
}
|
||||
for _ in 0..num_blocks {
|
||||
new_block_info.blocks.push(CudaBlockInfo {
|
||||
degree: Degree::new(0),
|
||||
message_modulus: self.blocks.first().unwrap().message_modulus,
|
||||
carry_modulus: self.blocks.first().unwrap().carry_modulus,
|
||||
atomic_pattern: self.blocks.first().unwrap().atomic_pattern,
|
||||
noise_level: NoiseLevel::ZERO,
|
||||
});
|
||||
}
|
||||
new_block_info
|
||||
}
|
||||
|
||||
pub(crate) fn after_trim_radix_blocks_lsb(&self, num_blocks: usize) -> Self {
|
||||
let mut new_block_info = Self {
|
||||
blocks: Vec::with_capacity(self.blocks.len().saturating_sub(num_blocks)),
|
||||
};
|
||||
new_block_info
|
||||
.blocks
|
||||
.extend(self.blocks[num_blocks..].iter().copied());
|
||||
new_block_info
|
||||
}
|
||||
|
||||
pub(crate) fn after_trim_radix_blocks_msb(&self, num_blocks: usize) -> Self {
|
||||
assert!(num_blocks > 0);
|
||||
|
||||
|
||||
@@ -3608,6 +3608,60 @@ pub unsafe fn unchecked_cmux_integer_radix_kb_async<T: UnsignedInteger, B: Numer
|
||||
update_noise_degree(radix_lwe_out, &cuda_ffi_radix_lwe_out);
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn get_cmux_integer_radix_kb_size_on_gpu(
|
||||
streams: &CudaStreams,
|
||||
message_modulus: MessageModulus,
|
||||
carry_modulus: CarryModulus,
|
||||
glwe_dimension: GlweDimension,
|
||||
polynomial_size: PolynomialSize,
|
||||
big_lwe_dimension: LweDimension,
|
||||
small_lwe_dimension: LweDimension,
|
||||
ks_level: DecompositionLevelCount,
|
||||
ks_base_log: DecompositionBaseLog,
|
||||
pbs_level: DecompositionLevelCount,
|
||||
pbs_base_log: DecompositionBaseLog,
|
||||
num_blocks: u32,
|
||||
pbs_type: PBSType,
|
||||
grouping_factor: LweBskGroupingFactor,
|
||||
noise_reduction_key: Option<&CudaModulusSwitchNoiseReductionKey>,
|
||||
) -> u64 {
|
||||
let allocate_ms_noise_array = noise_reduction_key.is_some();
|
||||
let mut mem_ptr: *mut i8 = std::ptr::null_mut();
|
||||
let size_tracker = unsafe {
|
||||
scratch_cuda_integer_radix_cmux_kb_64(
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes_ptr(),
|
||||
streams.len() as u32,
|
||||
std::ptr::addr_of_mut!(mem_ptr),
|
||||
glwe_dimension.0 as u32,
|
||||
polynomial_size.0 as u32,
|
||||
big_lwe_dimension.0 as u32,
|
||||
small_lwe_dimension.0 as u32,
|
||||
ks_level.0 as u32,
|
||||
ks_base_log.0 as u32,
|
||||
pbs_level.0 as u32,
|
||||
pbs_base_log.0 as u32,
|
||||
grouping_factor.0 as u32,
|
||||
num_blocks,
|
||||
message_modulus.0 as u32,
|
||||
carry_modulus.0 as u32,
|
||||
pbs_type as u32,
|
||||
false,
|
||||
allocate_ms_noise_array,
|
||||
)
|
||||
};
|
||||
unsafe {
|
||||
cleanup_cuda_integer_radix_cmux(
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes_ptr(),
|
||||
streams.len() as u32,
|
||||
std::ptr::addr_of_mut!(mem_ptr),
|
||||
);
|
||||
}
|
||||
size_tracker
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// # Safety
|
||||
///
|
||||
@@ -3954,6 +4008,7 @@ pub unsafe fn unchecked_partial_sum_ciphertexts_integer_radix_kb_assign_async<
|
||||
streams: &CudaStreams,
|
||||
result: &mut CudaRadixCiphertext,
|
||||
radix_list: &mut CudaRadixCiphertext,
|
||||
reduce_degrees_for_single_carry_propagation: bool,
|
||||
bootstrapping_key: &CudaVec<B>,
|
||||
keyswitch_key: &CudaVec<T>,
|
||||
message_modulus: MessageModulus,
|
||||
@@ -4048,6 +4103,7 @@ pub unsafe fn unchecked_partial_sum_ciphertexts_integer_radix_kb_assign_async<
|
||||
streams.len() as u32,
|
||||
&raw mut cuda_ffi_result,
|
||||
&raw mut cuda_ffi_radix_list,
|
||||
reduce_degrees_for_single_carry_propagation,
|
||||
mem_ptr,
|
||||
bootstrapping_key.ptr.as_ptr(),
|
||||
keyswitch_key.ptr.as_ptr(),
|
||||
@@ -5387,3 +5443,60 @@ pub unsafe fn unchecked_negate_integer_radix_async(
|
||||
);
|
||||
update_noise_degree(radix_lwe_out, &cuda_ffi_radix_lwe_out);
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must not
|
||||
/// be dropped until streams is synchronized
|
||||
pub unsafe fn trim_radix_blocks_lsb_async(
|
||||
output: &mut CudaRadixCiphertext,
|
||||
input: &CudaRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
let mut input_degrees = input.info.blocks.iter().map(|b| b.degree.0).collect();
|
||||
let mut input_noise_levels = input.info.blocks.iter().map(|b| b.noise_level.0).collect();
|
||||
let mut output_degrees = output.info.blocks.iter().map(|b| b.degree.0).collect();
|
||||
let mut output_noise_levels = output.info.blocks.iter().map(|b| b.noise_level.0).collect();
|
||||
|
||||
let mut cuda_ffi_output =
|
||||
prepare_cuda_radix_ffi(output, &mut output_degrees, &mut output_noise_levels);
|
||||
|
||||
let cuda_ffi_input = prepare_cuda_radix_ffi(input, &mut input_degrees, &mut input_noise_levels);
|
||||
|
||||
trim_radix_blocks_lsb_64(
|
||||
&raw mut cuda_ffi_output,
|
||||
&raw const cuda_ffi_input,
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes_ptr(),
|
||||
);
|
||||
update_noise_degree(output, &cuda_ffi_output);
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must not
|
||||
/// be dropped until streams is synchronized
|
||||
pub unsafe fn extend_radix_with_trivial_zero_blocks_msb_async(
|
||||
output: &mut CudaRadixCiphertext,
|
||||
input: &CudaRadixCiphertext,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
let mut input_degrees = input.info.blocks.iter().map(|b| b.degree.0).collect();
|
||||
let mut input_noise_levels = input.info.blocks.iter().map(|b| b.noise_level.0).collect();
|
||||
let mut output_degrees = output.info.blocks.iter().map(|b| b.degree.0).collect();
|
||||
let mut output_noise_levels = output.info.blocks.iter().map(|b| b.noise_level.0).collect();
|
||||
|
||||
let mut cuda_ffi_output =
|
||||
prepare_cuda_radix_ffi(output, &mut output_degrees, &mut output_noise_levels);
|
||||
|
||||
let cuda_ffi_input = prepare_cuda_radix_ffi(input, &mut input_degrees, &mut input_noise_levels);
|
||||
|
||||
extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
&raw mut cuda_ffi_output,
|
||||
&raw const cuda_ffi_input,
|
||||
streams.ptr.as_ptr(),
|
||||
streams.gpu_indexes_ptr(),
|
||||
);
|
||||
|
||||
update_noise_degree(output, &cuda_ffi_output);
|
||||
}
|
||||
|
||||
@@ -337,6 +337,7 @@ impl CudaServerKey {
|
||||
&self,
|
||||
result: &mut T,
|
||||
ciphertexts: &[T],
|
||||
reduce_degrees_for_single_carry_propagation: bool,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
if ciphertexts.is_empty() {
|
||||
@@ -377,6 +378,7 @@ impl CudaServerKey {
|
||||
streams,
|
||||
result.as_mut(),
|
||||
&mut terms,
|
||||
reduce_degrees_for_single_carry_propagation,
|
||||
&d_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
self.message_modulus,
|
||||
@@ -402,6 +404,7 @@ impl CudaServerKey {
|
||||
streams,
|
||||
result.as_mut(),
|
||||
&mut terms,
|
||||
reduce_degrees_for_single_carry_propagation,
|
||||
&d_multibit_bsk.d_vec,
|
||||
&self.key_switching_key.d_vec,
|
||||
self.message_modulus,
|
||||
@@ -445,7 +448,7 @@ impl CudaServerKey {
|
||||
streams: &CudaStreams,
|
||||
) -> T {
|
||||
let mut result = self
|
||||
.unchecked_partial_sum_ciphertexts_async(ciphertexts, streams)
|
||||
.unchecked_partial_sum_ciphertexts_async(ciphertexts, true, streams)
|
||||
.unwrap();
|
||||
|
||||
self.propagate_single_carry_assign_async(&mut result, streams, None, OutputFlag::None);
|
||||
@@ -458,7 +461,8 @@ impl CudaServerKey {
|
||||
ciphertexts: &[T],
|
||||
streams: &CudaStreams,
|
||||
) -> Option<T> {
|
||||
let result = unsafe { self.unchecked_partial_sum_ciphertexts_async(ciphertexts, streams) };
|
||||
let result = unsafe { self.unchecked_partial_sum_ciphertexts_async(ciphertexts,
|
||||
false, streams) };
|
||||
streams.synchronize();
|
||||
result
|
||||
}
|
||||
@@ -470,6 +474,7 @@ impl CudaServerKey {
|
||||
pub unsafe fn unchecked_partial_sum_ciphertexts_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ciphertexts: &[T],
|
||||
reduce_degrees_for_single_carry_propagation: bool,
|
||||
streams: &CudaStreams,
|
||||
) -> Option<T> {
|
||||
if ciphertexts.is_empty() {
|
||||
@@ -483,7 +488,8 @@ impl CudaServerKey {
|
||||
return Some(result);
|
||||
}
|
||||
|
||||
self.unchecked_partial_sum_ciphertexts_assign_async(&mut result, ciphertexts, streams);
|
||||
self.unchecked_partial_sum_ciphertexts_assign_async(&mut result, ciphertexts,
|
||||
reduce_degrees_for_single_carry_propagation, streams);
|
||||
|
||||
Some(result)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ use crate::core_crypto::gpu::algorithms::{
|
||||
use crate::core_crypto::gpu::vec::CudaVec;
|
||||
use crate::core_crypto::gpu::CudaStreams;
|
||||
use crate::core_crypto::prelude::LweBskGroupingFactor;
|
||||
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
use crate::integer::gpu::{
|
||||
@@ -96,6 +97,31 @@ impl CudaServerKey {
|
||||
ct.as_mut().info = ct.as_ref().info.after_bitnot();
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn unchecked_boolean_bitnot_assign_async(
|
||||
&self,
|
||||
ct: &mut CudaBooleanBlock,
|
||||
streams: &CudaStreams,
|
||||
) {
|
||||
// We do (-ciphertext) + (msg_mod -1) as it allows to avoid an allocation
|
||||
cuda_lwe_ciphertext_negate_assign(&mut ct.0.as_mut().d_blocks, streams);
|
||||
|
||||
let ct_blocks = ct.0.as_ref().d_blocks.lwe_ciphertext_count().0;
|
||||
|
||||
let shift_plaintext = self.encoding().encode(Cleartext(1u64)).0;
|
||||
|
||||
let scalar_vector = vec![shift_plaintext; ct_blocks];
|
||||
let mut d_decomposed_scalar =
|
||||
CudaVec::<u64>::new_async(ct.0.as_ref().d_blocks.lwe_ciphertext_count().0, streams, 0);
|
||||
d_decomposed_scalar.copy_from_cpu_async(scalar_vector.as_slice(), streams, 0);
|
||||
|
||||
cuda_lwe_ciphertext_plaintext_add_assign(
|
||||
&mut ct.0.as_mut().d_blocks,
|
||||
&d_decomposed_scalar,
|
||||
streams,
|
||||
);
|
||||
// Neither noise level nor the degree changes
|
||||
}
|
||||
|
||||
pub fn unchecked_bitnot_assign<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
@@ -165,7 +191,7 @@ impl CudaServerKey {
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
/// not be dropped until streams is synchronized
|
||||
pub unsafe fn unchecked_bitop_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
@@ -560,7 +586,7 @@ impl CudaServerKey {
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
/// not be dropped until streams is synchronized
|
||||
pub unsafe fn bitand_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
@@ -666,7 +692,7 @@ impl CudaServerKey {
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
/// not be dropped until streams is synchronized
|
||||
pub unsafe fn bitor_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
@@ -771,7 +797,7 @@ impl CudaServerKey {
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
/// not be dropped until streams is synchronized
|
||||
pub unsafe fn bitxor_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct_left: &mut T,
|
||||
@@ -869,7 +895,7 @@ impl CudaServerKey {
|
||||
/// # Safety
|
||||
///
|
||||
/// - `streams` __must__ be synchronized to guarantee computation has finished, and inputs must
|
||||
/// not be dropped until streams is synchronised
|
||||
/// not be dropped until streams is synchronized
|
||||
pub unsafe fn bitnot_assign_async<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
ct: &mut T,
|
||||
|
||||
@@ -3,7 +3,10 @@ use crate::core_crypto::prelude::LweBskGroupingFactor;
|
||||
use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
|
||||
use crate::integer::gpu::ciphertext::CudaIntegerRadixCiphertext;
|
||||
use crate::integer::gpu::server_key::CudaBootstrappingKey;
|
||||
use crate::integer::gpu::{unchecked_cmux_integer_radix_kb_async, CudaServerKey, PBSType};
|
||||
use crate::integer::gpu::{
|
||||
get_cmux_integer_radix_kb_size_on_gpu, get_full_propagate_assign_size_on_gpu,
|
||||
unchecked_cmux_integer_radix_kb_async, CudaServerKey, PBSType,
|
||||
};
|
||||
|
||||
impl CudaServerKey {
|
||||
/// # Safety
|
||||
@@ -131,4 +134,113 @@ impl CudaServerKey {
|
||||
|
||||
self.unchecked_if_then_else(condition, true_ct, false_ct, stream)
|
||||
}
|
||||
pub fn get_if_then_else_size_on_gpu<T: CudaIntegerRadixCiphertext>(
|
||||
&self,
|
||||
_condition: &CudaBooleanBlock,
|
||||
true_ct: &T,
|
||||
false_ct: &T,
|
||||
streams: &CudaStreams,
|
||||
) -> u64 {
|
||||
assert_eq!(
|
||||
true_ct.as_ref().d_blocks.lwe_dimension(),
|
||||
false_ct.as_ref().d_blocks.lwe_dimension()
|
||||
);
|
||||
assert_eq!(
|
||||
true_ct.as_ref().d_blocks.lwe_ciphertext_count(),
|
||||
false_ct.as_ref().d_blocks.lwe_ciphertext_count()
|
||||
);
|
||||
let full_prop_mem = match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => get_full_propagate_assign_size_on_gpu(
|
||||
streams,
|
||||
d_bsk.input_lwe_dimension(),
|
||||
d_bsk.glwe_dimension(),
|
||||
d_bsk.polynomial_size(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_bsk.decomp_level_count(),
|
||||
d_bsk.decomp_base_log(),
|
||||
self.message_modulus,
|
||||
self.carry_modulus,
|
||||
PBSType::Classical,
|
||||
LweBskGroupingFactor(0),
|
||||
d_bsk.d_ms_noise_reduction_key.as_ref(),
|
||||
),
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
get_full_propagate_assign_size_on_gpu(
|
||||
streams,
|
||||
d_multibit_bsk.input_lwe_dimension(),
|
||||
d_multibit_bsk.glwe_dimension(),
|
||||
d_multibit_bsk.polynomial_size(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_multibit_bsk.decomp_level_count(),
|
||||
d_multibit_bsk.decomp_base_log(),
|
||||
self.message_modulus,
|
||||
self.carry_modulus,
|
||||
PBSType::MultiBit,
|
||||
d_multibit_bsk.grouping_factor,
|
||||
None,
|
||||
)
|
||||
}
|
||||
};
|
||||
let actual_full_prop_mem = match (
|
||||
true_ct.block_carries_are_empty(),
|
||||
false_ct.block_carries_are_empty(),
|
||||
) {
|
||||
(true, true) => 0,
|
||||
(true, false) => self.get_ciphertext_size_on_gpu(true_ct) + full_prop_mem,
|
||||
(false, true) => full_prop_mem,
|
||||
(false, false) => self.get_ciphertext_size_on_gpu(false_ct) + full_prop_mem,
|
||||
};
|
||||
|
||||
let lwe_ciphertext_count = true_ct.as_ref().d_blocks.lwe_ciphertext_count();
|
||||
|
||||
let cmux_mem = match &self.bootstrapping_key {
|
||||
CudaBootstrappingKey::Classic(d_bsk) => get_cmux_integer_radix_kb_size_on_gpu(
|
||||
streams,
|
||||
self.message_modulus,
|
||||
self.carry_modulus,
|
||||
d_bsk.glwe_dimension,
|
||||
d_bsk.polynomial_size,
|
||||
self.key_switching_key
|
||||
.input_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key
|
||||
.output_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_bsk.decomp_level_count,
|
||||
d_bsk.decomp_base_log,
|
||||
lwe_ciphertext_count.0 as u32,
|
||||
PBSType::Classical,
|
||||
LweBskGroupingFactor(0),
|
||||
d_bsk.d_ms_noise_reduction_key.as_ref(),
|
||||
),
|
||||
CudaBootstrappingKey::MultiBit(d_multibit_bsk) => {
|
||||
get_cmux_integer_radix_kb_size_on_gpu(
|
||||
streams,
|
||||
self.message_modulus,
|
||||
self.carry_modulus,
|
||||
d_multibit_bsk.glwe_dimension,
|
||||
d_multibit_bsk.polynomial_size,
|
||||
self.key_switching_key
|
||||
.input_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key
|
||||
.output_key_lwe_size()
|
||||
.to_lwe_dimension(),
|
||||
self.key_switching_key.decomposition_level_count(),
|
||||
self.key_switching_key.decomposition_base_log(),
|
||||
d_multibit_bsk.decomp_level_count,
|
||||
d_multibit_bsk.decomp_base_log,
|
||||
lwe_ciphertext_count.0 as u32,
|
||||
PBSType::MultiBit,
|
||||
d_multibit_bsk.grouping_factor,
|
||||
None,
|
||||
)
|
||||
}
|
||||
};
|
||||
actual_full_prop_mem.max(cmux_mem)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,8 +93,7 @@ impl CudaServerKey {
|
||||
},
|
||||
);
|
||||
|
||||
let mut output_cts: T =
|
||||
self.create_trivial_zero_radix_async(num_ct_blocks * num_ct_blocks, streams);
|
||||
let mut output_cts: T = self.create_trivial_zero_radix_async(num_ct_blocks, streams);
|
||||
|
||||
self.compute_prefix_sum_hillis_steele_async(
|
||||
output_cts.as_mut(),
|
||||
@@ -455,7 +454,7 @@ impl CudaServerKey {
|
||||
cts.push(new_trivial);
|
||||
|
||||
let result = self
|
||||
.unchecked_partial_sum_ciphertexts_async(&cts, streams)
|
||||
.unchecked_partial_sum_ciphertexts_async(&cts, false, streams)
|
||||
.expect("internal error, empty ciphertext count");
|
||||
|
||||
// This is the part where we extract message and carry blocks
|
||||
@@ -497,28 +496,6 @@ impl CudaServerKey {
|
||||
.as_mut_slice(0..lwe_size, 0)
|
||||
.unwrap();
|
||||
|
||||
let mut carry_blocks_last = carry_blocks
|
||||
.as_mut()
|
||||
.d_blocks
|
||||
.0
|
||||
.d_vec
|
||||
.as_mut_slice(
|
||||
lwe_size * (counter_num_blocks - 1)..lwe_size * counter_num_blocks,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
carry_blocks_last.copy_from_gpu_async(&trivial_last_block_slice, streams, 0);
|
||||
carry_blocks.as_mut().info.blocks.last_mut().unwrap().degree =
|
||||
Degree(self.message_modulus.0 - 1);
|
||||
carry_blocks
|
||||
.as_mut()
|
||||
.info
|
||||
.blocks
|
||||
.last_mut()
|
||||
.unwrap()
|
||||
.noise_level = NoiseLevel::ZERO;
|
||||
|
||||
self.apply_lookup_table_async(
|
||||
carry_blocks.as_mut(),
|
||||
result.as_ref(),
|
||||
@@ -527,10 +504,43 @@ impl CudaServerKey {
|
||||
streams,
|
||||
);
|
||||
|
||||
let mut rotated_carry_blocks: CudaSignedRadixCiphertext =
|
||||
self.create_trivial_zero_radix(counter_num_blocks, streams);
|
||||
|
||||
let mut rotated_slice = rotated_carry_blocks
|
||||
.as_mut()
|
||||
.d_blocks
|
||||
.0
|
||||
.d_vec
|
||||
.as_mut_slice(0..(counter_num_blocks) * lwe_size, 0)
|
||||
.unwrap();
|
||||
|
||||
let first_block;
|
||||
let last_blocks;
|
||||
(first_block, last_blocks) = rotated_slice.split_at_mut(lwe_size, 0);
|
||||
|
||||
let mut tmp_carry_blocks3 = carry_blocks.duplicate(streams);
|
||||
let carry_slice = tmp_carry_blocks3
|
||||
.as_mut()
|
||||
.d_blocks
|
||||
.0
|
||||
.d_vec
|
||||
.as_mut_slice(0..(counter_num_blocks - 1) * lwe_size, 0)
|
||||
.unwrap();
|
||||
|
||||
last_blocks
|
||||
.unwrap()
|
||||
.copy_from_gpu_async(&carry_slice, streams, 0);
|
||||
first_block
|
||||
.unwrap()
|
||||
.copy_from_gpu_async(&trivial_last_block_slice, streams, 0);
|
||||
let mut ciphertexts = Vec::<CudaSignedRadixCiphertext>::with_capacity(3);
|
||||
|
||||
for mut info in &mut rotated_carry_blocks.ciphertext.info.blocks {
|
||||
info.degree = Degree(self.message_modulus.0 - 1);
|
||||
}
|
||||
ciphertexts.push(message_blocks);
|
||||
ciphertexts.push(carry_blocks);
|
||||
ciphertexts.push(rotated_carry_blocks);
|
||||
|
||||
let trivial_ct: CudaSignedRadixCiphertext =
|
||||
self.create_trivial_radix_async(2u32, counter_num_blocks, streams);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user