mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 15:48:20 -05:00
Compare commits
85 Commits
chore/rust
...
al/debug_s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
071934532e | ||
|
|
87b41ebbe5 | ||
|
|
7ba7dd3e19 | ||
|
|
7f122bb435 | ||
|
|
56c0811751 | ||
|
|
580ac5aa99 | ||
|
|
cf34e9b7f0 | ||
|
|
856fc1a709 | ||
|
|
fe0a195630 | ||
|
|
aca7e79585 | ||
|
|
c0e89a53ef | ||
|
|
312952007f | ||
|
|
ff51ed3f34 | ||
|
|
9737bdcb98 | ||
|
|
87a43a4900 | ||
|
|
345bdbf17f | ||
|
|
cc54ba2236 | ||
|
|
11df6c69ee | ||
|
|
b76f4dbfe0 | ||
|
|
be21c15c80 | ||
|
|
aa51b25313 | ||
|
|
300c95fe3d | ||
|
|
524adda8f6 | ||
|
|
dedcf205b4 | ||
|
|
2c8d4c0fb0 | ||
|
|
3370fb5b7e | ||
|
|
cd77eac42b | ||
|
|
40f20b4ecb | ||
|
|
59a78c76a9 | ||
|
|
1025246b17 | ||
|
|
338e9eaeef | ||
|
|
0bec4d2ba1 | ||
|
|
c5fab98900 | ||
|
|
14e1ee5bd3 | ||
|
|
52bc778629 | ||
|
|
10405c9836 | ||
|
|
5eaf6cec55 | ||
|
|
3bfacc1e9d | ||
|
|
a47a418d41 | ||
|
|
75b3141e19 | ||
|
|
d01328e0fe | ||
|
|
6e102b5fa1 | ||
|
|
8aa6fa514e | ||
|
|
21a19cd3c5 | ||
|
|
f51c70d536 | ||
|
|
66e3c02838 | ||
|
|
408e81c45a | ||
|
|
4152906c5d | ||
|
|
9fc8a0b5bc | ||
|
|
5dc3e59d13 | ||
|
|
b40996a7e5 | ||
|
|
b066ef19fa | ||
|
|
25d008bae8 | ||
|
|
2749c1088c | ||
|
|
c19cd9f021 | ||
|
|
45fdba04b1 | ||
|
|
69d46810b8 | ||
|
|
a16eeb983f | ||
|
|
8278a9373c | ||
|
|
e2a2768484 | ||
|
|
57cfc38b66 | ||
|
|
259d125434 | ||
|
|
2571196b41 | ||
|
|
9f3dc6167d | ||
|
|
59c17692a3 | ||
|
|
e29d615b9d | ||
|
|
8caff604ed | ||
|
|
16badf0c00 | ||
|
|
99a27c1cbe | ||
|
|
9131aaa383 | ||
|
|
a01949e630 | ||
|
|
30a58cdd1a | ||
|
|
03325bf94e | ||
|
|
786fe66495 | ||
|
|
9ee8259002 | ||
|
|
a7d8d2b1d4 | ||
|
|
8d1058364c | ||
|
|
0280dbeb41 | ||
|
|
97b5973e4c | ||
|
|
406425dca4 | ||
|
|
fd79c4f972 | ||
|
|
a96970e8c3 | ||
|
|
67f11a44df | ||
|
|
aa6dadfe69 | ||
|
|
ca1c5659a1 |
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
@@ -6,6 +6,7 @@ self-hosted-runner:
|
||||
- large_windows_16_latest
|
||||
- large_ubuntu_16
|
||||
- large_ubuntu_16-22.04
|
||||
- v80-desktop
|
||||
# Configuration variables in array of strings defined in your repository or
|
||||
# organization. `null` means disabling configuration variables check.
|
||||
# Empty array means no configuration variable is allowed.
|
||||
|
||||
4
.github/actions/gpu_setup/action.yml
vendored
4
.github/actions/gpu_setup/action.yml
vendored
@@ -33,7 +33,9 @@ runs:
|
||||
if: inputs.github-instance == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
TOOLKIT_VERSION="$(echo ${CUDA_VERSION} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${env.CUDA_KEYRING_PACKAGE}
|
||||
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
|
||||
sha256sum -c checksum
|
||||
|
||||
@@ -23,8 +23,8 @@ on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -67,7 +67,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -126,9 +126,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
10
.github/workflows/aws_tfhe_fast_tests.yml
vendored
10
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -24,8 +24,8 @@ on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -174,7 +174,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -182,6 +182,7 @@ jobs:
|
||||
if: needs.should-run.outputs.csprng_test == 'true'
|
||||
run: |
|
||||
make test_tfhe_csprng
|
||||
make test_tfhe_csprng_big_endian
|
||||
|
||||
- name: Run tfhe-zk-pok tests
|
||||
if: needs.should-run.outputs.zk_pok_test == 'true'
|
||||
@@ -272,9 +273,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
|
||||
9
.github/workflows/aws_tfhe_integer_tests.yml
vendored
9
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -30,8 +30,8 @@ on:
|
||||
branches:
|
||||
- main
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -114,7 +114,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -142,9 +142,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
@@ -30,8 +30,8 @@ on:
|
||||
branches:
|
||||
- main
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -147,9 +147,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
9
.github/workflows/aws_tfhe_tests.yml
vendored
9
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -27,8 +27,8 @@ on:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -185,7 +185,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -254,9 +254,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
9
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
9
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -23,8 +23,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -123,9 +123,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
16
.github/workflows/benchmark_boolean.yml
vendored
16
.github/workflows/benchmark_boolean.yml
vendored
@@ -58,14 +58,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -93,7 +96,7 @@ jobs:
|
||||
|
||||
- name: Parse key sizes results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/boolean_key_sizes.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/boolean_key_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
@@ -114,8 +117,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
14
.github/workflows/benchmark_core_crypto.yml
vendored
14
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -58,14 +58,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -107,8 +110,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
18
.github/workflows/benchmark_dex.yml
vendored
18
.github/workflows/benchmark_dex.yml
vendored
@@ -58,14 +58,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -97,13 +100,13 @@ jobs:
|
||||
|
||||
- name: Parse swap request PBS counts
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/dex_swap_request_pbs_count.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_request_pbs_count.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Parse swap claim PBS counts
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/dex_swap_claim_pbs_count.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_claim_pbs_count.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
@@ -116,8 +119,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
16
.github/workflows/benchmark_erc20.yml
vendored
16
.github/workflows/benchmark_erc20.yml
vendored
@@ -59,14 +59,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -98,7 +101,7 @@ jobs:
|
||||
|
||||
- name: Parse PBS counts
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/erc20_pbs_count.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
@@ -111,8 +114,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
3
.github/workflows/benchmark_gpu.yml
vendored
3
.github/workflows/benchmark_gpu.yml
vendored
@@ -10,13 +10,14 @@ on:
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "4-l40 (n3-L40x4)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
command:
|
||||
description: "Benchmark command to run"
|
||||
type: choice
|
||||
|
||||
34
.github/workflows/benchmark_gpu_4090.yml
vendored
34
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -22,8 +22,8 @@ on:
|
||||
# Weekly benchmarks will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
cuda-integer-benchmarks:
|
||||
@@ -46,15 +46,18 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
echo "FAST_BENCH=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -93,8 +96,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
@@ -124,14 +130,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -170,8 +179,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
48
.github/workflows/benchmark_gpu_common.yml
vendored
48
.github/workflows/benchmark_gpu_common.yml
vendored
@@ -84,7 +84,7 @@ jobs:
|
||||
run: |
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
PARSED_COMMAND=$(echo "${INPUTS_COMMAND}" | sed 's/[[:space:]]*,[[:space:]]*/\\", \\"/g')
|
||||
PARSED_COMMAND=$(echo "${INPUTS_COMMAND}" | sed 's/[[:space:]]*,[[:space:]]*/\", \"/g')
|
||||
echo "COMMAND=[\"${PARSED_COMMAND}\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set single operations flavor
|
||||
@@ -120,26 +120,33 @@ jobs:
|
||||
env:
|
||||
INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
|
||||
|
||||
|
||||
- name: Set command output
|
||||
id: set_command
|
||||
run: |
|
||||
echo "command=${{ toJSON(env.COMMAND) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "command=${COMMAND_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
COMMAND_OUTPUT: ${{ toJSON(env.COMMAND) }}
|
||||
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
- name: Set parameters types output
|
||||
id: set_params_type
|
||||
run: |
|
||||
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "params_type=${PARAMS_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
PARAMS_TYPE_OUTPUT: ${{ toJSON(env.PARAMS_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
@@ -227,6 +234,8 @@ jobs:
|
||||
include:
|
||||
- cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -237,18 +246,20 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
# Re-export environment variables as dependencies setup perform this task in the previous job.
|
||||
# Local env variables are cleaned at the end of each job.
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
@@ -258,13 +269,15 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -275,11 +288,11 @@ jobs:
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${PARAMS_TYPE}" bench_"${COMMAND}"_gpu
|
||||
make BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_gpu
|
||||
env:
|
||||
OP_FLAVOR: ${{ matrix.op_flavor }}
|
||||
BENCH_TYPE: ${{ matrix.bench_type }}
|
||||
BENCH_PARAM_TYPE: ${{ matrix.params_type }}
|
||||
BENCH_PARAMS_TYPE: ${{ matrix.params_type }}
|
||||
BENCH_COMMAND: ${{ matrix.command }}
|
||||
|
||||
- name: Parse results
|
||||
@@ -317,8 +330,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
|
||||
2
.github/workflows/benchmark_gpu_dex.yml
vendored
2
.github/workflows/benchmark_gpu_dex.yml
vendored
@@ -10,6 +10,8 @@ on:
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "4-l40 (n3-L40x4)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
|
||||
14
.github/workflows/benchmark_gpu_dex_common.yml
vendored
14
.github/workflows/benchmark_gpu_dex_common.yml
vendored
@@ -119,14 +119,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -167,8 +170,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
|
||||
2
.github/workflows/benchmark_gpu_erc20.yml
vendored
2
.github/workflows/benchmark_gpu_erc20.yml
vendored
@@ -10,6 +10,8 @@ on:
|
||||
type: choice
|
||||
options:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "4-l40 (n3-L40x4)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
|
||||
14
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
14
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -120,14 +120,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -168,8 +171,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
|
||||
94
.github/workflows/benchmark_hpu_integer.yml
vendored
Normal file
94
.github/workflows/benchmark_hpu_integer.yml
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
# Run all integer benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
|
||||
name: Hpu Integer Benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
integer-benchmarks-hpu:
|
||||
name: Execute integer & erc20 benchmarks for HPU backend
|
||||
runs-on: v80-desktop
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
steps:
|
||||
# Needed as long as hw_regmap repository is private
|
||||
- name: Configure SSH
|
||||
uses: webfactory/ssh-agent@a6f90b1f127823b31d4d4a8d96047790581349bd # v0.9.1
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_integer_hpu
|
||||
make bench_hlapi_erc20_hpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
|
||||
--database tfhe_rs \
|
||||
--hardware "hpu_x1" \
|
||||
--backend hpu \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch "${REF_NAME}" \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--walk-subdirs
|
||||
env:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_benchmarks
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
22
.github/workflows/benchmark_integer.yml
vendored
22
.github/workflows/benchmark_integer.yml
vendored
@@ -79,12 +79,16 @@ jobs:
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (integer-benchmarks)
|
||||
@@ -128,14 +132,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -193,8 +200,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
20
.github/workflows/benchmark_shortint.yml
vendored
20
.github/workflows/benchmark_shortint.yml
vendored
@@ -48,7 +48,9 @@ jobs:
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (shortint-benchmarks)
|
||||
@@ -89,14 +91,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -137,7 +142,7 @@ jobs:
|
||||
- name: Parse key sizes results
|
||||
if: matrix.op_flavor == 'default'
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/shortint_key_sizes.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/shortint_key_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
@@ -150,8 +155,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
22
.github/workflows/benchmark_signed_integer.yml
vendored
22
.github/workflows/benchmark_signed_integer.yml
vendored
@@ -79,12 +79,16 @@ jobs:
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "op_flavor=${OP_FLAVOR_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
OP_FLAVOR_OUTPUT: ${{ toJSON(env.OP_FLAVOR) }}
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (signed-integer-benchmarks)
|
||||
@@ -128,14 +132,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -185,8 +192,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
12
.github/workflows/benchmark_tfhe_fft.yml
vendored
12
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -61,11 +61,14 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -107,8 +110,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
12
.github/workflows/benchmark_tfhe_ntt.yml
vendored
12
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -61,11 +61,14 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -107,8 +110,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
14
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
14
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -98,14 +98,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -155,8 +158,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
30
.github/workflows/benchmark_wasm_client.yml
vendored
30
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -96,14 +96,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -136,17 +139,21 @@ jobs:
|
||||
|
||||
- name: Install web resources
|
||||
run: |
|
||||
make install_${{ matrix.browser }}_browser
|
||||
make install_${{ matrix.browser }}_web_driver
|
||||
make install_"${BROWSER}"_browser
|
||||
make install_"${BROWSER}"_web_driver
|
||||
env:
|
||||
BROWSER: ${{ matrix.browser }}
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_web_js_api_parallel_${{ matrix.browser }}_ci
|
||||
make bench_web_js_api_parallel_"${BROWSER}"_ci
|
||||
env:
|
||||
BROWSER: ${{ matrix.browser }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
make parse_wasm_benchmarks
|
||||
python3 ./ci/benchmark_parser.py tfhe/wasm_pk_gen.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/wasm_pk_gen.csv "${RESULTS_FILENAME}" \
|
||||
--database tfhe_rs \
|
||||
--hardware "m6i.4xlarge" \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
@@ -154,7 +161,7 @@ jobs:
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${BENCH_DATE}" \
|
||||
--key-gen
|
||||
rm tfhe/wasm_pk_gen.csv
|
||||
rm tfhe-benchmark/wasm_pk_gen.csv
|
||||
env:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
@@ -167,7 +174,7 @@ jobs:
|
||||
- name: Parse key and ciphertext sizes results
|
||||
if: matrix.browser == 'chrome'
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/hlapi_cpk_and_cctl_sizes.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/hlapi_cpk_and_cctl_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--key-gen \
|
||||
--append-results
|
||||
|
||||
@@ -188,8 +195,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
20
.github/workflows/benchmark_zk_pke.yml
vendored
20
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -93,7 +93,9 @@ jobs:
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
echo "bench_type=${BENCH_TYPE_OUTPUT}" >> "${GITHUB_OUTPUT}"
|
||||
env:
|
||||
BENCH_TYPE_OUTPUT: ${{ toJSON(env.BENCH_TYPE) }}
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (pke-zk-benchmarks)
|
||||
@@ -140,14 +142,17 @@ jobs:
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_DATE=${COMMIT_DATE}";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -184,7 +189,7 @@ jobs:
|
||||
|
||||
- name: Parse CRS sizes results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/pke_zk_crs_sizes.csv "${RESULTS_FILENAME}" \
|
||||
python3 ./ci/benchmark_parser.py tfhe-benchmark/pke_zk_crs_sizes.csv "${RESULTS_FILENAME}" \
|
||||
--object-sizes \
|
||||
--append-results
|
||||
|
||||
@@ -205,8 +210,11 @@ jobs:
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
|
||||
--slab-url "${SLAB_URL}"
|
||||
env:
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
|
||||
11
.github/workflows/cargo_build.yml
vendored
11
.github/workflows/cargo_build.yml
vendored
@@ -14,8 +14,8 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
cargo-builds:
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -94,5 +94,10 @@ jobs:
|
||||
run: |
|
||||
make build_tfhe_coverage
|
||||
|
||||
- name: Run Hpu pcc checks
|
||||
if: ${{ contains(matrix.os, 'ubuntu') }}
|
||||
run: |
|
||||
make pcc_hpu
|
||||
|
||||
# The wasm build check is a bit annoying to set-up here and is done during the tests in
|
||||
# aws_tfhe_tests.yml
|
||||
|
||||
4
.github/workflows/cargo_build_tfhe_fft.yml
vendored
4
.github/workflows/cargo_build_tfhe_fft.yml
vendored
@@ -12,8 +12,8 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
cargo-builds-fft:
|
||||
|
||||
4
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
4
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
@@ -12,8 +12,8 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
cargo-builds-ntt:
|
||||
|
||||
8
.github/workflows/cargo_test_fft.yml
vendored
8
.github/workflows/cargo_test_fft.yml
vendored
@@ -16,8 +16,8 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
strategy:
|
||||
matrix:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
strategy:
|
||||
matrix:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
|
||||
8
.github/workflows/cargo_test_ntt.yml
vendored
8
.github/workflows/cargo_test_ntt.yml
vendored
@@ -16,8 +16,8 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
|
||||
5
.github/workflows/check_commit.yml
vendored
5
.github/workflows/check_commit.yml
vendored
@@ -3,8 +3,9 @@ name: Check commit and PR compliance
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read # Permission needed to scan commits in a pull-request
|
||||
|
||||
jobs:
|
||||
check-commit-pr:
|
||||
|
||||
13
.github/workflows/ci_lint.yml
vendored
13
.github/workflows/ci_lint.yml
vendored
@@ -9,7 +9,8 @@ env:
|
||||
ACTIONLINT_CHECKSUM: "023070a287cd8cccd71515fedc843f1985bf96c436b7effaecce67290e7e0757"
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
lint-check:
|
||||
@@ -24,10 +25,10 @@ jobs:
|
||||
|
||||
- name: Get actionlint
|
||||
run: |
|
||||
wget "https://github.com/rhysd/actionlint/releases/download/v${{ env.ACTIONLINT_VERSION }}/actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz"
|
||||
echo "${{ env.ACTIONLINT_CHECKSUM }} actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz" > checksum
|
||||
wget "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz"
|
||||
echo "${ACTIONLINT_CHECKSUM} actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz" > checksum
|
||||
sha256sum -c checksum
|
||||
tar -xf actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz actionlint
|
||||
tar -xf actionlint_"${ACTIONLINT_VERSION}"_linux_amd64.tar.gz actionlint
|
||||
ln -s "$(pwd)/actionlint" /usr/local/bin/
|
||||
|
||||
- name: Lint workflows
|
||||
@@ -37,9 +38,11 @@ jobs:
|
||||
- name: Check workflows security
|
||||
run: |
|
||||
make check_workflow_security
|
||||
env:
|
||||
GH_TOKEN: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@4830be28ce81da52ec70d65c552a7403821d98d4 # v3.0.23
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@fc87bb5b5a97953d987372e74478de634726b3e5 # v3.0.25
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
|
||||
10
.github/workflows/code_coverage.yml
vendored
10
.github/workflows/code_coverage.yml
vendored
@@ -17,8 +17,8 @@ on:
|
||||
workflow_dispatch:
|
||||
# Code coverage workflow is only run via workflow_dispatch event since execution duration is not stabilized yet.
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@ad3126e916f78f00edff4ed0317cf185271ccc2d
|
||||
uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@ad3126e916f78f00edff4ed0317cf185271ccc2d
|
||||
uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
@@ -21,8 +21,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
13
.github/workflows/data_pr_close.yml
vendored
13
.github/workflows/data_pr_close.yml
vendored
@@ -59,7 +59,7 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${COMMENTS_URL}" \
|
||||
-d "${BODY}"
|
||||
@@ -71,6 +71,7 @@ jobs:
|
||||
REPO: ${{ github.repository }}
|
||||
EVENT_NUMBER: ${{ github.event.number }}
|
||||
COMMENTS_URL: ${{ fromJson(env.TARGET_REPO_PR).comments_url }}
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Merge the Pull Request in the data repo
|
||||
if: ${{ github.event.pull_request.merged }}
|
||||
@@ -81,7 +82,7 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X PUT \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${TARGET_REPO_PR_URL}"/merge \
|
||||
-d '{ "merge_method": "rebase" }'
|
||||
@@ -91,6 +92,7 @@ jobs:
|
||||
exit $RES
|
||||
env:
|
||||
TARGET_REPO_PR_URL: ${{ fromJson(env.TARGET_REPO_PR).url }}
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Close the Pull Request in the data repo
|
||||
if: ${{ !github.event.pull_request.merged }}
|
||||
@@ -101,7 +103,7 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X PATCH \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${TARGET_REPO_PR_URL}" \
|
||||
-d '{ "state": "closed" }'
|
||||
@@ -111,6 +113,7 @@ jobs:
|
||||
exit $RES
|
||||
env:
|
||||
TARGET_REPO_PR_URL: ${{ fromJson(env.TARGET_REPO_PR).url }}
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Delete the associated branch in the data repo
|
||||
run: |
|
||||
@@ -120,13 +123,15 @@ jobs:
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X DELETE \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "Authorization: Bearer ${TOKEN}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"${TARGET_REPO_API_URL}"/git/refs/heads/"${PR_BRANCH}"
|
||||
RES="$?"
|
||||
echo EOF
|
||||
} >> "${GITHUB_ENV}"
|
||||
exit $RES
|
||||
env:
|
||||
TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() && job.status == 'failure' }}
|
||||
|
||||
6
.github/workflows/gpu_4090_tests.yml
vendored
6
.github/workflows/gpu_4090_tests.yml
vendored
@@ -22,8 +22,8 @@ on:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
cuda-tests-linux:
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
9
.github/workflows/gpu_fast_h100_tests.yml
vendored
9
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -25,8 +25,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -172,9 +172,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
9
.github/workflows/gpu_fast_tests.yml
vendored
9
.github/workflows/gpu_fast_tests.yml
vendored
@@ -24,8 +24,8 @@ on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -156,9 +156,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
2
.github/workflows/gpu_full_h100_tests.yml
vendored
2
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -79,7 +79,7 @@ jobs:
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
11
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
11
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -25,8 +25,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -149,7 +149,7 @@ jobs:
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=FALSE make test_high_level_api_gpu
|
||||
make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
@@ -161,9 +161,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -19,8 +19,8 @@ on:
|
||||
# Nightly tests will be triggered each evening 8p.m.
|
||||
- cron: "0 20 * * *"
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
33
.github/workflows/gpu_pcc.yml
vendored
33
.github/workflows/gpu_pcc.yml
vendored
@@ -1,4 +1,4 @@
|
||||
# Perfom tfhe-cuda-backend post-commit checks on an AWS instance
|
||||
# Perform tfhe-cuda-backend post-commit checks on an AWS instance
|
||||
name: Cuda - Post-commit Checks
|
||||
|
||||
env:
|
||||
@@ -23,8 +23,8 @@ env:
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -81,16 +81,20 @@ jobs:
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
shell: bash
|
||||
run: |
|
||||
TOOLKIT_VERSION="$(echo ${{ matrix.cuda }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
|
||||
# shellcheck disable=SC2001
|
||||
TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/"${CUDA_KEYRING_PACKAGE}"
|
||||
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
|
||||
sha256sum -c checksum
|
||||
sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
|
||||
sudo apt update
|
||||
sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -100,17 +104,21 @@ jobs:
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Run fmt checks
|
||||
run: |
|
||||
@@ -120,12 +128,17 @@ jobs:
|
||||
run: |
|
||||
make pcc_gpu
|
||||
|
||||
- name: Check build with hpu enabled
|
||||
run: |
|
||||
make clippy_gpu_hpu
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
|
||||
@@ -25,8 +25,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -144,9 +144,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -25,9 +25,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -141,7 +140,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -159,9 +158,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -29,8 +29,8 @@ on:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -156,9 +156,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -25,9 +25,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -127,7 +126,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -145,9 +144,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -25,8 +25,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -158,9 +158,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
@@ -29,8 +29,8 @@ on:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -156,9 +156,10 @@ jobs:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
|
||||
73
.github/workflows/hpu_hlapi_tests.yml
vendored
Normal file
73
.github/workflows/hpu_hlapi_tests.yml
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
# Test tfhe-fft
|
||||
name: Cargo Test HLAPI HPU
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
|
||||
permissions: { }
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
outputs:
|
||||
hpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.hpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
hpu:
|
||||
- tfhe/Cargo.toml
|
||||
- Makefile
|
||||
- backends/tfhe-hpu-backend/**
|
||||
- mockups/tfhe-hpu-mockup/**
|
||||
|
||||
cargo-tests-hpu:
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.hpu_test == 'true'
|
||||
runs-on: large_ubuntu_16
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
- name: Install Just
|
||||
run: |
|
||||
cargo install just
|
||||
|
||||
- name: Test HLAPI HPU
|
||||
run: |
|
||||
source setup_hpu.sh
|
||||
just -f mockups/tfhe-hpu-mockup/Justfile BUILD_PROFILE=release mockup &
|
||||
make HPU_CONFIG=sim test_high_level_api_hpu
|
||||
make HPU_CONFIG=sim test_user_doc_hpu
|
||||
2
.github/workflows/integer_long_run_tests.yml
vendored
2
.github/workflows/integer_long_run_tests.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
5
.github/workflows/m1_tests.yml
vendored
5
.github/workflows/m1_tests.yml
vendored
@@ -27,7 +27,8 @@ concurrency:
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions: {}
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
cargo-builds-m1:
|
||||
@@ -45,7 +46,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
5
.github/workflows/make_release.yml
vendored
5
.github/workflows/make_release.yml
vendored
@@ -110,7 +110,10 @@ jobs:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe --token "${CRATES_TOKEN}" "${DRY_RUN}"
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
31
.github/workflows/make_release_cuda.yml
vendored
31
.github/workflows/make_release_cuda.yml
vendored
@@ -67,7 +67,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -78,17 +78,19 @@ jobs:
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
- name: Prepare package
|
||||
@@ -129,7 +131,7 @@ jobs:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -140,26 +142,33 @@ jobs:
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
CUDA_VERSION: ${{ matrix.cuda }}
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-cuda-backend --token "${CRATES_TOKEN}" "${DRY_RUN}"
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-cuda-backend --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
105
.github/workflows/make_release_hpu.yml
vendored
Normal file
105
.github/workflows/make_release_hpu.yml
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
name: Publish HPU release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
uses: ./.github/workflows/verify_tagged_commit.yml
|
||||
secrets:
|
||||
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package:
|
||||
runs-on: ubuntu-latest
|
||||
needs: verify_tag
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-hpu-backend
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-hpu-backend Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-hpu-backend --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA tfhe-hpu-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-hpu-backend release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -84,7 +84,10 @@ jobs:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-csprng --token "${CRATES_TOKEN}" "${DRY_RUN}"
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-csprng --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
5
.github/workflows/make_release_tfhe_fft.yml
vendored
5
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -80,7 +80,10 @@ jobs:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-fft --token "${CRATES_TOKEN}" "${DRY_RUN}"
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-fft --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
5
.github/workflows/make_release_tfhe_ntt.yml
vendored
5
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -80,7 +80,10 @@ jobs:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-ntt --token "${CRATES_TOKEN}" "${DRY_RUN}"
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-ntt --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
5
.github/workflows/make_release_zk_pok.yml
vendored
5
.github/workflows/make_release_zk_pok.yml
vendored
@@ -81,7 +81,10 @@ jobs:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-zk-pok --token "${CRATES_TOKEN}" "${DRY_RUN}"
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p tfhe-zk-pok --token "${CRATES_TOKEN}" ${DRY_RUN}
|
||||
- name: Verify hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
2
.lfsconfig
Normal file
2
.lfsconfig
Normal file
@@ -0,0 +1,2 @@
|
||||
[lfs]
|
||||
fetchexclude = *
|
||||
@@ -10,3 +10,9 @@
|
||||
/tfhe/src/integer/gpu
|
||||
|
||||
/tfhe/src/high_level_api/ @tmontaigu
|
||||
|
||||
/Makefile @IceTDrinker @soonum
|
||||
|
||||
/.github/ @soonum
|
||||
|
||||
/CODEOWNERS @IceTDrinker
|
||||
|
||||
@@ -9,10 +9,12 @@ members = [
|
||||
"tasks",
|
||||
"tfhe-csprng",
|
||||
"backends/tfhe-cuda-backend",
|
||||
"backends/tfhe-hpu-backend",
|
||||
"utils/tfhe-versionable",
|
||||
"utils/tfhe-versionable-derive",
|
||||
"utils/param_dedup",
|
||||
"tests",
|
||||
"mockups/tfhe-hpu-mockup",
|
||||
]
|
||||
|
||||
exclude = [
|
||||
|
||||
120
Makefile
120
Makefile
@@ -2,6 +2,7 @@ SHELL:=$(shell /usr/bin/env which bash)
|
||||
OS:=$(shell uname)
|
||||
RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
|
||||
CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
|
||||
CARGO_BUILD_JOBS=default
|
||||
CPU_COUNT=$(shell ./scripts/cpu_count.sh)
|
||||
RS_BUILD_TOOLCHAIN:=stable
|
||||
CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
|
||||
@@ -55,6 +56,9 @@ REGEX_PATTERN?=''
|
||||
TFHECUDA_SRC=backends/tfhe-cuda-backend/cuda
|
||||
TFHECUDA_BUILD=$(TFHECUDA_SRC)/build
|
||||
|
||||
# tfhe-hpu-backend
|
||||
HPU_CONFIG=v80
|
||||
|
||||
# Exclude these files from coverage reports
|
||||
define COVERAGE_EXCLUDED_FILES
|
||||
--exclude-files apps/trivium/src/trivium/* \
|
||||
@@ -166,9 +170,13 @@ install_typos_checker: install_rs_build_toolchain
|
||||
.PHONY: install_zizmor # Install zizmor workflow security checker
|
||||
install_zizmor: install_rs_build_toolchain
|
||||
@zizmor --version > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install zizmor || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install zizmor --version ~1.9 || \
|
||||
( echo "Unable to install zizmor, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_cargo_cross # Install custom tfhe-rs lints
|
||||
install_cargo_cross: install_rs_build_toolchain
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cross
|
||||
|
||||
.PHONY: setup_venv # Setup Python virtualenv for wasm tests
|
||||
setup_venv:
|
||||
python3 -m venv venv
|
||||
@@ -290,7 +298,7 @@ check_typos: install_typos_checker
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types,zk-pok \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
@@ -301,6 +309,20 @@ check_gpu: install_rs_check_toolchain
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: clippy_hpu # Run clippy lints on tfhe with "hpu" enabled
|
||||
clippy_hpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,hpu,pbs-stats,extended-types \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_gpu_hpu # Run clippy lints on tfhe with "gpu" and "hpu" enabled
|
||||
clippy_gpu_hpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,hpu,pbs-stats,extended-types,zk-pok \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
|
||||
fix_newline: check_linelint_installed
|
||||
linelint -a .
|
||||
@@ -473,6 +495,11 @@ clippy_cuda_backend: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-cuda-backend -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_hpu_backend # Run clippy lints on the tfhe-hpu-backend
|
||||
clippy_hpu_backend: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-hpu-backend -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_rust_bindings_did_not_change # Check rust bindings are up to date for tfhe-cuda-backend
|
||||
check_rust_bindings_did_not_change:
|
||||
cargo build -p tfhe-cuda-backend && "$(MAKE)" fmt_gpu && \
|
||||
@@ -702,6 +729,28 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_hpu_ci # Run the tests for integer ci on hpu backend
|
||||
test_integer_hpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
cargo test --release -p $(TFHE_SPEC) --features hpu-v80 --test hpu
|
||||
|
||||
.PHONY: test_integer_hpu_mockup_ci # Run the tests for integer ci on hpu backend and mockup
|
||||
test_integer_hpu_mockup_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
source ./setup_hpu.sh --config sim ; \
|
||||
cargo build --release --bin hpu_mockup; \
|
||||
coproc target/release/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_pfail64_psi64.toml > mockup.log; \
|
||||
HPU_TEST_ITER=1 \
|
||||
cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
|
||||
kill %1
|
||||
|
||||
.PHONY: test_integer_hpu_mockup_ci_fast # Run the quick tests for integer ci on hpu backend and mockup.
|
||||
test_integer_hpu_mockup_ci_fast: install_rs_check_toolchain install_cargo_nextest
|
||||
source ./setup_hpu.sh --config sim ; \
|
||||
cargo build --profile devo --bin hpu_mockup; \
|
||||
coproc target/devo/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_fast.toml > mockup.log; \
|
||||
HPU_TEST_ITER=1 \
|
||||
cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
|
||||
kill %1
|
||||
|
||||
.PHONY: test_boolean # Run the tests of the boolean module
|
||||
test_boolean: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -854,9 +903,25 @@ test_high_level_api: install_rs_build_toolchain
|
||||
|
||||
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
|
||||
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
|
||||
ifeq ($(HPU_CONFIG), v80)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--build-jobs=$(CARGO_BUILD_JOBS) \
|
||||
--test-threads=1 \
|
||||
--features=integer,internal-keycache,hpu,hpu-v80 -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*hpu.*/)"
|
||||
else
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--build-jobs=$(CARGO_BUILD_JOBS) \
|
||||
--test-threads=1 \
|
||||
--features=integer,internal-keycache,hpu -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*hpu.*/)"
|
||||
endif
|
||||
|
||||
|
||||
.PHONY: test_strings # Run the tests for strings ci
|
||||
test_strings: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -874,9 +939,21 @@ test_user_doc: install_rs_build_toolchain
|
||||
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
|
||||
test_user_doc_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
--features=internal-keycache,integer,zk-pok,gpu -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_user_doc_hpu # Run tests for HPU from the .md documentation
|
||||
test_user_doc_hpu: install_rs_build_toolchain
|
||||
ifeq ($(HPU_CONFIG), v80)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=internal-keycache,integer,hpu,hpu-v80 -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
else
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=internal-keycache,integer,hpu -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
endif
|
||||
|
||||
|
||||
|
||||
.PHONY: test_regex_engine # Run tests for regex_engine example
|
||||
@@ -907,6 +984,12 @@ test_tfhe_csprng: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-csprng
|
||||
|
||||
.PHONY: test_tfhe_csprng_big_endian # Run tfhe-csprng tests on an emulated big endian system
|
||||
test_tfhe_csprng_big_endian: install_rs_build_toolchain install_cargo_cross
|
||||
RUSTFLAGS="" cross $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-csprng --target=powerpc64-unknown-linux-gnu
|
||||
|
||||
|
||||
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
|
||||
test_zk_pok: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -1012,7 +1095,7 @@ check_compile_tests: install_rs_build_toolchain
|
||||
.PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
|
||||
check_compile_tests_benches_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
--features=experimental,boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=experimental,boolean,shortint,integer,internal-keycache,gpu,zk-pok \
|
||||
-p $(TFHE_SPEC)
|
||||
mkdir -p "$(TFHECUDA_BUILD)" && \
|
||||
cd "$(TFHECUDA_BUILD)" && \
|
||||
@@ -1100,6 +1183,12 @@ clippy_bench_gpu: install_rs_check_toolchain
|
||||
--features=gpu,shortint,integer,internal-keycache,nightly-avx512,pbs-stats,zk-pok \
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_bench_hpu # Run clippy lints on tfhe-benchmark
|
||||
clippy_bench_hpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=hpu,shortint,integer,internal-keycache,pbs-stats\
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
|
||||
.PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
|
||||
print_doc_bench_parameters:
|
||||
RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
|
||||
@@ -1133,6 +1222,14 @@ bench_signed_integer_gpu: install_rs_check_toolchain
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend
|
||||
bench_integer_hpu: install_rs_check_toolchain
|
||||
source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
@@ -1146,7 +1243,7 @@ bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
|
||||
.PHONY: bench_integer_zk_gpu
|
||||
bench_integer_zk_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
@@ -1324,6 +1421,14 @@ bench_hlapi_dex_gpu: install_rs_check_toolchain
|
||||
--bench hlapi-dex \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ECR20 operations on HPU
|
||||
bench_hlapi_erc20_hpu: install_rs_check_toolchain
|
||||
source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick
|
||||
|
||||
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
|
||||
bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
@@ -1423,6 +1528,9 @@ tfhe_lints
|
||||
pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
|
||||
clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu
|
||||
|
||||
.PHONY: pcc_hpu # pcc stands for pre commit checks for HPU compilation
|
||||
pcc_hpu: clippy_hpu clippy_hpu_backend test_integer_hpu_mockup_ci_fast
|
||||
|
||||
.PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
|
||||
fpcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
|
||||
check_md_docs_are_tested clippy_fast check_compile_tests
|
||||
|
||||
@@ -11,11 +11,13 @@ extend-ignore-identifiers-re = [
|
||||
# Example with string replacing "hello" with "herlo"
|
||||
"herlo",
|
||||
# Example in trivium
|
||||
"C9217BA0D762ACA1"
|
||||
"C9217BA0D762ACA1",
|
||||
"0x[0-9a-fA-F]+"
|
||||
]
|
||||
|
||||
[files]
|
||||
extend-exclude = [
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu",
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu",
|
||||
"backends/tfhe-hpu-backend/config_store/**/*.link_summary",
|
||||
]
|
||||
|
||||
@@ -28,9 +28,10 @@ void cuda_modulus_switch_inplace_64(void *stream, uint32_t gpu_index,
|
||||
|
||||
void cuda_improve_noise_modulus_switch_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *encrypted_zeros, uint32_t lwe_size,
|
||||
uint32_t num_lwes, uint32_t num_zeros, double input_variance,
|
||||
double r_sigma, double bound, uint32_t log_modulus);
|
||||
void const *lwe_array_in, void const *lwe_array_indexes,
|
||||
void const *encrypted_zeros, uint32_t lwe_size, uint32_t num_lwes,
|
||||
uint32_t num_zeros, double input_variance, double r_sigma, double bound,
|
||||
uint32_t log_modulus);
|
||||
|
||||
void cuda_glwe_sample_extract_128(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
|
||||
@@ -8,7 +8,7 @@ extern std::mutex m;
|
||||
extern bool p2p_enabled;
|
||||
|
||||
extern "C" {
|
||||
int32_t cuda_setup_multi_gpu();
|
||||
int32_t cuda_setup_multi_gpu(int device_0_id);
|
||||
}
|
||||
|
||||
// Define a variant type that can be either a vector or a single pointer
|
||||
|
||||
@@ -400,7 +400,8 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, bool reduce_degrees_for_single_carry_propagation,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
@@ -538,5 +539,13 @@ void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams, uint32_t const *gpu_indexes);
|
||||
|
||||
void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams,
|
||||
uint32_t const *gpu_indexes);
|
||||
} // extern C
|
||||
#endif // CUDA_INTEGER_H
|
||||
|
||||
@@ -1116,18 +1116,116 @@ template <typename Torus> struct int_overflowing_sub_memory {
|
||||
};
|
||||
|
||||
template <typename Torus> struct int_sum_ciphertexts_vec_memory {
|
||||
CudaRadixCiphertextFFI *new_blocks;
|
||||
CudaRadixCiphertextFFI *new_blocks_copy;
|
||||
CudaRadixCiphertextFFI *old_blocks;
|
||||
CudaRadixCiphertextFFI *small_lwe_vector;
|
||||
|
||||
int_radix_params params;
|
||||
|
||||
int32_t *d_smart_copy_in;
|
||||
int32_t *d_smart_copy_out;
|
||||
|
||||
bool mem_reuse = false;
|
||||
size_t max_total_blocks_in_vec;
|
||||
uint32_t num_blocks_in_radix;
|
||||
uint32_t max_num_radix_in_vec;
|
||||
uint64_t *size_tracker;
|
||||
bool gpu_memory_allocated;
|
||||
|
||||
// temporary buffers
|
||||
CudaRadixCiphertextFFI *current_blocks;
|
||||
CudaRadixCiphertextFFI *small_lwe_vector;
|
||||
|
||||
uint32_t *d_columns_data;
|
||||
uint32_t *d_columns_counter;
|
||||
uint32_t **d_columns;
|
||||
|
||||
uint32_t *d_new_columns_data;
|
||||
uint32_t *d_new_columns_counter;
|
||||
uint32_t **d_new_columns;
|
||||
|
||||
uint64_t *d_degrees;
|
||||
uint32_t *d_pbs_counters;
|
||||
|
||||
// lookup table for extracting message and carry
|
||||
int_radix_lut<Torus> *luts_message_carry;
|
||||
|
||||
bool mem_reuse = false;
|
||||
|
||||
void setup_index_buffers(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
|
||||
d_degrees = (uint64_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_total_blocks_in_vec * sizeof(uint64_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, gpu_memory_allocated);
|
||||
|
||||
d_pbs_counters = (uint32_t *)cuda_malloc_with_size_tracking_async(
|
||||
3 * sizeof(uint32_t), streams[0], gpu_indexes[0], size_tracker,
|
||||
gpu_memory_allocated);
|
||||
|
||||
auto num_blocks_in_radix = this->num_blocks_in_radix;
|
||||
auto max_num_radix_in_vec = this->max_num_radix_in_vec;
|
||||
auto setup_columns =
|
||||
[num_blocks_in_radix, max_num_radix_in_vec, streams,
|
||||
gpu_indexes](uint32_t **&columns, uint32_t *&columns_data,
|
||||
uint32_t *&columns_counter, uint64_t *size_tracker,
|
||||
bool gpu_memory_allocated) {
|
||||
columns_data = (uint32_t *)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_in_radix * max_num_radix_in_vec * sizeof(uint32_t),
|
||||
streams[0], gpu_indexes[0], size_tracker, gpu_memory_allocated);
|
||||
columns_counter = (uint32_t *)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_in_radix * sizeof(uint32_t), streams[0],
|
||||
gpu_indexes[0], size_tracker, gpu_memory_allocated);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
columns_counter, 0, num_blocks_in_radix * sizeof(uint32_t),
|
||||
streams[0], gpu_indexes[0], gpu_memory_allocated);
|
||||
uint32_t **h_columns = new uint32_t *[num_blocks_in_radix];
|
||||
for (int i = 0; i < num_blocks_in_radix; ++i) {
|
||||
h_columns[i] = columns_data + i * max_num_radix_in_vec;
|
||||
}
|
||||
columns = (uint32_t **)cuda_malloc_with_size_tracking_async(
|
||||
num_blocks_in_radix * sizeof(uint32_t *), streams[0],
|
||||
gpu_indexes[0], size_tracker, gpu_memory_allocated);
|
||||
cuda_memcpy_async_to_gpu(columns, h_columns,
|
||||
num_blocks_in_radix * sizeof(uint32_t *),
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
delete[] h_columns;
|
||||
};
|
||||
|
||||
setup_columns(d_columns, d_columns_data, d_columns_counter, size_tracker,
|
||||
gpu_memory_allocated);
|
||||
setup_columns(d_new_columns, d_new_columns_data, d_new_columns_counter,
|
||||
size_tracker, gpu_memory_allocated);
|
||||
}
|
||||
|
||||
void setup_lookup_tables(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count) {
|
||||
uint32_t message_modulus = params.message_modulus;
|
||||
|
||||
if (!mem_reuse) {
|
||||
luts_message_carry = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, 2, max_total_blocks_in_vec,
|
||||
gpu_memory_allocated, size_tracker);
|
||||
}
|
||||
auto message_acc = luts_message_carry->get_lut(0, 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(0, 1);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_message = [message_modulus](Torus x) -> Torus {
|
||||
return x % message_modulus;
|
||||
};
|
||||
auto lut_f_carry = [message_modulus](Torus x) -> Torus {
|
||||
return x / message_modulus;
|
||||
};
|
||||
|
||||
// generate accumulators
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], message_acc,
|
||||
luts_message_carry->get_degree(0),
|
||||
luts_message_carry->get_max_degree(0), params.glwe_dimension,
|
||||
params.polynomial_size, message_modulus, params.carry_modulus,
|
||||
lut_f_message, gpu_memory_allocated);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc,
|
||||
luts_message_carry->get_degree(1),
|
||||
luts_message_carry->get_max_degree(1), params.glwe_dimension,
|
||||
params.polynomial_size, message_modulus, params.carry_modulus,
|
||||
lut_f_carry, gpu_memory_allocated);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
}
|
||||
int_sum_ciphertexts_vec_memory(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params params,
|
||||
@@ -1136,103 +1234,84 @@ template <typename Torus> struct int_sum_ciphertexts_vec_memory {
|
||||
bool allocate_gpu_memory,
|
||||
uint64_t *size_tracker) {
|
||||
this->params = params;
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->mem_reuse = false;
|
||||
this->max_total_blocks_in_vec = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
this->num_blocks_in_radix = num_blocks_in_radix;
|
||||
this->max_num_radix_in_vec = max_num_radix_in_vec;
|
||||
this->gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->size_tracker = size_tracker;
|
||||
|
||||
int max_pbs_count = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
setup_index_buffers(streams, gpu_indexes);
|
||||
setup_lookup_tables(streams, gpu_indexes, gpu_count);
|
||||
|
||||
// allocate gpu memory for intermediate buffers
|
||||
new_blocks = new CudaRadixCiphertextFFI;
|
||||
// create and allocate intermediate buffers
|
||||
current_blocks = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks, max_pbs_count,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
new_blocks_copy = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks_copy, max_pbs_count,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
old_blocks = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], old_blocks, max_pbs_count,
|
||||
streams[0], gpu_indexes[0], current_blocks, max_total_blocks_in_vec,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
small_lwe_vector = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, max_pbs_count,
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, max_total_blocks_in_vec,
|
||||
params.small_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
d_smart_copy_in = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
d_smart_copy_out = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_in, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_out, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
}
|
||||
|
||||
int_sum_ciphertexts_vec_memory(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params params, uint32_t num_blocks_in_radix,
|
||||
uint32_t max_num_radix_in_vec, CudaRadixCiphertextFFI *new_blocks,
|
||||
CudaRadixCiphertextFFI *old_blocks,
|
||||
CudaRadixCiphertextFFI *small_lwe_vector, bool allocate_gpu_memory,
|
||||
uint32_t max_num_radix_in_vec, CudaRadixCiphertextFFI *current_blocks,
|
||||
CudaRadixCiphertextFFI *small_lwe_vector,
|
||||
int_radix_lut<Torus> *reused_lut, bool allocate_gpu_memory,
|
||||
uint64_t *size_tracker) {
|
||||
mem_reuse = true;
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->mem_reuse = true;
|
||||
this->params = params;
|
||||
this->max_total_blocks_in_vec = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
this->num_blocks_in_radix = num_blocks_in_radix;
|
||||
this->max_num_radix_in_vec = max_num_radix_in_vec;
|
||||
this->gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->size_tracker = size_tracker;
|
||||
|
||||
int max_pbs_count = num_blocks_in_radix * max_num_radix_in_vec;
|
||||
|
||||
// assign gpu memory for intermediate buffers
|
||||
this->new_blocks = new_blocks;
|
||||
this->old_blocks = old_blocks;
|
||||
this->current_blocks = current_blocks;
|
||||
this->small_lwe_vector = small_lwe_vector;
|
||||
new_blocks_copy = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks_copy, max_pbs_count,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
d_smart_copy_in = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
d_smart_copy_out = (int32_t *)cuda_malloc_with_size_tracking_async(
|
||||
max_pbs_count * sizeof(int32_t), streams[0], gpu_indexes[0],
|
||||
size_tracker, allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_in, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
cuda_memset_with_size_tracking_async(
|
||||
d_smart_copy_out, 0, max_pbs_count * sizeof(int32_t), streams[0],
|
||||
gpu_indexes[0], allocate_gpu_memory);
|
||||
this->luts_message_carry = reused_lut;
|
||||
setup_index_buffers(streams, gpu_indexes);
|
||||
}
|
||||
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
cuda_drop_with_size_tracking_async(d_smart_copy_in, streams[0],
|
||||
cuda_drop_with_size_tracking_async(d_degrees, streams[0], gpu_indexes[0],
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_pbs_counters, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_smart_copy_out, streams[0],
|
||||
|
||||
cuda_drop_with_size_tracking_async(d_columns_data, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_columns_counter, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_columns, streams[0], gpu_indexes[0],
|
||||
gpu_memory_allocated);
|
||||
|
||||
cuda_drop_with_size_tracking_async(d_new_columns_data, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_new_columns_counter, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_new_columns, streams[0],
|
||||
gpu_indexes[0], gpu_memory_allocated);
|
||||
|
||||
if (!mem_reuse) {
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], new_blocks,
|
||||
gpu_memory_allocated);
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], old_blocks,
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], current_blocks,
|
||||
gpu_memory_allocated);
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0],
|
||||
small_lwe_vector, gpu_memory_allocated);
|
||||
luts_message_carry->release(streams, gpu_indexes, gpu_count);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
delete new_blocks;
|
||||
delete old_blocks;
|
||||
|
||||
delete current_blocks;
|
||||
delete small_lwe_vector;
|
||||
delete luts_message_carry;
|
||||
}
|
||||
release_radix_ciphertext_async(streams[0], gpu_indexes[0], new_blocks_copy,
|
||||
gpu_memory_allocated);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
delete new_blocks_copy;
|
||||
}
|
||||
};
|
||||
|
||||
// For sequential algorithm in group propagation
|
||||
template <typename Torus> struct int_seq_group_prop_memory {
|
||||
|
||||
@@ -2549,7 +2628,7 @@ template <typename Torus> struct int_mul_memory {
|
||||
// radix_lwe_left except the last blocks of each shift
|
||||
int msb_vector_block_count = num_radix_blocks * (num_radix_blocks - 1) / 2;
|
||||
|
||||
int total_block_count = lsb_vector_block_count + msb_vector_block_count;
|
||||
int total_block_count = num_radix_blocks * num_radix_blocks;
|
||||
|
||||
// allocate memory for intermediate buffers
|
||||
vector_result_sb = new CudaRadixCiphertextFFI;
|
||||
@@ -2562,13 +2641,13 @@ template <typename Torus> struct int_mul_memory {
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
small_lwe_vector = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, total_block_count,
|
||||
streams[0], gpu_indexes[0], small_lwe_vector, 2 * total_block_count,
|
||||
params.small_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
// create int_radix_lut objects for lsb, msb, message, carry
|
||||
// luts_array -> lut = {lsb_acc, msb_acc}
|
||||
luts_array = new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count,
|
||||
params, 2, total_block_count,
|
||||
params, 2, 2 * total_block_count,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
auto lsb_acc = luts_array->get_lut(0, 0);
|
||||
auto msb_acc = luts_array->get_lut(0, 1);
|
||||
@@ -2602,9 +2681,10 @@ template <typename Torus> struct int_mul_memory {
|
||||
|
||||
luts_array->broadcast_lut(streams, gpu_indexes, 0);
|
||||
// create memory object for sum ciphertexts
|
||||
// create memory object for sum ciphertexts
|
||||
sum_ciphertexts_mem = new int_sum_ciphertexts_vec_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_radix_blocks,
|
||||
2 * num_radix_blocks, block_mul_res, vector_result_sb, small_lwe_vector,
|
||||
2 * num_radix_blocks, vector_result_sb, small_lwe_vector, luts_array,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
uint32_t uses_carry = 0;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
@@ -3918,7 +3998,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_did_not_happen[0]->get_degree(0),
|
||||
zero_out_if_overflow_did_not_happen[0]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, cur_lut_f, 2, gpu_memory_allocated);
|
||||
params.carry_modulus, cur_lut_f, params.message_modulus - 2,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_did_not_happen[0]->broadcast_lut(streams, gpu_indexes,
|
||||
0);
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
@@ -3927,7 +4008,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_did_not_happen[1]->get_degree(0),
|
||||
zero_out_if_overflow_did_not_happen[1]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, cur_lut_f, 3, gpu_memory_allocated);
|
||||
params.carry_modulus, cur_lut_f, params.message_modulus - 1,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_did_not_happen[1]->broadcast_lut(streams, gpu_indexes,
|
||||
0);
|
||||
|
||||
@@ -3954,7 +4036,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_happened[0]->get_degree(0),
|
||||
zero_out_if_overflow_happened[0]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, overflow_happened_f, 2, gpu_memory_allocated);
|
||||
params.carry_modulus, overflow_happened_f, params.message_modulus - 2,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_happened[0]->broadcast_lut(streams, gpu_indexes, 0);
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
@@ -3962,7 +4045,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
zero_out_if_overflow_happened[1]->get_degree(0),
|
||||
zero_out_if_overflow_happened[1]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, overflow_happened_f, 3, gpu_memory_allocated);
|
||||
params.carry_modulus, overflow_happened_f, params.message_modulus - 1,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_happened[1]->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
// merge_overflow_flags_luts
|
||||
|
||||
@@ -248,6 +248,7 @@ template <> struct pbs_buffer_128<PBS_TYPE::CLASSICAL> {
|
||||
__uint128_t *global_accumulator;
|
||||
double *global_join_buffer;
|
||||
__uint128_t *temp_lwe_array_in;
|
||||
uint64_t *trivial_indexes;
|
||||
|
||||
PBS_VARIANT pbs_variant;
|
||||
bool uses_noise_reduction;
|
||||
@@ -263,11 +264,27 @@ template <> struct pbs_buffer_128<PBS_TYPE::CLASSICAL> {
|
||||
cuda_set_device(gpu_index);
|
||||
this->pbs_variant = pbs_variant;
|
||||
this->uses_noise_reduction = allocate_ms_array;
|
||||
this->temp_lwe_array_in =
|
||||
(__uint128_t *)cuda_malloc_with_size_tracking_async(
|
||||
(lwe_dimension + 1) * input_lwe_ciphertext_count *
|
||||
sizeof(__uint128_t),
|
||||
stream, gpu_index, size_tracker, allocate_ms_array);
|
||||
if (allocate_ms_array) {
|
||||
this->temp_lwe_array_in =
|
||||
(__uint128_t *)cuda_malloc_with_size_tracking_async(
|
||||
(lwe_dimension + 1) * input_lwe_ciphertext_count *
|
||||
sizeof(__uint128_t),
|
||||
stream, gpu_index, size_tracker, allocate_ms_array);
|
||||
this->trivial_indexes = (uint64_t *)cuda_malloc_with_size_tracking_async(
|
||||
input_lwe_ciphertext_count * sizeof(uint64_t), stream, gpu_index,
|
||||
size_tracker, allocate_ms_array);
|
||||
uint64_t *h_trivial_indexes = new uint64_t[input_lwe_ciphertext_count];
|
||||
for (uint32_t i = 0; i < input_lwe_ciphertext_count; i++)
|
||||
h_trivial_indexes[i] = i;
|
||||
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
trivial_indexes, h_trivial_indexes,
|
||||
input_lwe_ciphertext_count * sizeof(uint64_t), stream, gpu_index,
|
||||
allocate_gpu_memory);
|
||||
|
||||
cuda_synchronize_stream(stream, gpu_index);
|
||||
delete[] h_trivial_indexes;
|
||||
}
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
size_t global_join_buffer_size = (glwe_dimension + 1) * level_count *
|
||||
input_lwe_ciphertext_count *
|
||||
@@ -404,9 +421,12 @@ template <> struct pbs_buffer_128<PBS_TYPE::CLASSICAL> {
|
||||
cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
|
||||
gpu_memory_allocated);
|
||||
|
||||
if (uses_noise_reduction)
|
||||
if (uses_noise_reduction) {
|
||||
cuda_drop_with_size_tracking_async(temp_lwe_array_in, stream, gpu_index,
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(trivial_indexes, stream, gpu_index,
|
||||
gpu_memory_allocated);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -502,7 +522,8 @@ template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_tbc(uint32_t num_samples,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count);
|
||||
uint32_t level_count,
|
||||
uint32_t max_shared_memory);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
|
||||
|
||||
@@ -86,13 +86,15 @@ void cuda_modulus_switch_inplace_64(void *stream, uint32_t gpu_index,
|
||||
|
||||
void cuda_improve_noise_modulus_switch_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *encrypted_zeros, uint32_t lwe_size,
|
||||
uint32_t num_lwes, uint32_t num_zeros, double input_variance,
|
||||
double r_sigma, double bound, uint32_t log_modulus) {
|
||||
void const *lwe_array_in, void const *lwe_array_indexes,
|
||||
void const *encrypted_zeros, uint32_t lwe_size, uint32_t num_lwes,
|
||||
uint32_t num_zeros, double input_variance, double r_sigma, double bound,
|
||||
uint32_t log_modulus) {
|
||||
host_improve_noise_modulus_switch<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t const *>(lwe_array_in),
|
||||
static_cast<uint64_t const *>(lwe_array_indexes),
|
||||
static_cast<const uint64_t *>(encrypted_zeros), lwe_size, num_lwes,
|
||||
num_zeros, input_variance, r_sigma, bound, log_modulus);
|
||||
}
|
||||
|
||||
@@ -178,11 +178,10 @@ __device__ __forceinline__ double measure_modulus_switch_noise(
|
||||
|
||||
// Each thread processes two elements of the lwe array
|
||||
template <typename Torus>
|
||||
__global__ void
|
||||
improve_noise_modulus_switch(Torus *array_out, const Torus *array_in,
|
||||
const Torus *zeros, int lwe_size, int num_zeros,
|
||||
double input_variance, double r_sigma,
|
||||
double bound, uint32_t log_modulus) {
|
||||
__global__ void improve_noise_modulus_switch(
|
||||
Torus *array_out, const Torus *array_in, const uint64_t *indexes,
|
||||
const Torus *zeros, int lwe_size, int num_zeros, double input_variance,
|
||||
double r_sigma, double bound, uint32_t log_modulus) {
|
||||
|
||||
// First we will assume size is less than the number of threads per block
|
||||
// I should switch this to dynamic shared memory
|
||||
@@ -198,13 +197,13 @@ improve_noise_modulus_switch(Torus *array_out, const Torus *array_in,
|
||||
// This probably are not needed cause we are setting the values
|
||||
sum_mask_errors[threadIdx.x] = 0.f;
|
||||
sum_squared_mask_errors[threadIdx.x] = 0.f;
|
||||
auto this_block_lwe_in = array_in + indexes[blockIdx.x] * lwe_size;
|
||||
auto this_block_lwe_out = array_out + indexes[blockIdx.x] * lwe_size;
|
||||
Torus input_element1 = this_block_lwe_in[threadIdx.x];
|
||||
|
||||
Torus input_element1 = array_in[threadIdx.x + blockIdx.x * lwe_size];
|
||||
|
||||
Torus input_element2 =
|
||||
threadIdx.x + blockDim.x < lwe_size
|
||||
? array_in[threadIdx.x + blockDim.x + blockIdx.x * lwe_size]
|
||||
: 0;
|
||||
Torus input_element2 = threadIdx.x + blockDim.x < lwe_size
|
||||
? this_block_lwe_in[threadIdx.x + blockDim.x]
|
||||
: 0;
|
||||
|
||||
// Base noise is only handled by thread 0
|
||||
double base_noise = measure_modulus_switch_noise<Torus>(
|
||||
@@ -218,11 +217,10 @@ improve_noise_modulus_switch(Torus *array_out, const Torus *array_in,
|
||||
__syncthreads();
|
||||
|
||||
if (found)
|
||||
array_out[threadIdx.x + blockIdx.x * lwe_size] = input_element1;
|
||||
this_block_lwe_out[threadIdx.x] = input_element1;
|
||||
|
||||
if (found && (threadIdx.x + blockDim.x) < lwe_size)
|
||||
array_out[threadIdx.x + blockDim.x + blockIdx.x * lwe_size] =
|
||||
input_element2;
|
||||
this_block_lwe_out[threadIdx.x + blockDim.x] = input_element2;
|
||||
|
||||
__syncthreads();
|
||||
// If we found a zero element we stop iterating (in avg 20 times are
|
||||
@@ -253,11 +251,10 @@ improve_noise_modulus_switch(Torus *array_out, const Torus *array_in,
|
||||
// Assumption we always have at least 512 elements
|
||||
// If we find a useful zero encryption we replace the lwe by lwe + zero
|
||||
if (found)
|
||||
array_out[threadIdx.x + blockIdx.x * lwe_size] = zero_element1;
|
||||
this_block_lwe_out[threadIdx.x] = zero_element1;
|
||||
|
||||
if (found && (threadIdx.x + blockDim.x) < lwe_size)
|
||||
array_out[threadIdx.x + blockDim.x + blockIdx.x * lwe_size] =
|
||||
zero_element2;
|
||||
this_block_lwe_out[threadIdx.x + blockDim.x] = zero_element2;
|
||||
|
||||
__syncthreads();
|
||||
// If we found a zero element we stop iterating (in avg 20 times are
|
||||
@@ -270,9 +267,10 @@ improve_noise_modulus_switch(Torus *array_out, const Torus *array_in,
|
||||
template <typename Torus>
|
||||
__host__ void host_improve_noise_modulus_switch(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *array_out,
|
||||
Torus const *array_in, const Torus *zeros, uint32_t lwe_size,
|
||||
uint32_t num_lwes, const uint32_t num_zeros, const double input_variance,
|
||||
const double r_sigma, const double bound, uint32_t log_modulus) {
|
||||
Torus const *array_in, uint64_t const *indexes, const Torus *zeros,
|
||||
uint32_t lwe_size, uint32_t num_lwes, const uint32_t num_zeros,
|
||||
const double input_variance, const double r_sigma, const double bound,
|
||||
uint32_t log_modulus) {
|
||||
|
||||
if (lwe_size < 512) {
|
||||
PANIC("The lwe_size is less than 512, this is not supported\n");
|
||||
@@ -289,8 +287,8 @@ __host__ void host_improve_noise_modulus_switch(
|
||||
int num_threads = 512, num_blocks = num_lwes;
|
||||
|
||||
improve_noise_modulus_switch<Torus><<<num_blocks, num_threads, 0, stream>>>(
|
||||
array_out, array_in, zeros, lwe_size, num_zeros, input_variance, r_sigma,
|
||||
bound, log_modulus);
|
||||
array_out, array_in, indexes, zeros, lwe_size, num_zeros, input_variance,
|
||||
r_sigma, bound, log_modulus);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
|
||||
@@ -492,6 +492,7 @@ __host__ void host_fourier_transform_forward_as_integer_f128(
|
||||
batch_convert_u128_to_f128_as_integer<params>
|
||||
<<<grid_size, block_size, 0, stream>>>(d_re0, d_re1, d_im0, d_im1,
|
||||
d_standard);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// call negacyclic 128 bit forward fft.
|
||||
if (full_sm) {
|
||||
@@ -503,6 +504,7 @@ __host__ void host_fourier_transform_forward_as_integer_f128(
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(
|
||||
d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
cuda_memcpy_async_to_cpu(re0, d_re0, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
|
||||
@@ -63,7 +63,7 @@ void update_degrees_after_bitor(uint64_t *output_degrees,
|
||||
auto result = max;
|
||||
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max | j > result) {
|
||||
if ((max | j) > result) {
|
||||
result = max | j;
|
||||
}
|
||||
}
|
||||
@@ -82,7 +82,7 @@ void update_degrees_after_bitxor(uint64_t *output_degrees,
|
||||
|
||||
// Try every possibility to find the worst case
|
||||
for (uint j = 0; j < min + 1; j++) {
|
||||
if (max ^ j > result) {
|
||||
if ((max ^ j) > result) {
|
||||
result = max ^ j;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ __host__ void host_integer_radix_bitop_kb(
|
||||
update_degrees_after_bitor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
} else if (mem_ptr->op == BITXOR) {
|
||||
} else if (mem_ptr->op == BITOP_TYPE::BITXOR) {
|
||||
update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
|
||||
lwe_array_2->degrees,
|
||||
lwe_array_1->num_radix_blocks);
|
||||
|
||||
17
backends/tfhe-cuda-backend/cuda/src/integer/cast.cu
Normal file
17
backends/tfhe-cuda-backend/cuda/src/integer/cast.cu
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "cast.cuh"
|
||||
|
||||
void extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams, uint32_t const *gpu_indexes) {
|
||||
host_extend_radix_with_trivial_zero_blocks_msb<uint64_t>(
|
||||
output, input, (cudaStream_t *)streams, gpu_indexes);
|
||||
}
|
||||
|
||||
void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
void *const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
|
||||
host_trim_radix_blocks_lsb<uint64_t>(output, input, (cudaStream_t *)streams,
|
||||
gpu_indexes);
|
||||
}
|
||||
36
backends/tfhe-cuda-backend/cuda/src/integer/cast.cuh
Normal file
36
backends/tfhe-cuda-backend/cuda/src/integer/cast.cuh
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef CAST_CUH
|
||||
#define CAST_CUH
|
||||
|
||||
#include "device.h"
|
||||
#include "integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_extend_radix_with_trivial_zero_blocks_msb(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0], output,
|
||||
0, input->num_radix_blocks, input, 0,
|
||||
input->num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_trim_radix_blocks_lsb(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes) {
|
||||
|
||||
const uint32_t input_start_lwe_index =
|
||||
input->num_radix_blocks - output->num_radix_blocks;
|
||||
|
||||
if (input->num_radix_blocks <= output->num_radix_blocks) {
|
||||
PANIC("Cuda error: input num blocks should be greater than output num "
|
||||
"blocks");
|
||||
}
|
||||
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], output, 0, output->num_radix_blocks, input,
|
||||
input_start_lwe_index, input->num_radix_blocks);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -456,7 +456,7 @@ __host__ void tree_sign_reduction(
|
||||
auto inner_tree_leaf = tree_buffer->tree_inner_leaf_lut;
|
||||
while (partial_block_count > 2) {
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, partial_block_count,
|
||||
4);
|
||||
message_modulus);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
|
||||
@@ -477,16 +477,17 @@ __host__ void tree_sign_reduction(
|
||||
auto last_lut = tree_buffer->tree_last_leaf_lut;
|
||||
auto block_selector_f = tree_buffer->block_selector_f;
|
||||
std::function<Torus(Torus)> f;
|
||||
|
||||
auto num_bits_in_message = log2_int(params.message_modulus);
|
||||
if (partial_block_count == 2) {
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, partial_block_count,
|
||||
4);
|
||||
message_modulus);
|
||||
|
||||
f = [block_selector_f, sign_handler_f](Torus x) -> Torus {
|
||||
int msb = (x >> 2) & 3;
|
||||
int lsb = x & 3;
|
||||
f = [block_selector_f, sign_handler_f, num_bits_in_message,
|
||||
message_modulus](Torus x) -> Torus {
|
||||
Torus msb = (x >> num_bits_in_message) & (message_modulus - 1);
|
||||
Torus lsb = x & (message_modulus - 1);
|
||||
|
||||
int final_sign = block_selector_f(msb, lsb);
|
||||
Torus final_sign = block_selector_f(msb, lsb);
|
||||
return sign_handler_f(final_sign);
|
||||
};
|
||||
} else {
|
||||
|
||||
@@ -386,8 +386,9 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
subtraction_overflowed,
|
||||
at_least_one_upper_block_is_non_zero, 1);
|
||||
|
||||
int factor = (i) ? 3 : 2;
|
||||
int factor_lut_id = factor - 2;
|
||||
auto message_modulus = radix_params.message_modulus;
|
||||
int factor = (i) ? message_modulus - 1 : message_modulus - 2;
|
||||
int factor_lut_id = (i) ? 1 : 0;
|
||||
for (size_t k = 0;
|
||||
k < cleaned_merged_interesting_remainder->num_radix_blocks; k++) {
|
||||
copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
|
||||
|
||||
@@ -520,8 +520,7 @@ __host__ void integer_radix_apply_univariate_lookup_table_kb(
|
||||
if (num_radix_blocks > lut->num_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of lut radix blocks")
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks ||
|
||||
num_radix_blocks > lwe_array_in->num_radix_blocks)
|
||||
if (num_radix_blocks > lwe_array_out->num_radix_blocks)
|
||||
PANIC("Cuda error: num radix blocks on which lut is applied should be "
|
||||
"smaller or equal to the number of input & output radix blocks")
|
||||
|
||||
@@ -1291,7 +1290,7 @@ void host_compute_prefix_sum_hillis_steele(
|
||||
}
|
||||
|
||||
// This function is used to perform step 2 of Thomas' new propagation algorithm
|
||||
// Consist three steps:
|
||||
// Consists of three steps:
|
||||
// - propagates the carry within each group with cheap LWE operations stored in
|
||||
// simulators
|
||||
// - calculates the propagation state of each group
|
||||
@@ -1616,10 +1615,12 @@ __host__ void reduce_signs(
|
||||
auto message_modulus = params.message_modulus;
|
||||
auto carry_modulus = params.carry_modulus;
|
||||
|
||||
auto num_bits_in_message = log2_int(message_modulus);
|
||||
std::function<Torus(Torus)> reduce_two_orderings_function =
|
||||
[diff_buffer, sign_handler_f](Torus x) -> Torus {
|
||||
int msb = (x >> 2) & 3;
|
||||
int lsb = x & 3;
|
||||
[diff_buffer, sign_handler_f, num_bits_in_message,
|
||||
message_modulus](Torus x) -> Torus {
|
||||
Torus msb = (x >> num_bits_in_message) & (message_modulus - 1);
|
||||
Torus lsb = x & (message_modulus - 1);
|
||||
|
||||
return diff_buffer->tree_buffer->block_selector_f(msb, lsb);
|
||||
};
|
||||
@@ -1640,7 +1641,7 @@ __host__ void reduce_signs(
|
||||
|
||||
while (num_sign_blocks > 2) {
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
|
||||
num_sign_blocks, 4);
|
||||
num_sign_blocks, message_modulus);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, signs_a, signs_b, bsks, ksks,
|
||||
ms_noise_reduction_key, lut, num_sign_blocks / 2);
|
||||
@@ -1669,7 +1670,8 @@ __host__ void reduce_signs(
|
||||
message_modulus, carry_modulus, final_lut_f, true);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a, 2, 4);
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
|
||||
num_sign_blocks, message_modulus);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, signs_array_out, signs_b, bsks, ksks,
|
||||
ms_noise_reduction_key, lut, 1);
|
||||
@@ -1677,8 +1679,8 @@ __host__ void reduce_signs(
|
||||
} else {
|
||||
|
||||
std::function<Torus(Torus)> final_lut_f =
|
||||
[mem_ptr, sign_handler_f](Torus x) -> Torus {
|
||||
return sign_handler_f(x & 3);
|
||||
[mem_ptr, sign_handler_f, message_modulus](Torus x) -> Torus {
|
||||
return sign_handler_f(x & (message_modulus - 1));
|
||||
};
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->reduce_signs_lut;
|
||||
|
||||
@@ -226,7 +226,8 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, bool reduce_degrees_for_single_carry_propagation,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
@@ -234,64 +235,59 @@ void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
if (radix_lwe_vec->num_radix_blocks % radix_lwe_out->num_radix_blocks != 0)
|
||||
PANIC("Cuda error: input vector length should be a multiple of the "
|
||||
"output's number of radix blocks")
|
||||
// FIXME: this should not be necessary, we should make sure sum_ctxt works in
|
||||
// the general case
|
||||
for (int i = 0; i < radix_lwe_vec->num_radix_blocks; i++) {
|
||||
radix_lwe_vec->degrees[i] = mem->params.message_modulus - 1;
|
||||
}
|
||||
switch (mem->params.polynomial_size) {
|
||||
case 512:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<512>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<1024>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<2048>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<4096>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<8192>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<16384>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, radix_lwe_out,
|
||||
radix_lwe_vec, bsks, (uint64_t **)(ksks), ms_noise_reduction_key, mem,
|
||||
radix_lwe_vec, reduce_degrees_for_single_carry_propagation, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem,
|
||||
radix_lwe_out->num_radix_blocks,
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks,
|
||||
nullptr);
|
||||
radix_lwe_vec->num_radix_blocks / radix_lwe_out->num_radix_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
|
||||
|
||||
@@ -20,28 +20,11 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
#include <queue>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void smart_copy(Torus *dst, Torus *src, int32_t *id_out,
|
||||
int32_t *id_in, size_t lwe_size) {
|
||||
size_t tid = threadIdx.x;
|
||||
size_t b_id = blockIdx.x;
|
||||
size_t stride = blockDim.x;
|
||||
|
||||
auto input_id = id_in[b_id];
|
||||
auto output_id = id_out[b_id];
|
||||
|
||||
auto cur_src = (input_id >= 0) ? &src[input_id * lwe_size] : nullptr;
|
||||
auto cur_dst = &dst[output_id * lwe_size];
|
||||
|
||||
for (int i = tid; i < lwe_size; i += stride) {
|
||||
cur_dst[i] = (input_id >= 0) ? cur_src[i] : 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__global__ void
|
||||
all_shifted_lhs_rhs(Torus const *radix_lwe_left, Torus *lsb_ciphertext,
|
||||
@@ -94,33 +77,173 @@ all_shifted_lhs_rhs(Torus const *radix_lwe_left, Torus *lsb_ciphertext,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void tree_add_chunks(Torus *result_blocks, Torus *input_blocks,
|
||||
uint32_t chunk_size, uint32_t block_size,
|
||||
uint32_t num_blocks) {
|
||||
__global__ inline void radix_vec_to_columns(uint32_t *const *const columns,
|
||||
uint32_t *const columns_counter,
|
||||
const uint64_t *const degrees,
|
||||
const uint32_t num_radix_blocks,
|
||||
const uint32_t num_radix_in_vec) {
|
||||
|
||||
size_t stride = blockDim.x;
|
||||
size_t chunk_id = blockIdx.x;
|
||||
size_t chunk_elem_size = chunk_size * num_blocks * block_size;
|
||||
size_t radix_elem_size = num_blocks * block_size;
|
||||
auto src_chunk = &input_blocks[chunk_id * chunk_elem_size];
|
||||
auto dst_radix = &result_blocks[chunk_id * radix_elem_size];
|
||||
size_t block_stride = blockIdx.y * block_size;
|
||||
auto result = &dst_radix[block_stride];
|
||||
|
||||
// init shared mem with first radix of chunk
|
||||
size_t tid = threadIdx.x;
|
||||
for (int i = tid; i < block_size; i += stride) {
|
||||
result[i] = src_chunk[block_stride + i];
|
||||
}
|
||||
|
||||
// accumulate rest of the radixes
|
||||
for (int r_id = 1; r_id < chunk_size; r_id++) {
|
||||
auto cur_src_radix = &src_chunk[r_id * radix_elem_size];
|
||||
for (int i = tid; i < block_size; i += stride) {
|
||||
result[i] += cur_src_radix[block_stride + i];
|
||||
const uint32_t idx = threadIdx.x;
|
||||
size_t cnt = 0;
|
||||
for (int i = 0; i < num_radix_in_vec; i++) {
|
||||
size_t ct_id = i * num_radix_blocks + idx;
|
||||
if (degrees[ct_id] != 0) {
|
||||
columns[idx][cnt] = ct_id;
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
columns_counter[idx] = cnt;
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ inline void prepare_new_columns_and_pbs_indexes(
|
||||
uint32_t *const *const new_columns, uint32_t *const new_columns_counter,
|
||||
Torus *const pbs_indexes_in, Torus *const pbs_indexes_out,
|
||||
Torus *const lut_indexes, uint32_t *const pbs_counters,
|
||||
const uint32_t *const *const columns, const uint32_t *const columns_counter,
|
||||
const uint32_t chunk_size) {
|
||||
__shared__ uint32_t counter, sharedOr;
|
||||
|
||||
if (threadIdx.x == 0) {
|
||||
counter = 0;
|
||||
sharedOr = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
const uint32_t base_id = threadIdx.x;
|
||||
const uint32_t column_len = columns_counter[base_id];
|
||||
|
||||
uint32_t ct_count = 0;
|
||||
for (uint32_t i = 0; i + chunk_size <= column_len; i += chunk_size) {
|
||||
// those indexes are for message ciphertexts
|
||||
// for message ciphertexts in and out index should be same
|
||||
const uint32_t in_index = columns[base_id][i];
|
||||
new_columns[base_id][ct_count] = in_index;
|
||||
const uint32_t pbs_index = atomicAdd(&counter, 1);
|
||||
pbs_indexes_in[pbs_index] = in_index;
|
||||
pbs_indexes_out[pbs_index] = in_index;
|
||||
lut_indexes[pbs_index] = 0;
|
||||
++ct_count;
|
||||
}
|
||||
// ct1 ct2 ct3
|
||||
// pbs_indexes: 0, 1, 2
|
||||
// pbs_indexes: 2, 1, 0
|
||||
|
||||
__syncthreads();
|
||||
uint32_t message_count = counter;
|
||||
|
||||
if (base_id > 0) {
|
||||
const uint32_t prev_base_id = base_id - 1;
|
||||
const uint32_t prev_column_len = columns_counter[prev_base_id];
|
||||
|
||||
for (uint32_t i = 0; i + chunk_size <= prev_column_len; i += chunk_size) {
|
||||
// those indexes are for carry ciphertexts
|
||||
// for carry ciphertexts input is same as for message
|
||||
// output will be placed to next block in the column
|
||||
const uint32_t in_index = columns[prev_base_id][i];
|
||||
const uint32_t out_index = columns[prev_base_id][i + 1];
|
||||
new_columns[base_id][ct_count] = out_index;
|
||||
const uint32_t pbs_index = atomicAdd(&counter, 1);
|
||||
pbs_indexes_in[pbs_index] = in_index;
|
||||
pbs_indexes_out[pbs_index] = out_index;
|
||||
lut_indexes[pbs_index] = 1;
|
||||
++ct_count;
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t start_index = column_len - column_len % chunk_size;
|
||||
for (uint32_t i = start_index; i < column_len; ++i) {
|
||||
new_columns[base_id][ct_count] = columns[base_id][i];
|
||||
++ct_count;
|
||||
}
|
||||
|
||||
new_columns_counter[base_id] = ct_count;
|
||||
|
||||
if (ct_count > chunk_size) {
|
||||
atomicOr(&sharedOr, 1);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x == 0) {
|
||||
pbs_counters[0] = counter;
|
||||
pbs_counters[1] = message_count;
|
||||
pbs_counters[2] = sharedOr;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ inline void prepare_final_pbs_indexes(
|
||||
Torus *const pbs_indexes_in, Torus *const pbs_indexes_out,
|
||||
Torus *const lut_indexes, const uint32_t num_radix_blocks) {
|
||||
int idx = threadIdx.x;
|
||||
pbs_indexes_in[idx] = idx % num_radix_blocks;
|
||||
pbs_indexes_out[idx] = idx + idx / num_radix_blocks;
|
||||
lut_indexes[idx] = idx / num_radix_blocks;
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void calculate_chunks(Torus *const input_blocks,
|
||||
const uint32_t *const *const columns,
|
||||
const uint32_t *const columns_counter,
|
||||
const uint32_t chunk_size,
|
||||
const uint32_t block_size) {
|
||||
|
||||
const uint32_t part_size = blockDim.x;
|
||||
const uint32_t base_id = blockIdx.x;
|
||||
const uint32_t part_id = blockIdx.y;
|
||||
const uint32_t coef_id = part_id * part_size + threadIdx.x;
|
||||
|
||||
if (coef_id >= block_size)
|
||||
return;
|
||||
|
||||
const uint32_t column_len = columns_counter[base_id];
|
||||
|
||||
if (column_len >= chunk_size) {
|
||||
const uint32_t num_chunks = column_len / chunk_size;
|
||||
Torus result = 0;
|
||||
|
||||
for (uint32_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
|
||||
const uint32_t first_ct_id = columns[base_id][chunk_id * chunk_size];
|
||||
result = input_blocks[first_ct_id * block_size + coef_id];
|
||||
|
||||
for (uint32_t ct_id = 1; ct_id < chunk_size; ++ct_id) {
|
||||
const uint32_t cur_ct_id =
|
||||
columns[base_id][chunk_id * chunk_size + ct_id];
|
||||
result += input_blocks[cur_ct_id * block_size + coef_id];
|
||||
}
|
||||
|
||||
input_blocks[first_ct_id * block_size + coef_id] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__global__ void calculate_final_chunk_into_radix(
|
||||
Torus *const out_radix, const Torus *const input_blocks,
|
||||
const uint32_t *const *const columns, const uint32_t *const columns_counter,
|
||||
const uint32_t chunk_size, const uint32_t block_size) {
|
||||
|
||||
const uint32_t part_size = blockDim.x;
|
||||
const uint32_t base_id = blockIdx.x;
|
||||
const uint32_t part_id = blockIdx.y;
|
||||
const uint32_t coef_id = part_id * part_size + threadIdx.x;
|
||||
|
||||
if (coef_id >= block_size)
|
||||
return;
|
||||
|
||||
const uint32_t column_len = columns_counter[base_id];
|
||||
|
||||
Torus result = 0;
|
||||
if (column_len) {
|
||||
const uint32_t first_ct_id = columns[base_id][0];
|
||||
result = input_blocks[first_ct_id * block_size + coef_id];
|
||||
|
||||
for (uint32_t i = 1; i < column_len; ++i) {
|
||||
const uint32_t cur_ct_it = columns[base_id][i];
|
||||
result += input_blocks[cur_ct_it * block_size + coef_id];
|
||||
}
|
||||
}
|
||||
out_radix[base_id * block_size + coef_id] = result;
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
@@ -167,6 +290,65 @@ __global__ void fill_radix_from_lsb_msb(Torus *result_blocks, Torus *lsb_blocks,
|
||||
(process_msb) ? cur_msb_ct[params::degree] : 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool at_least_one_column_needs_processing(
|
||||
const uint64_t *const degrees, const uint32_t num_radix_blocks,
|
||||
const uint32_t num_radix_in_vec, const uint32_t chunk_size) {
|
||||
std::vector<uint32_t> columns_count(num_radix_blocks, 0);
|
||||
|
||||
for (size_t column = 0; column < num_radix_blocks; ++column) {
|
||||
for (size_t block = 0; block < num_radix_in_vec; ++block) {
|
||||
const size_t block_index = block * num_radix_blocks + column;
|
||||
if (degrees[block_index]) {
|
||||
columns_count[column]++;
|
||||
if (columns_count[column] > chunk_size) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void calculate_final_degrees(uint64_t *const out_degrees,
|
||||
const uint64_t *const input_degrees,
|
||||
size_t num_blocks, size_t num_radix_in_vec,
|
||||
size_t chunk_size,
|
||||
uint64_t message_modulus) {
|
||||
|
||||
auto get_degree = [message_modulus](uint64_t degree) -> uint64_t {
|
||||
return std::min(message_modulus - 1, degree);
|
||||
};
|
||||
std::vector<std::queue<uint64_t>> columns(num_blocks);
|
||||
for (size_t i = 0; i < num_radix_in_vec; ++i) {
|
||||
for (size_t j = 0; j < num_blocks; ++j) {
|
||||
if (input_degrees[i * num_blocks + j])
|
||||
columns[j].push(input_degrees[i * num_blocks + j]);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
auto &col = columns[i];
|
||||
while (col.size() > 1) {
|
||||
uint32_t cur_degree = 0;
|
||||
size_t mn = std::min(chunk_size, col.size());
|
||||
for (int j = 0; j < mn; ++j) {
|
||||
cur_degree += col.front();
|
||||
col.pop();
|
||||
}
|
||||
const uint64_t new_degree = get_degree(cur_degree);
|
||||
col.push(new_degree);
|
||||
if ((i + 1) < num_blocks) {
|
||||
columns[i + 1].push(new_degree);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_blocks; i++) {
|
||||
out_degrees[i] = (columns[i].empty()) ? 0 : columns[i].front();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ uint64_t scratch_cuda_integer_partial_sum_ciphertexts_vec_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
@@ -181,15 +363,107 @@ __host__ uint64_t scratch_cuda_integer_partial_sum_ciphertexts_vec_kb(
|
||||
return size_tracker;
|
||||
}
|
||||
|
||||
__global__ inline void DEBUG_PRINT_COLUMNS(uint32_t *const *const columns,
|
||||
uint32_t *const columns_counter,
|
||||
const uint32_t num_radix_blocks) {
|
||||
printf("cuda_columns_counter:\n");
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
printf("%d ", columns_counter[i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("cuda_columns:\n");
|
||||
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
printf("column[%d]: ", i);
|
||||
for (int j = 0; j < columns_counter[i]; j++)
|
||||
{
|
||||
printf("%d ", columns[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
|
||||
__global__ inline void DEBUG_PRINT_COLUMNS_DATA(uint32_t *const *const columns,
|
||||
uint32_t *const columns_counter,
|
||||
uint64_t* data,
|
||||
const uint32_t num_radix_blocks, size_t lwe_size) {
|
||||
|
||||
uint64_t delta = 576460752303423488ULL;
|
||||
__syncthreads();
|
||||
printf("cuda_new_columns:\n");
|
||||
__syncthreads();
|
||||
for (int i = 0; i < num_radix_blocks; i++) {
|
||||
__syncthreads();
|
||||
printf("column[%d]: ", i);
|
||||
__syncthreads();
|
||||
for (int j = 0; j < columns_counter[i]; j++)
|
||||
{
|
||||
__syncthreads();
|
||||
auto cur_data =data[ columns[i][j] * lwe_size + lwe_size - 1];
|
||||
cur_data /= delta;
|
||||
printf("%llu ", cur_data);
|
||||
__syncthreads();
|
||||
}
|
||||
__syncthreads();
|
||||
printf("\n");
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
printf("\n");
|
||||
__syncthreads();
|
||||
|
||||
}
|
||||
|
||||
template<typename Torus, bool input, bool clear>
|
||||
__global__ inline void DEBUG_PRINT_PBS_DATA(Torus * data, Torus* input_indexes, Torus*
|
||||
output_indexes, Torus *lut_indexes, size_t lwe_size, int num) {
|
||||
printf("input_output_indexes: \n");
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
auto input_val = data[input_indexes[i] * lwe_size + lwe_size -1];
|
||||
auto output_val = data[output_indexes[i] * lwe_size + lwe_size -1];
|
||||
|
||||
auto val = input ? input_val : output_val;
|
||||
auto val_clear = clear ? val / 576460752303423488ULL : val;
|
||||
|
||||
printf("%d %lu %lu %lu %lu %lu\n", i, input_indexes[i], output_indexes[i], lut_indexes[i],
|
||||
val_clear, val);
|
||||
}
|
||||
}
|
||||
|
||||
//template<typename Torus>
|
||||
//__global__ inline void DEBUG_PRINT_RADIX(Torus * data, size_t num_blocks, size_t lwe_size) {
|
||||
// for (int i = 0; i < num_blocks; i++) {
|
||||
// auto val = data[i * lwe_size + lwe_size - 1];
|
||||
// auto val_clear = val / 576460752303423488ULL;
|
||||
// printf("cuda_partial_sum_result: %lu %lu\n", val, val_clear);
|
||||
// }
|
||||
//}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *terms, void *const *bsks, uint64_t *const *ksks,
|
||||
CudaRadixCiphertextFFI *terms, bool reduce_degrees_for_single_carry_propagation, void *const
|
||||
*bsks, uint64_t *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
int_sum_ciphertexts_vec_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_radix_blocks, uint32_t num_radix_in_vec,
|
||||
int_radix_lut<Torus> *reused_lut) {
|
||||
uint32_t num_radix_blocks, uint32_t num_radix_in_vec) {
|
||||
// cudaDeviceSynchronize();
|
||||
// print_body<Torus>("cuda_input_partial_sum", (Torus*)terms->ptr, num_radix_blocks * num_radix_in_vec,
|
||||
// 2048,
|
||||
// 576460752303423488ULL);
|
||||
|
||||
// for (int i = 0; i <num_radix_blocks * num_radix_in_vec; i++ ) {
|
||||
// printf("cuda_input_degrees: %d\n", terms->degrees[i]);
|
||||
// }
|
||||
// cudaDeviceSynchronize();
|
||||
auto big_lwe_dimension = mem_ptr->params.big_lwe_dimension;
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
|
||||
if (terms->lwe_dimension != radix_lwe_out->lwe_dimension)
|
||||
PANIC("Cuda error: output and input radix ciphertexts should have the same "
|
||||
@@ -199,22 +473,28 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
PANIC("Cuda error: input vector does not have enough blocks")
|
||||
if (num_radix_blocks > radix_lwe_out->num_radix_blocks)
|
||||
PANIC("Cuda error: output does not have enough blocks")
|
||||
auto new_blocks = mem_ptr->new_blocks;
|
||||
auto new_blocks_copy = mem_ptr->new_blocks_copy;
|
||||
auto old_blocks = mem_ptr->old_blocks;
|
||||
|
||||
auto current_blocks = mem_ptr->current_blocks;
|
||||
auto small_lwe_vector = mem_ptr->small_lwe_vector;
|
||||
auto d_degrees = mem_ptr->d_degrees;
|
||||
auto d_columns = mem_ptr->d_columns;
|
||||
auto d_columns_counter = mem_ptr->d_columns_counter;
|
||||
auto d_new_columns = mem_ptr->d_new_columns;
|
||||
auto d_new_columns_counter = mem_ptr->d_new_columns_counter;
|
||||
auto d_pbs_indexes_in = mem_ptr->luts_message_carry->lwe_indexes_in;
|
||||
auto d_pbs_indexes_out = mem_ptr->luts_message_carry->lwe_indexes_out;
|
||||
auto d_pbs_counters = mem_ptr->d_pbs_counters;
|
||||
|
||||
auto d_smart_copy_in = mem_ptr->d_smart_copy_in;
|
||||
auto d_smart_copy_out = mem_ptr->d_smart_copy_out;
|
||||
auto luts_message_carry = mem_ptr->luts_message_carry;
|
||||
|
||||
auto message_modulus = mem_ptr->params.message_modulus;
|
||||
auto carry_modulus = mem_ptr->params.carry_modulus;
|
||||
auto big_lwe_dimension = mem_ptr->params.big_lwe_dimension;
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
auto glwe_dimension = mem_ptr->params.glwe_dimension;
|
||||
auto polynomial_size = mem_ptr->params.polynomial_size;
|
||||
auto small_lwe_dimension = mem_ptr->params.small_lwe_dimension;
|
||||
auto small_lwe_size = small_lwe_dimension + 1;
|
||||
auto chunk_size =
|
||||
(mem_ptr->params.message_modulus * mem_ptr->params.carry_modulus - 1) /
|
||||
(mem_ptr->params.message_modulus - 1);
|
||||
|
||||
size_t total_blocks_in_vec = num_radix_blocks * num_radix_in_vec;
|
||||
|
||||
// In the case of extracting a single LWE this parameters are dummy
|
||||
uint32_t num_many_lut = 1;
|
||||
@@ -228,244 +508,202 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
terms, 0, num_radix_blocks);
|
||||
return;
|
||||
}
|
||||
if (old_blocks != terms) {
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], old_blocks,
|
||||
terms);
|
||||
}
|
||||
|
||||
if (num_radix_in_vec == 2) {
|
||||
CudaRadixCiphertextFFI old_blocks_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&old_blocks_slice, old_blocks,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks_slice, num_radix_blocks);
|
||||
CudaRadixCiphertextFFI terms_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&terms_slice, terms, num_radix_blocks,
|
||||
2 * num_radix_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, terms,
|
||||
&terms_slice, num_radix_blocks);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t r = num_radix_in_vec;
|
||||
size_t total_modulus = message_modulus * carry_modulus;
|
||||
size_t message_max = message_modulus - 1;
|
||||
size_t chunk_size = (total_modulus - 1) / message_max;
|
||||
|
||||
size_t h_lwe_idx_in[terms->num_radix_blocks];
|
||||
size_t h_lwe_idx_out[terms->num_radix_blocks];
|
||||
int32_t h_smart_copy_in[terms->num_radix_blocks];
|
||||
int32_t h_smart_copy_out[terms->num_radix_blocks];
|
||||
|
||||
/// Here it is important to query the default max shared memory on device 0
|
||||
/// instead of cuda_get_max_shared_memory,
|
||||
/// to avoid bugs with tree_add_chunks trying to use too much shared memory
|
||||
auto max_shared_memory = 0;
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock, 0));
|
||||
|
||||
// create lut object for message and carry
|
||||
// we allocate luts_message_carry in the host function (instead of scratch)
|
||||
// to reduce average memory consumption
|
||||
int_radix_lut<Torus> *luts_message_carry;
|
||||
size_t ch_amount = r / chunk_size;
|
||||
if (!ch_amount)
|
||||
ch_amount++;
|
||||
if (reused_lut == nullptr) {
|
||||
luts_message_carry = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->params, 2,
|
||||
2 * ch_amount * num_radix_blocks, true, nullptr);
|
||||
} else {
|
||||
luts_message_carry = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->params, 2,
|
||||
2 * ch_amount * num_radix_blocks, reused_lut, true, nullptr);
|
||||
if (current_blocks != terms) {
|
||||
copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0],
|
||||
current_blocks, terms);
|
||||
}
|
||||
auto message_acc = luts_message_carry->get_lut(0, 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(0, 1);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_message = [message_modulus](Torus x) -> Torus {
|
||||
return x % message_modulus;
|
||||
};
|
||||
auto lut_f_carry = [message_modulus](Torus x) -> Torus {
|
||||
return x / message_modulus;
|
||||
};
|
||||
cuda_memcpy_async_to_gpu(d_degrees, current_blocks->degrees,
|
||||
total_blocks_in_vec * sizeof(uint64_t), streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
// generate accumulators
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], message_acc,
|
||||
luts_message_carry->get_degree(0), luts_message_carry->get_max_degree(0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
lut_f_message, true);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc, luts_message_carry->get_degree(1),
|
||||
luts_message_carry->get_max_degree(1), glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_carry, true);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
radix_vec_to_columns<<<1, num_radix_blocks, 0, streams[0]>>>(
|
||||
d_columns, d_columns_counter, d_degrees, num_radix_blocks,
|
||||
num_radix_in_vec);
|
||||
|
||||
while (r > 2) {
|
||||
size_t cur_total_blocks = r * num_radix_blocks;
|
||||
size_t ch_amount = r / chunk_size;
|
||||
if (!ch_amount)
|
||||
ch_amount++;
|
||||
dim3 add_grid(ch_amount, num_radix_blocks, 1);
|
||||
bool needs_processing = at_least_one_column_needs_processing(
|
||||
current_blocks->degrees, num_radix_blocks, num_radix_in_vec, chunk_size);
|
||||
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
tree_add_chunks<Torus><<<add_grid, 512, 0, streams[0]>>>(
|
||||
(Torus *)new_blocks->ptr, (Torus *)old_blocks->ptr,
|
||||
std::min(r, chunk_size), big_lwe_size, num_radix_blocks);
|
||||
int number_of_threads = min(256, params::degree);
|
||||
int part_count = (big_lwe_size + number_of_threads - 1) / number_of_threads;
|
||||
const dim3 number_of_blocks_2d(num_radix_blocks, part_count, 1);
|
||||
|
||||
check_cuda_error(cudaGetLastError());
|
||||
// h_pbs_counters[0] - total ciphertexts
|
||||
// h_pbs_counters[1] - message ciphertexts
|
||||
// h_pbs_counters[2] - at_least_one_column_needs_processing
|
||||
uint32_t *h_pbs_counters;
|
||||
cudaMallocHost((void **)&h_pbs_counters, 3 * sizeof(uint32_t));
|
||||
if (mem_ptr->mem_reuse) {
|
||||
mem_ptr->setup_lookup_tables(streams, gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
size_t total_count = 0;
|
||||
size_t message_count = 0;
|
||||
size_t carry_count = 0;
|
||||
size_t sm_copy_count = 0;
|
||||
|
||||
generate_ids_update_degrees(
|
||||
terms->degrees, h_lwe_idx_in, h_lwe_idx_out, h_smart_copy_in,
|
||||
h_smart_copy_out, ch_amount, r, num_radix_blocks, chunk_size,
|
||||
message_max, total_count, message_count, carry_count, sm_copy_count);
|
||||
auto lwe_indexes_in = luts_message_carry->lwe_indexes_in;
|
||||
auto lwe_indexes_out = luts_message_carry->lwe_indexes_out;
|
||||
luts_message_carry->set_lwe_indexes(streams[0], gpu_indexes[0],
|
||||
h_lwe_idx_in, h_lwe_idx_out);
|
||||
while (needs_processing) {
|
||||
// cudaDeviceSynchronize();
|
||||
// DEBUG_PRINT_COLUMNS<<<1, 1, 0, streams[0]>>>(d_columns, d_columns_counter, num_radix_blocks);
|
||||
// DEBUG_PRINT_COLUMNS_DATA<<<1, 1, 0, streams[0]>>>(d_columns, d_columns_counter, (uint64_t *)
|
||||
// (current_blocks->ptr), num_radix_blocks, big_lwe_size);
|
||||
calculate_chunks<Torus>
|
||||
<<<number_of_blocks_2d, number_of_threads, 0, streams[0]>>>(
|
||||
(Torus *)(current_blocks->ptr), d_columns, d_columns_counter,
|
||||
chunk_size, big_lwe_size);
|
||||
|
||||
size_t copy_size = sm_copy_count * sizeof(int32_t);
|
||||
cuda_memcpy_async_to_gpu(d_smart_copy_in, h_smart_copy_in, copy_size,
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_to_gpu(d_smart_copy_out, h_smart_copy_out, copy_size,
|
||||
streams[0], gpu_indexes[0]);
|
||||
prepare_new_columns_and_pbs_indexes<<<1, num_radix_blocks, 0, streams[0]>>>(
|
||||
d_new_columns, d_new_columns_counter, d_pbs_indexes_in,
|
||||
d_pbs_indexes_out, luts_message_carry->get_lut_indexes(0, 0),
|
||||
d_pbs_counters, d_columns, d_columns_counter, chunk_size);
|
||||
|
||||
// inside d_smart_copy_in there are only -1 values
|
||||
// it's fine to call smart_copy with same pointer
|
||||
// as source and destination
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks_copy, 0, r * num_radix_blocks,
|
||||
new_blocks, 0, r * num_radix_blocks);
|
||||
smart_copy<Torus><<<sm_copy_count, 1024, 0, streams[0]>>>(
|
||||
(Torus *)new_blocks->ptr, (Torus *)new_blocks_copy->ptr,
|
||||
d_smart_copy_out, d_smart_copy_in, big_lwe_size);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
cuda_memcpy_async_to_cpu(h_pbs_counters, d_pbs_counters,
|
||||
3 * sizeof(uint32_t), streams[0], gpu_indexes[0]);
|
||||
|
||||
if (carry_count > 0)
|
||||
cuda_set_value_async<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
luts_message_carry->get_lut_indexes(0, message_count), 1,
|
||||
carry_count);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
const uint32_t total_ciphertexts = h_pbs_counters[0];
|
||||
const uint32_t total_messages = h_pbs_counters[1];
|
||||
needs_processing = (h_pbs_counters[2] != 0);
|
||||
|
||||
auto active_gpu_count = get_active_gpu_count(total_ciphertexts, gpu_count);
|
||||
|
||||
// DEBUG_PRINT_PBS_DATA<Torus, true, true><<<1, 1, 0, streams[0]>>>(
|
||||
// (Torus *)(current_blocks->ptr), d_pbs_indexes_in, d_pbs_indexes_out,
|
||||
// luts_message_carry->get_lut_indexes(0, 0), big_lwe_size, total_ciphertexts
|
||||
// );
|
||||
|
||||
/// For multi GPU execution we create vectors of pointers for inputs and
|
||||
/// outputs
|
||||
std::vector<Torus *> new_blocks_vec = luts_message_carry->lwe_array_in_vec;
|
||||
std::vector<Torus *> small_lwe_vector_vec =
|
||||
luts_message_carry->lwe_after_ks_vec;
|
||||
std::vector<Torus *> lwe_after_pbs_vec =
|
||||
luts_message_carry->lwe_after_pbs_vec;
|
||||
std::vector<Torus *> lwe_trivial_indexes_vec =
|
||||
luts_message_carry->lwe_trivial_indexes_vec;
|
||||
|
||||
auto active_gpu_count = get_active_gpu_count(total_count, gpu_count);
|
||||
if (active_gpu_count == 1) {
|
||||
/// Apply KS to go from a big LWE dimension to a small LWE dimension
|
||||
/// After this keyswitch execution, we need to synchronize the streams
|
||||
/// because the keyswitch and PBS do not operate on the same number of
|
||||
/// inputs
|
||||
execute_keyswitch_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)small_lwe_vector->ptr,
|
||||
lwe_indexes_in, (Torus *)new_blocks->ptr, lwe_indexes_in, ksks,
|
||||
polynomial_size * glwe_dimension, small_lwe_dimension,
|
||||
mem_ptr->params.ks_base_log, mem_ptr->params.ks_level, message_count);
|
||||
|
||||
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
|
||||
/// dimension to a big LWE dimension
|
||||
execute_pbs_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)new_blocks->ptr, lwe_indexes_out,
|
||||
luts_message_carry->lut_vec, luts_message_carry->lut_indexes_vec,
|
||||
(Torus *)small_lwe_vector->ptr, lwe_indexes_in, bsks,
|
||||
ms_noise_reduction_key, luts_message_carry->buffer, glwe_dimension,
|
||||
small_lwe_dimension, polynomial_size, mem_ptr->params.pbs_base_log,
|
||||
mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
|
||||
total_count, mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
} else {
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
multi_gpu_scatter_lwe_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, new_blocks_vec,
|
||||
(Torus *)new_blocks->ptr, luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->using_trivial_lwe_indexes, message_count,
|
||||
big_lwe_size);
|
||||
|
||||
/// Apply KS to go from a big LWE dimension to a small LWE dimension
|
||||
/// After this keyswitch execution, we need to synchronize the streams
|
||||
/// because the keyswitch and PBS do not operate on the same number of
|
||||
/// inputs
|
||||
execute_keyswitch_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, small_lwe_vector_vec,
|
||||
lwe_trivial_indexes_vec, new_blocks_vec, lwe_trivial_indexes_vec,
|
||||
d_pbs_indexes_in, (Torus *)current_blocks->ptr, d_pbs_indexes_in,
|
||||
ksks, big_lwe_dimension, small_lwe_dimension,
|
||||
mem_ptr->params.ks_base_log, mem_ptr->params.ks_level, total_count);
|
||||
mem_ptr->params.ks_base_log, mem_ptr->params.ks_level,
|
||||
total_messages);
|
||||
|
||||
/// Copy data back to GPU 0, rebuild the lwe array, and scatter again on a
|
||||
/// different configuration
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
streams, gpu_indexes, gpu_count, (Torus *)small_lwe_vector->ptr,
|
||||
small_lwe_vector_vec, luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->using_trivial_lwe_indexes, message_count,
|
||||
small_lwe_size);
|
||||
/// Synchronize all GPUs
|
||||
for (uint i = 0; i < active_gpu_count; i++) {
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
|
||||
multi_gpu_scatter_lwe_async<Torus>(
|
||||
streams, gpu_indexes, gpu_count, small_lwe_vector_vec,
|
||||
(Torus *)small_lwe_vector->ptr, luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->using_trivial_lwe_indexes, total_count,
|
||||
small_lwe_size);
|
||||
|
||||
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
|
||||
/// dimension to a big LWE dimension
|
||||
execute_pbs_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, lwe_after_pbs_vec,
|
||||
lwe_trivial_indexes_vec, luts_message_carry->lut_vec,
|
||||
luts_message_carry->lut_indexes_vec, small_lwe_vector_vec,
|
||||
lwe_trivial_indexes_vec, bsks, ms_noise_reduction_key,
|
||||
streams, gpu_indexes, 1, (Torus *)current_blocks->ptr,
|
||||
d_pbs_indexes_out, luts_message_carry->lut_vec,
|
||||
luts_message_carry->lut_indexes_vec, (Torus *)small_lwe_vector->ptr,
|
||||
d_pbs_indexes_in, bsks, ms_noise_reduction_key,
|
||||
luts_message_carry->buffer, glwe_dimension, small_lwe_dimension,
|
||||
polynomial_size, mem_ptr->params.pbs_base_log,
|
||||
mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
|
||||
total_count, mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
total_ciphertexts, mem_ptr->params.pbs_type, num_many_lut,
|
||||
lut_stride);
|
||||
} else {
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->lwe_indexes_in,
|
||||
total_ciphertexts * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_out,
|
||||
luts_message_carry->lwe_indexes_out,
|
||||
total_ciphertexts * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, (Torus *)new_blocks->ptr,
|
||||
lwe_after_pbs_vec, luts_message_carry->h_lwe_indexes_out,
|
||||
luts_message_carry->using_trivial_lwe_indexes, total_count,
|
||||
big_lwe_size);
|
||||
/// Synchronize all GPUs
|
||||
for (uint i = 0; i < active_gpu_count; i++) {
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
}
|
||||
for (uint i = 0; i < total_count; i++) {
|
||||
auto degrees_index = luts_message_carry->h_lut_indexes[i];
|
||||
new_blocks->degrees[i] = luts_message_carry->degrees[degrees_index];
|
||||
new_blocks->noise_levels[i] = NoiseLevel::NOMINAL;
|
||||
luts_message_carry->using_trivial_lwe_indexes = false;
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, current_blocks,
|
||||
current_blocks, bsks, ksks, ms_noise_reduction_key,
|
||||
luts_message_carry, total_ciphertexts);
|
||||
}
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
// DEBUG_PRINT_PBS_DATA<Torus, false, true><<<1, 1, 0, streams[0]>>>(
|
||||
// (Torus *)(current_blocks->ptr), d_pbs_indexes_in, d_pbs_indexes_out,
|
||||
// luts_message_carry->get_lut_indexes(0, 0), big_lwe_size, total_ciphertexts
|
||||
// );
|
||||
|
||||
int rem_blocks = (r > chunk_size) ? r % chunk_size * num_radix_blocks : 0;
|
||||
int new_blocks_created = 2 * ch_amount * num_radix_blocks;
|
||||
|
||||
if (rem_blocks > 0)
|
||||
copy_radix_ciphertext_slice_async<Torus>(
|
||||
streams[0], gpu_indexes[0], new_blocks, new_blocks_created,
|
||||
new_blocks_created + rem_blocks, old_blocks,
|
||||
cur_total_blocks - rem_blocks, cur_total_blocks);
|
||||
std::swap(new_blocks, old_blocks);
|
||||
r = (new_blocks_created + rem_blocks) / num_radix_blocks;
|
||||
std::swap(d_columns, d_new_columns);
|
||||
std::swap(d_columns_counter, d_new_columns_counter);
|
||||
}
|
||||
luts_message_carry->release(streams, gpu_indexes, gpu_count);
|
||||
delete (luts_message_carry);
|
||||
|
||||
CudaRadixCiphertextFFI old_blocks_slice;
|
||||
as_radix_ciphertext_slice<Torus>(&old_blocks_slice, old_blocks,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out, old_blocks,
|
||||
&old_blocks_slice, num_radix_blocks);
|
||||
cudaFreeHost(h_pbs_counters);
|
||||
calculate_final_chunk_into_radix<Torus>
|
||||
<<<number_of_blocks_2d, number_of_threads, 0, streams[0]>>>(
|
||||
(Torus *)(radix_lwe_out->ptr), (Torus *)(current_blocks->ptr),
|
||||
d_columns, d_columns_counter, chunk_size, big_lwe_size);
|
||||
|
||||
if (reduce_degrees_for_single_carry_propagation) {
|
||||
prepare_final_pbs_indexes<Torus><<<1, 2 * num_radix_blocks, 0, streams[0]>>>(
|
||||
d_pbs_indexes_in, d_pbs_indexes_out,
|
||||
luts_message_carry->get_lut_indexes(0, 0), num_radix_blocks);
|
||||
|
||||
cuda_memset_async(
|
||||
(Torus *)(current_blocks->ptr) + big_lwe_size * num_radix_blocks, 0,
|
||||
big_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
auto active_gpu_count = get_active_gpu_count(2 * num_radix_blocks, gpu_count);
|
||||
|
||||
if (active_gpu_count == 1) {
|
||||
execute_keyswitch_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)small_lwe_vector->ptr,
|
||||
d_pbs_indexes_in, (Torus *)radix_lwe_out->ptr, d_pbs_indexes_in, ksks,
|
||||
big_lwe_dimension, small_lwe_dimension, mem_ptr->params.ks_base_log,
|
||||
mem_ptr->params.ks_level, num_radix_blocks);
|
||||
|
||||
execute_pbs_async<Torus>(
|
||||
streams, gpu_indexes, 1, (Torus *)current_blocks->ptr,
|
||||
d_pbs_indexes_out, luts_message_carry->lut_vec,
|
||||
luts_message_carry->lut_indexes_vec, (Torus *)small_lwe_vector->ptr,
|
||||
d_pbs_indexes_in, bsks, ms_noise_reduction_key,
|
||||
luts_message_carry->buffer, glwe_dimension, small_lwe_dimension,
|
||||
polynomial_size, mem_ptr->params.pbs_base_log,
|
||||
mem_ptr->params.pbs_level, mem_ptr->params.grouping_factor,
|
||||
2 * num_radix_blocks, mem_ptr->params.pbs_type, num_many_lut,
|
||||
lut_stride);
|
||||
} else {
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_in,
|
||||
luts_message_carry->lwe_indexes_in,
|
||||
2 * num_radix_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_to_cpu(luts_message_carry->h_lwe_indexes_out,
|
||||
luts_message_carry->lwe_indexes_out,
|
||||
2 * num_radix_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
luts_message_carry->using_trivial_lwe_indexes = false;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, current_blocks, radix_lwe_out,
|
||||
bsks, ksks, ms_noise_reduction_key, luts_message_carry,
|
||||
2 * num_radix_blocks);
|
||||
}
|
||||
// cudaDeviceSynchronize();
|
||||
// print_body<Torus>("cuda_before_add", (Torus*)radix_lwe_out->ptr, num_radix_blocks, 2048,
|
||||
// 576460752303423488ULL);
|
||||
// cudaDeviceSynchronize();
|
||||
calculate_final_degrees(radix_lwe_out->degrees, terms->degrees,
|
||||
num_radix_blocks, num_radix_in_vec, chunk_size,
|
||||
mem_ptr->params.message_modulus);
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
CudaRadixCiphertextFFI current_blocks_slice;
|
||||
as_radix_ciphertext_slice<Torus>(¤t_blocks_slice, current_blocks,
|
||||
num_radix_blocks, 2 * num_radix_blocks);
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], radix_lwe_out,
|
||||
current_blocks, ¤t_blocks_slice, num_radix_blocks);
|
||||
// printf("add_happened\n");
|
||||
}
|
||||
|
||||
|
||||
// cudaDeviceSynchronize();
|
||||
//
|
||||
// print_body<Torus>("cuda_out_after_add", (Torus*)radix_lwe_out->ptr, num_radix_blocks, 2048,
|
||||
// 576460752303423488ULL);
|
||||
// cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
@@ -599,14 +837,28 @@ __host__ void host_integer_mult_radix_kb(
|
||||
size_t b_id = i % num_blocks;
|
||||
terms_degree_msb[i] = (b_id > r_id) ? message_modulus - 2 : 0;
|
||||
}
|
||||
|
||||
|
||||
for (int i = 0; i < num_blocks * 2 * num_blocks; i++)
|
||||
{
|
||||
auto cur_ptr = (Torus*)vector_result_sb->ptr;
|
||||
cur_ptr += i * 2049 + 2048;
|
||||
print_debug<Torus>("", cur_ptr, 1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_blocks * 2 * num_blocks; i++) {
|
||||
printf("%d\n", vector_result_sb->degrees[i]);
|
||||
}
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<Torus, params>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, vector_result_sb, bsks,
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, vector_result_sb, true, bsks,
|
||||
ksks, ms_noise_reduction_key, mem_ptr->sum_ciphertexts_mem, num_blocks,
|
||||
2 * num_blocks, mem_ptr->luts_array);
|
||||
2 * num_blocks);
|
||||
|
||||
auto scp_mem_ptr = mem_ptr->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
|
||||
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, ms_noise_reduction_key, requested_flag,
|
||||
|
||||
@@ -115,13 +115,10 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
set_zero_radix_ciphertext_slice_async<T>(streams[0], gpu_indexes[0],
|
||||
lwe_array, 0, num_radix_blocks);
|
||||
} else {
|
||||
for (int i = 0; i < j * num_radix_blocks; i++) {
|
||||
all_shifted_buffer->degrees[i] = message_modulus - 1;
|
||||
}
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<T, params>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, all_shifted_buffer, bsks,
|
||||
streams, gpu_indexes, gpu_count, lwe_array, all_shifted_buffer, true, bsks,
|
||||
ksks, ms_noise_reduction_key, mem->sum_ciphertexts_vec_mem,
|
||||
num_radix_blocks, j, nullptr);
|
||||
num_radix_blocks, j);
|
||||
|
||||
auto scp_mem_ptr = mem->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
|
||||
@@ -261,6 +261,8 @@ void cuda_fourier_polynomial_mul(void *stream_v, uint32_t gpu_index,
|
||||
default:
|
||||
break;
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
cuda_drop_async(buffer, stream, gpu_index);
|
||||
}
|
||||
|
||||
|
||||
@@ -279,6 +279,7 @@ void cuda_convert_lwe_programmable_bootstrap_key(cudaStream_t stream,
|
||||
PANIC("Cuda error (convert KSK): unsupported polynomial size. Supported "
|
||||
"N's are powers of two in the interval [256..16384].")
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
cuda_drop_async(d_bsk, stream, gpu_index);
|
||||
cuda_drop_async(buffer, stream, gpu_index);
|
||||
@@ -315,6 +316,7 @@ void convert_u128_to_f128_and_forward_fft_128(cudaStream_t stream,
|
||||
// convert u128 into 4 x double
|
||||
batch_convert_u128_to_f128_strided_as_torus<params>
|
||||
<<<grid_size, block_size, 0, stream>>>(d_bsk, d_standard);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// call negacyclic 128 bit forward fft.
|
||||
if (full_sm) {
|
||||
@@ -326,6 +328,7 @@ void convert_u128_to_f128_and_forward_fft_128(cudaStream_t stream,
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(d_bsk, d_bsk,
|
||||
buffer);
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
cuda_drop_async(buffer, stream, gpu_index);
|
||||
}
|
||||
|
||||
|
||||
@@ -194,7 +194,8 @@ void execute_pbs_async(
|
||||
lut_indexes_vec[i] + (ptrdiff_t)(gpu_offset);
|
||||
|
||||
void *zeros = nullptr;
|
||||
if (ms_noise_reduction_key != nullptr)
|
||||
if (ms_noise_reduction_key != nullptr &&
|
||||
ms_noise_reduction_key->ptr != nullptr)
|
||||
zeros = ms_noise_reduction_key->ptr[i];
|
||||
cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
streams[i], gpu_indexes[i], current_lwe_array_out,
|
||||
|
||||
@@ -660,13 +660,15 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
(pbs_buffer<uint64_t, CLASSICAL> *)mem_ptr;
|
||||
|
||||
// If the parameters contain noise reduction key, then apply it
|
||||
if (ms_noise_reduction_key != nullptr) {
|
||||
if (ms_noise_reduction_key != nullptr &&
|
||||
ms_noise_reduction_key->ptr != nullptr) {
|
||||
if (ms_noise_reduction_key->num_zeros != 0) {
|
||||
uint32_t log_modulus = log2(polynomial_size) + 1;
|
||||
host_improve_noise_modulus_switch<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
buffer->temp_lwe_array_in,
|
||||
static_cast<uint64_t const *>(lwe_array_in),
|
||||
static_cast<uint64_t const *>(lwe_input_indexes),
|
||||
static_cast<uint64_t *>(ms_noise_reduction_ptr), lwe_dimension + 1,
|
||||
num_samples, ms_noise_reduction_key->num_zeros,
|
||||
ms_noise_reduction_key->ms_input_variance,
|
||||
@@ -846,4 +848,7 @@ template uint64_t scratch_cuda_programmable_bootstrap_tbc<uint64_t>(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array);
|
||||
template bool
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
|
||||
__uint128_t>(uint32_t polynomial_size, uint32_t max_shared_memory);
|
||||
#endif
|
||||
|
||||
@@ -256,6 +256,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<__uint128_t *>(buffer->temp_lwe_array_in),
|
||||
static_cast<__uint128_t const *>(lwe_array_in),
|
||||
static_cast<uint64_t const *>(buffer->trivial_indexes),
|
||||
static_cast<const __uint128_t *>(ms_noise_reduction_ptr),
|
||||
lwe_dimension + 1, num_samples, ms_noise_reduction_key->num_zeros,
|
||||
ms_noise_reduction_key->ms_input_variance,
|
||||
|
||||
@@ -398,20 +398,32 @@ uint64_t scratch_cuda_multi_bit_programmable_bootstrap_64(
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory) {
|
||||
|
||||
bool supports_cg =
|
||||
supports_cooperative_groups_on_multibit_programmable_bootstrap<uint64_t>(
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
input_lwe_ciphertext_count, cuda_get_max_shared_memory(gpu_index));
|
||||
#if (CUDA_ARCH >= 900)
|
||||
if (has_support_to_cuda_programmable_bootstrap_tbc_multi_bit<uint64_t>(
|
||||
// On H100s we should be using TBC until num_samples < num_sms / 2.
|
||||
// After that we switch to CG until not supported anymore.
|
||||
// At this point we return to TBC.
|
||||
int num_sms = 0;
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&num_sms, cudaDevAttrMultiProcessorCount, gpu_index));
|
||||
|
||||
bool supports_tbc =
|
||||
has_support_to_cuda_programmable_bootstrap_tbc_multi_bit<uint64_t>(
|
||||
input_lwe_ciphertext_count, glwe_dimension, polynomial_size,
|
||||
level_count, cuda_get_max_shared_memory(gpu_index)))
|
||||
level_count, cuda_get_max_shared_memory(gpu_index));
|
||||
|
||||
if (supports_tbc &&
|
||||
!(input_lwe_ciphertext_count > num_sms / 2 && supports_cg))
|
||||
return scratch_cuda_tbc_multi_bit_programmable_bootstrap<uint64_t>(
|
||||
stream, gpu_index, (pbs_buffer<uint64_t, MULTI_BIT> **)buffer,
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
input_lwe_ciphertext_count, allocate_gpu_memory);
|
||||
else
|
||||
#endif
|
||||
if (supports_cooperative_groups_on_multibit_programmable_bootstrap<
|
||||
uint64_t>(glwe_dimension, polynomial_size, level_count,
|
||||
input_lwe_ciphertext_count,
|
||||
cuda_get_max_shared_memory(gpu_index)))
|
||||
if (supports_cg)
|
||||
return scratch_cuda_cg_multi_bit_programmable_bootstrap<uint64_t>(
|
||||
stream, gpu_index, (pbs_buffer<uint64_t, MULTI_BIT> **)buffer,
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
|
||||
@@ -37,18 +37,21 @@ template <typename T> void print_debug(const char *name, const T *src, int N) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void print_body_kernel(T *src, int N, int lwe_dimension) {
|
||||
__global__ void print_body_kernel(T *src, int N, int lwe_dimension, T delta) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
printf("%lu, ", src[i * (lwe_dimension + 1) + lwe_dimension]);
|
||||
T body = src[i * (lwe_dimension + 1) + lwe_dimension];
|
||||
T clear = body / delta;
|
||||
printf("(%lu, %lu), ", body, clear);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void print_body(const char *name, T *src, int n, int lwe_dimension) {
|
||||
void print_body(const char *name, T *src, int n, int lwe_dimension, T delta) {
|
||||
printf("%s: ", name);
|
||||
cudaDeviceSynchronize();
|
||||
print_body_kernel<<<1, 1>>>(src, n, lwe_dimension);
|
||||
print_body_kernel<<<1, 1>>>(src, n, lwe_dimension, delta);
|
||||
cudaDeviceSynchronize();
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
std::mutex m;
|
||||
bool p2p_enabled = false;
|
||||
|
||||
int32_t cuda_setup_multi_gpu() {
|
||||
// Enable bidirectional p2p access between all available GPUs and device_0_id
|
||||
int32_t cuda_setup_multi_gpu(int device_0_id) {
|
||||
int num_gpus = cuda_get_number_of_gpus();
|
||||
if (num_gpus == 0)
|
||||
PANIC("GPU error: the number of GPUs should be > 0.")
|
||||
@@ -18,11 +19,13 @@ int32_t cuda_setup_multi_gpu() {
|
||||
omp_set_nested(1);
|
||||
int has_peer_access_to_device_0;
|
||||
for (int i = 1; i < num_gpus; i++) {
|
||||
check_cuda_error(
|
||||
cudaDeviceCanAccessPeer(&has_peer_access_to_device_0, i, 0));
|
||||
check_cuda_error(cudaDeviceCanAccessPeer(&has_peer_access_to_device_0,
|
||||
i, device_0_id));
|
||||
if (has_peer_access_to_device_0) {
|
||||
cuda_set_device(i);
|
||||
check_cuda_error(cudaDeviceEnablePeerAccess(0, 0));
|
||||
check_cuda_error(cudaDeviceEnablePeerAccess(device_0_id, 0));
|
||||
cuda_set_device(device_0_id);
|
||||
check_cuda_error(cudaDeviceEnablePeerAccess(i, 0));
|
||||
}
|
||||
num_used_gpus += 1;
|
||||
}
|
||||
|
||||
@@ -168,7 +168,7 @@ BENCHMARK_DEFINE_F(MultiBitBootstrap_u64, TbcMultiBit)
|
||||
(benchmark::State &st) {
|
||||
if (!has_support_to_cuda_programmable_bootstrap_tbc_multi_bit<uint64_t>(
|
||||
input_lwe_ciphertext_count, glwe_dimension, polynomial_size,
|
||||
pbs_level)) {
|
||||
pbs_level, cuda_get_max_shared_memory(0))) {
|
||||
st.SkipWithError("Configuration not supported for tbc operation");
|
||||
return;
|
||||
}
|
||||
@@ -256,7 +256,7 @@ BENCHMARK_DEFINE_F(ClassicalBootstrap_u64, TbcPBC)
|
||||
(benchmark::State &st) {
|
||||
if (!has_support_to_cuda_programmable_bootstrap_tbc<uint64_t>(
|
||||
input_lwe_ciphertext_count, glwe_dimension, polynomial_size,
|
||||
pbs_level)) {
|
||||
pbs_level, cuda_get_max_shared_memory(0))) {
|
||||
st.SkipWithError("Configuration not supported for tbc operation");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ public:
|
||||
number_of_inputs = (int)GetParam().number_of_inputs;
|
||||
|
||||
// Enable Multi-GPU logic
|
||||
gpu_count = cuda_setup_multi_gpu();
|
||||
gpu_count = cuda_setup_multi_gpu(0);
|
||||
active_gpu_count = std::min((uint)number_of_inputs, gpu_count);
|
||||
for (uint gpu_i = 0; gpu_i < active_gpu_count; gpu_i++) {
|
||||
streams.push_back(cuda_create_stream(gpu_i));
|
||||
|
||||
@@ -50,6 +50,7 @@ unsafe extern "C" {
|
||||
gpu_index: u32,
|
||||
lwe_array_out: *mut ffi::c_void,
|
||||
lwe_array_in: *const ffi::c_void,
|
||||
lwe_array_indexes: *const ffi::c_void,
|
||||
encrypted_zeros: *const ffi::c_void,
|
||||
lwe_size: u32,
|
||||
num_lwes: u32,
|
||||
@@ -1017,6 +1018,7 @@ unsafe extern "C" {
|
||||
gpu_count: u32,
|
||||
radix_lwe_out: *mut CudaRadixCiphertextFFI,
|
||||
radix_lwe_vec: *mut CudaRadixCiphertextFFI,
|
||||
reduce_degrees_for_single_carry_propagation: bool,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
@@ -1315,6 +1317,22 @@ unsafe extern "C" {
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn trim_radix_blocks_lsb_64(
|
||||
output: *mut CudaRadixCiphertextFFI,
|
||||
input: *const CudaRadixCiphertextFFI,
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
);
|
||||
}
|
||||
unsafe extern "C" {
|
||||
pub fn extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
output: *mut CudaRadixCiphertextFFI,
|
||||
input: *const CudaRadixCiphertextFFI,
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
);
|
||||
}
|
||||
pub const KS_TYPE_BIG_TO_SMALL: KS_TYPE = 0;
|
||||
pub const KS_TYPE_SMALL_TO_BIG: KS_TYPE = 1;
|
||||
pub type KS_TYPE = ffi::c_uint;
|
||||
|
||||
@@ -101,6 +101,6 @@ extern "C" {
|
||||
|
||||
pub fn cuda_drop_async(ptr: *mut c_void, stream: *mut c_void, gpu_index: u32);
|
||||
|
||||
pub fn cuda_setup_multi_gpu() -> i32;
|
||||
pub fn cuda_setup_multi_gpu(gpu_index: u32) -> i32;
|
||||
|
||||
} // extern "C"
|
||||
|
||||
3
backends/tfhe-hpu-backend/.gitattributes
vendored
Normal file
3
backends/tfhe-hpu-backend/.gitattributes
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*.xclbin filter=lfs diff=lfs merge=lfs -text
|
||||
*.pdi filter=lfs diff=lfs merge=lfs -text
|
||||
python/lib/example.json filter=lfs diff=lfs merge=lfs -text
|
||||
3
backends/tfhe-hpu-backend/.gitignore
vendored
Normal file
3
backends/tfhe-hpu-backend/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
ngt_*
|
||||
config
|
||||
kogge_cfg.toml
|
||||
88
backends/tfhe-hpu-backend/Cargo.toml
Normal file
88
backends/tfhe-hpu-backend/Cargo.toml
Normal file
@@ -0,0 +1,88 @@
|
||||
[package]
|
||||
name = "tfhe-hpu-backend"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
license = "BSD-3-Clause-Clear"
|
||||
description = "HPU implementation on FPGA of TFHE-rs primitives."
|
||||
homepage = "https://www.zama.ai/"
|
||||
documentation = "https://docs.zama.ai/tfhe-rs"
|
||||
repository = "https://github.com/zama-ai/tfhe-rs"
|
||||
readme = "README.md"
|
||||
keywords = ["encryption", "fhe", "cryptography", "hardware", "fpga"]
|
||||
|
||||
[features]
|
||||
hw-xrt = []
|
||||
hw-v80 = []
|
||||
io-dump = ["num-traits"]
|
||||
rtl_graph = ["dot2"]
|
||||
utils = ["clap", "clap-num", "bitvec", "serde_json"]
|
||||
|
||||
[build-dependencies]
|
||||
cxx-build = "1.0"
|
||||
|
||||
[dependencies]
|
||||
cxx = "1.0"
|
||||
hw_regmap = "0.1.0"
|
||||
|
||||
strum = { version = "0.26.2", features = ["derive"] }
|
||||
strum_macros = "0.26.2"
|
||||
enum_dispatch = "0.3.13"
|
||||
tracing = "0.1.40"
|
||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
toml = { version = "0.8", features = [] }
|
||||
paste = "1.0.15"
|
||||
thiserror = "1.0.61"
|
||||
bytemuck = "1.16.0"
|
||||
anyhow = "1.0.82"
|
||||
lazy_static = "1.4.0"
|
||||
rand = "0.8.5"
|
||||
regex = "1.10.4"
|
||||
bitflags = { version = "2.5.0", features = ["serde"] }
|
||||
itertools = "0.11.0"
|
||||
lru = "0.12.3"
|
||||
bitfield-struct = "0.10.0"
|
||||
crossbeam = { version = "0.8.4", features = ["crossbeam-queue"] }
|
||||
rayon = { workspace = true }
|
||||
|
||||
# Dependencies used for Sim feature
|
||||
ipc-channel = "0.18.3"
|
||||
|
||||
# Dependencies used for debug feature
|
||||
num-traits = { version = "0.2", optional = true }
|
||||
clap = { version = "4.4.4", features = ["derive"], optional = true }
|
||||
clap-num = { version = "1.1.1", optional = true }
|
||||
nix = { version = "0.29.0", features = ["ioctl", "uio"] }
|
||||
|
||||
# Dependencies used for rtl_graph features
|
||||
dot2 = { version = "1.0", optional = true }
|
||||
|
||||
bitvec = { version = "1.0", optional = true }
|
||||
serde_json = { version = "1.0", optional = true }
|
||||
|
||||
# Binary for manual debugging
|
||||
# Enable to access Hpu register and drive some custom sequence by hand
|
||||
[[bin]]
|
||||
name = "hputil"
|
||||
path = "src/utils/hputil.rs"
|
||||
required-features = ["utils"]
|
||||
|
||||
# Binary for asm manipulation
|
||||
# Enable to convert back and forth between asm/hex format
|
||||
[[bin]]
|
||||
name = "dop_fmt"
|
||||
path = "src/utils/dop_fmt.rs"
|
||||
required-features = ["utils"]
|
||||
|
||||
# Enable to convert back and forth between asm/hex format
|
||||
[[bin]]
|
||||
name = "iop_fmt"
|
||||
path = "src/utils/iop_fmt.rs"
|
||||
required-features = ["utils"]
|
||||
|
||||
# Firmware generation
|
||||
# Enable to expand IOp in list of Dop for inspection
|
||||
[[bin]]
|
||||
name = "fw"
|
||||
path = "src/utils/fw.rs"
|
||||
required-features = ["utils"]
|
||||
28
backends/tfhe-hpu-backend/LICENSE
Normal file
28
backends/tfhe-hpu-backend/LICENSE
Normal file
@@ -0,0 +1,28 @@
|
||||
BSD 3-Clause Clear License
|
||||
|
||||
Copyright © 2025 ZAMA.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or other
|
||||
materials provided with the distribution.
|
||||
|
||||
3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
|
||||
or promote products derived from this software without specific prior written permission.
|
||||
|
||||
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
|
||||
THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
|
||||
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user