mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
1 Commits
feat/princ
...
dt/bench/t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ea9815be11 |
@@ -4,9 +4,6 @@ ignore = [
|
||||
"RUSTSEC-2024-0436",
|
||||
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex on the short term.
|
||||
"RUSTSEC-2025-0141",
|
||||
# Ignoring unsoundness in 'rand' with custom logger. Rand update is currently blocked by
|
||||
# arkworks and we do not use custom loggers.
|
||||
"RUSTSEC-2026-0097",
|
||||
]
|
||||
|
||||
[output]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Run data related tests
|
||||
name: aws_data_tests
|
||||
# Run backward compatibility tests
|
||||
name: aws_tfhe_backward_compat_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,8 +30,8 @@ permissions:
|
||||
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
|
||||
|
||||
jobs:
|
||||
data-tests:
|
||||
name: aws_data_tests/data-tests (bpr)
|
||||
backward-compat-tests:
|
||||
name: aws_tfhe_backward_compat_tests/backward-compat-tests (bpr)
|
||||
if: (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name != 'push'
|
||||
runs-on: "runs-on=${{ github.run_id }}/runner=cpu-small"
|
||||
@@ -49,24 +49,22 @@ jobs:
|
||||
- name: Get LFS data sha
|
||||
id: hash-lfs-data
|
||||
run: |
|
||||
SHA=$(git lfs ls-files -l -I utils/tfhe-backward-compat-data,tests/corrupted_inputs_deserialization | sha256sum | cut -d' ' -f1)
|
||||
SHA=$(git lfs ls-files -l -I utils/tfhe-backward-compat-data | sha256sum | cut -d' ' -f1)
|
||||
echo "sha=${SHA}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Retrieve data from cache
|
||||
id: retrieve-data-cache
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: |
|
||||
utils/tfhe-backward-compat-data/**/*.cbor
|
||||
utils/tfhe-backward-compat-data/**/*.bcode
|
||||
tests/corrupted_inputs_deserialization/**/*.bcode
|
||||
key: ${{ steps.hash-lfs-data.outputs.sha }}
|
||||
|
||||
- name: Pull test data
|
||||
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make pull_backward_compat_data
|
||||
make pull_corrupted_inputs_data
|
||||
|
||||
# Pull token was stored by action/checkout to be used by lfs, we don't need it anymore
|
||||
- name: Remove git credentials
|
||||
@@ -82,19 +80,14 @@ jobs:
|
||||
run: |
|
||||
make test_backward_compatibility_ci
|
||||
|
||||
- name: Run corrupted inputs deserialization tests
|
||||
run: |
|
||||
make test_corrupted_inputs_ci
|
||||
|
||||
- name: Store data in cache
|
||||
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
|
||||
continue-on-error: true
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: |
|
||||
utils/tfhe-backward-compat-data/**/*.cbor
|
||||
utils/tfhe-backward-compat-data/**/*.bcode
|
||||
tests/corrupted_inputs_deserialization/**/*.bcode
|
||||
key: ${{ steps.hash-lfs-data.outputs.sha }}
|
||||
|
||||
- name: Set pull-request URL
|
||||
19
.github/workflows/aws_tfhe_fast_tests.yml
vendored
19
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -16,6 +16,7 @@ env:
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
@@ -36,7 +37,6 @@ jobs:
|
||||
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
|
||||
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
versionable_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.versionable_any_changed }}
|
||||
safe_serialize_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.safe_serialize_any_changed }}
|
||||
core_crypto_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.core_crypto_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
@@ -79,7 +79,6 @@ jobs:
|
||||
- tfhe-zk-pok/**
|
||||
- utils/tfhe-versionable/**
|
||||
- utils/tfhe-versionable-derive/**
|
||||
- utils/tfhe-safe-serialize/**
|
||||
csprng:
|
||||
- tfhe-csprng/**
|
||||
zk_pok:
|
||||
@@ -87,8 +86,6 @@ jobs:
|
||||
versionable:
|
||||
- utils/tfhe-versionable/**
|
||||
- utils/tfhe-versionable-derive/**
|
||||
safe_serialize:
|
||||
- utils/tfhe-safe-serialize/**
|
||||
core_crypto:
|
||||
- tfhe/src/core_crypto/**
|
||||
boolean:
|
||||
@@ -125,7 +122,6 @@ jobs:
|
||||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.versionable_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.safe_serialize_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.core_crypto_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.boolean_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.shortint_any_changed == 'true' ||
|
||||
@@ -149,7 +145,7 @@ jobs:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
@@ -174,11 +170,6 @@ jobs:
|
||||
run: |
|
||||
make test_versionable
|
||||
|
||||
- name: Run tfhe-safe-serialize tests
|
||||
if: needs.should-run.outputs.safe_serialize_test == 'true'
|
||||
run: |
|
||||
make test_safe_serialize
|
||||
|
||||
- name: Run core tests
|
||||
if: needs.should-run.outputs.core_crypto_test == 'true'
|
||||
run: |
|
||||
@@ -200,7 +191,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -213,7 +204,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
|
||||
4
.github/workflows/aws_tfhe_noise_checks.yml
vendored
4
.github/workflows/aws_tfhe_noise_checks.yml
vendored
@@ -34,7 +34,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
30
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
30
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -14,11 +14,12 @@ env:
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
types: [ labeled ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -31,16 +32,16 @@ jobs:
|
||||
if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
wasm_test: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
steps.changed-files.outputs.wasm_any_changed }}
|
||||
steps.changed-files.outputs.wasm_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
@@ -62,7 +63,6 @@ jobs:
|
||||
- tfhe/js_on_wasm_tests/**
|
||||
- tfhe/web_wasm_parallel_tests/**
|
||||
- utils/tfhe-versionable/**
|
||||
- utils/tfhe-safe-serialize/**
|
||||
- .github/workflows/aws_tfhe_wasm_tests.yml
|
||||
|
||||
wasm-tests:
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
@@ -92,7 +92,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -128,21 +128,15 @@ jobs:
|
||||
run: |
|
||||
make test_nodejs_wasm_api_ci
|
||||
|
||||
- name: Run parallel wasm tests
|
||||
run: |
|
||||
make test_web_js_api_parallel_chrome_ci
|
||||
|
||||
- name: Run wasm_par_mq tests
|
||||
run: |
|
||||
make test_wasm_par_mq_chrome_ci
|
||||
make test_wasm_par_mq_firefox_ci
|
||||
|
||||
- name: Run parallel wasm tests
|
||||
run: |
|
||||
make test_web_js_api_parallel_chrome_ci
|
||||
make test_web_js_api_parallel_firefox_ci
|
||||
|
||||
- name: Run cross origin wasm tests
|
||||
run: |
|
||||
make test_web_js_api_cross_origin_chrome_ci
|
||||
make test_web_js_api_cross_origin_firefox_ci
|
||||
|
||||
- name: Run x86_64/wasm zk compatibility tests
|
||||
run: |
|
||||
make test_zk_wasm_x86_compat_ci
|
||||
|
||||
@@ -5,9 +5,8 @@ name: backward_compat_pr_change_report
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
paths:
|
||||
- 'utils/tfhe-lints/snapshots/lint_enum_snapshots_*.json'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -17,35 +16,9 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: backward_compat_pr_change_report/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
backward_report: ${{ steps.changed-files.outputs.backward_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
backward:
|
||||
- utils/tfhe-lints/snapshots/*.json
|
||||
|
||||
change-report:
|
||||
name: backward_compat_pr_change_report/change-report (bpr)
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
if:
|
||||
needs.should-run.outputs.backward_report == 'true'
|
||||
permissions:
|
||||
pull-requests: write # To send and modify message in the PR
|
||||
steps:
|
||||
@@ -79,11 +52,18 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Post/refresh backward-compat report
|
||||
if: steps.report.outputs.has_report == 'true'
|
||||
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0
|
||||
- name: Find existing comment
|
||||
id: find-comment
|
||||
uses: peter-evans/find-comment@3eae4d37986fb5a8592848f6a574fdf654e61f9e # v3.1.0
|
||||
with:
|
||||
header: backward-compat-snapshot
|
||||
hide_and_recreate: true
|
||||
hide_classify: OUTDATED
|
||||
path: report.md
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
body-includes: '**Backward-compat snapshot:'
|
||||
|
||||
- name: Comment on PR
|
||||
if: steps.report.outputs.has_report == 'true'
|
||||
uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0
|
||||
with:
|
||||
comment-id: ${{ steps.find-comment.outputs.comment-id }}
|
||||
issue-number: ${{ github.event.pull_request.number }}
|
||||
body-path: report.md
|
||||
edit-mode: replace
|
||||
|
||||
7
.github/workflows/benchmark_cpu.yml
vendored
7
.github/workflows/benchmark_cpu.yml
vendored
@@ -14,12 +14,11 @@ on:
|
||||
- signed_integer
|
||||
- integer_compression
|
||||
- integer_zk
|
||||
- msm_zk
|
||||
- shortint
|
||||
- shortint_oprf
|
||||
- hlapi_unsigned
|
||||
- hlapi_signed
|
||||
- hlapi_erc7984
|
||||
- hlapi_erc20
|
||||
- hlapi_dex
|
||||
- hlapi_noise_squash
|
||||
- hlapi_kvstore
|
||||
@@ -93,8 +92,8 @@ jobs:
|
||||
|
||||
if inputs_command == "integer_zk":
|
||||
files_to_parse.append("pke_zk_crs_sizes.csv")
|
||||
elif inputs_command == "hlapi_erc7984":
|
||||
files_to_parse.append("erc7984_pbs_count.csv")
|
||||
elif inputs_command == "hlapi_erc20":
|
||||
files_to_parse.append("erc20_pbs_count.csv")
|
||||
elif inputs_command == "hlapi_dex":
|
||||
files_to_parse.extend(
|
||||
[
|
||||
|
||||
10
.github/workflows/benchmark_cpu_common.yml
vendored
10
.github/workflows/benchmark_cpu_common.yml
vendored
@@ -107,7 +107,7 @@ jobs:
|
||||
]:
|
||||
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
|
||||
|
||||
- name: Set matrix arguments outputs
|
||||
- name: Set martix arguments outputs
|
||||
id: set_matrix_args
|
||||
run: | # zizmor: ignore[template-injection] these env variable are safe
|
||||
{
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -185,7 +185,7 @@ jobs:
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
|
||||
--database tfhe_rs \
|
||||
--hardware "hpc7a.96xlarge" \
|
||||
--hardware "hpc8a.96xlarge" \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch "${REF_NAME}" \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
@@ -223,7 +223,7 @@ jobs:
|
||||
results_type: ${{ inputs.additional_results_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -261,7 +261,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_cpu_weekly.yml
vendored
8
.github/workflows/benchmark_cpu_weekly.yml
vendored
@@ -108,14 +108,14 @@ jobs:
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-hlapi-erc7984:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-hlapi-erc7984
|
||||
run-benchmarks-hlapi-erc20:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-hlapi-erc20
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
command: hlapi_erc7984
|
||||
additional_file_to_parse: erc7984_pbs_count.csv
|
||||
command: hlapi_erc20
|
||||
additional_file_to_parse: erc20_pbs_count.csv
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
|
||||
6
.github/workflows/benchmark_ct_key_sizes.yml
vendored
6
.github/workflows/benchmark_ct_key_sizes.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_ct_key_sizes
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
39
.github/workflows/benchmark_documentation.yml
vendored
39
.github/workflows/benchmark_documentation.yml
vendored
@@ -17,10 +17,6 @@ on:
|
||||
description: "Run GPU core-crypto benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
run-gpu-zk-benchmarks:
|
||||
description: "Run GPU ZK benchmarks"
|
||||
type: boolean
|
||||
default: true
|
||||
run-hpu-benchmarks:
|
||||
description: "Run HPU benchmarks"
|
||||
type: boolean
|
||||
@@ -40,7 +36,7 @@ jobs:
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
if: inputs.run-cpu-benchmarks
|
||||
with:
|
||||
command: integer,hlapi_erc7984
|
||||
command: integer,hlapi_erc20
|
||||
op_flavor: fast_default
|
||||
bench_type: both
|
||||
precisions_set: documentation
|
||||
@@ -95,7 +91,7 @@ jobs:
|
||||
with:
|
||||
profile: multi-h100-sxm5
|
||||
hardware_name: n3-H100-SXM5x8
|
||||
command: integer_multi_bit,hlapi_erc7984
|
||||
command: integer_multi_bit,hlapi_erc20
|
||||
op_flavor: fast_default
|
||||
bench_type: both
|
||||
precisions_set: documentation
|
||||
@@ -114,7 +110,7 @@ jobs:
|
||||
uses: ./.github/workflows/benchmark_hpu_common.yml
|
||||
if: inputs.run-hpu-benchmarks
|
||||
with:
|
||||
command: integer,hlapi_erc7984
|
||||
command: integer,hlapi_erc20
|
||||
op_flavor: default
|
||||
bench_type: both
|
||||
precisions_set: documentation
|
||||
@@ -169,42 +165,21 @@ jobs:
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
run-benchmarks-gpu-zk-server:
|
||||
name: benchmark_documentation/run-benchmarks-gpu-zk-server
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
if: inputs.run-gpu-zk-benchmarks
|
||||
with:
|
||||
profile: multi-h100-sxm5
|
||||
hardware_name: n3-H100-SXM5x8
|
||||
command: integer_zk
|
||||
op_flavor: default
|
||||
bench_type: both
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
|
||||
generate-svgs-with-benchmarks-run:
|
||||
name: benchmark-documentation/generate-svgs-with-benchmarks-run
|
||||
if: ${{ always() &&
|
||||
(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks || inputs.run-hpu-benchmarks) &&
|
||||
(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks ||inputs.run-hpu-benchmarks) &&
|
||||
inputs.generate-svgs }}
|
||||
needs: [
|
||||
run-benchmarks-cpu-integer, run-benchmarks-gpu-integer, run-benchmarks-hpu-integer,
|
||||
run-benchmarks-cpu-zk-server, run-benchmarks-cpu-zk-client,
|
||||
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto,
|
||||
run-benchmarks-gpu-zk-server
|
||||
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto
|
||||
]
|
||||
uses: ./.github/workflows/generate_svgs.yml
|
||||
with:
|
||||
time_span_days: 5
|
||||
generate-cpu-svgs: ${{ inputs.run-cpu-benchmarks }}
|
||||
generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks }}
|
||||
generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks }}
|
||||
generate-hpu-svgs: ${{ inputs.run-hpu-benchmarks }}
|
||||
secrets:
|
||||
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
|
||||
@@ -213,7 +188,7 @@ jobs:
|
||||
|
||||
generate-svgs-without-benchmarks-run:
|
||||
name: benchmark-documentation/generate-svgs-without-benchmarks-run
|
||||
if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks || inputs.run-hpu-benchmarks) &&
|
||||
if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-hpu-benchmarks) &&
|
||||
inputs.generate-svgs }}
|
||||
uses: ./.github/workflows/generate_svgs.yml
|
||||
with:
|
||||
|
||||
9
.github/workflows/benchmark_gpu.yml
vendored
9
.github/workflows/benchmark_gpu.yml
vendored
@@ -31,13 +31,10 @@ on:
|
||||
- pbs128
|
||||
- ks
|
||||
- ks_pbs
|
||||
- tfhe_zk_pok
|
||||
- msm_zk
|
||||
- integer_zk
|
||||
- integer_zk_experimental
|
||||
- integer_aes
|
||||
- integer_aes256
|
||||
- hlapi_erc7984
|
||||
- hlapi_erc20
|
||||
- hlapi_dex
|
||||
- hlapi_noise_squash
|
||||
op_flavor:
|
||||
@@ -123,8 +120,8 @@ jobs:
|
||||
|
||||
if inputs_command == "integer_zk":
|
||||
files_to_parse.append("pke_zk_crs_sizes.csv")
|
||||
elif inputs_command == "hlapi_erc7984":
|
||||
files_to_parse.append("erc7984_pbs_count.csv")
|
||||
elif inputs_command == "hlapi_erc20":
|
||||
files_to_parse.append("erc20_pbs_count.csv")
|
||||
elif inputs_command == "hlapi_dex":
|
||||
files_to_parse.extend(
|
||||
[
|
||||
|
||||
4
.github/workflows/benchmark_gpu_4090.yml
vendored
4
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_multi_bit_gpu_default
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -173,7 +173,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
42
.github/workflows/benchmark_gpu_common.yml
vendored
42
.github/workflows/benchmark_gpu_common.yml
vendored
@@ -111,7 +111,7 @@ jobs:
|
||||
]:
|
||||
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
|
||||
|
||||
- name: Set matrix arguments outputs
|
||||
- name: Set martix arguments outputs
|
||||
id: set_matrix_args
|
||||
run: | # zizmor: ignore[template-injection] these env variable are safe
|
||||
{
|
||||
@@ -126,11 +126,17 @@ jobs:
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -139,6 +145,25 @@ jobs:
|
||||
backend: ${{ inputs.backend }}
|
||||
profile: ${{ inputs.profile }}
|
||||
|
||||
- name: Acknowledge remote instance failure
|
||||
if: steps.start-remote-instance.outcome == 'failure' &&
|
||||
inputs.profile != 'single-h100'
|
||||
run: |
|
||||
echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
|
||||
echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')"
|
||||
exit 1
|
||||
env:
|
||||
INPUTS_PROFILE: ${{ inputs.profile }}
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' &&
|
||||
steps.start-remote-instance.outcome == 'failure' &&
|
||||
inputs.profile == 'single-h100'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
|
||||
install-dependencies:
|
||||
name: benchmark_gpu_common/install-dependencies
|
||||
@@ -159,6 +184,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -270,7 +296,7 @@ jobs:
|
||||
filenames: ${{ inputs.additional_file_to_parse }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -307,13 +333,13 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: benchmark_gpu_common/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
22
.github/workflows/benchmark_gpu_coprocessor.yml
vendored
22
.github/workflows/benchmark_gpu_coprocessor.yml
vendored
@@ -42,7 +42,7 @@ env:
|
||||
OPTIMIZATION_TARGET: "throughput"
|
||||
BATCH_SIZE: "5000"
|
||||
SCHEDULING_POLICY: "MAX_PARALLELISM"
|
||||
BENCHMARKS: "erc7984"
|
||||
BENCHMARKS: "erc20"
|
||||
BRANCH_NAME: ${{ github.ref_name }}
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
SLAB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
if [[ ${IS_MANUAL_RUN} == true ]]; then
|
||||
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
|
||||
else
|
||||
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
|
||||
PROFILE_RAW="${PROFILE}"
|
||||
fi
|
||||
# shellcheck disable=SC2001
|
||||
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|.*[[:space:]](\(.*\))|\1|')
|
||||
@@ -94,7 +94,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -204,7 +204,7 @@ jobs:
|
||||
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
|
||||
|
||||
- name: Cache cargo
|
||||
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
|
||||
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
@@ -214,14 +214,14 @@ jobs:
|
||||
restore-keys: ${{ runner.os }}-cargo-
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Chainguard Registry
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||
with:
|
||||
registry: cgr.dev
|
||||
username: ${{ secrets.CGR_USERNAME }}
|
||||
@@ -248,13 +248,13 @@ jobs:
|
||||
npm install && npm run deploy:emptyProxies && npx hardhat compile
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Profile erc7984 no-cmux benchmark on GPU
|
||||
- name: Profile erc20 no-cmux benchmark on GPU
|
||||
run: |
|
||||
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
|
||||
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
|
||||
BENCHMARK_TYPE="THROUGHPUT_200" \
|
||||
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
|
||||
make -e "profile_erc7984_gpu"
|
||||
make -e "profile_erc20_gpu"
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Get nsys profile name
|
||||
@@ -271,7 +271,7 @@ jobs:
|
||||
- name: Upload profile artifact
|
||||
env:
|
||||
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ env.REPORT_NAME }}
|
||||
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
|
||||
@@ -302,7 +302,7 @@ jobs:
|
||||
working-directory: fhevm/
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
|
||||
path: fhevm/$${{ env.RESULTS_FILENAME }}
|
||||
@@ -333,7 +333,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/benchmark_hpu.yml
vendored
2
.github/workflows/benchmark_hpu.yml
vendored
@@ -14,7 +14,7 @@ on:
|
||||
- integer
|
||||
- hlapi_unsigned
|
||||
- hlapi_signed
|
||||
- hlapi_erc7984
|
||||
- hlapi_erc20
|
||||
op_flavor:
|
||||
description: "Operations set to run"
|
||||
type: choice
|
||||
|
||||
6
.github/workflows/benchmark_hpu_common.yml
vendored
6
.github/workflows/benchmark_hpu_common.yml
vendored
@@ -95,7 +95,7 @@ jobs:
|
||||
]:
|
||||
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
|
||||
|
||||
- name: Set matrix arguments outputs
|
||||
- name: Set martix arguments outputs
|
||||
id: set_matrix_args
|
||||
run: | # zizmor: ignore[template-injection] these env variable are safe
|
||||
{
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
steps:
|
||||
# Needed as long as hw_regmap repository is private
|
||||
- name: Configure SSH
|
||||
uses: webfactory/ssh-agent@e83874834305fe9a4a2997156cb26c5de65a8555 # v0.10.0
|
||||
uses: webfactory/ssh-agent@a6f90b1f127823b31d4d4a8d96047790581349bd # v0.9.1
|
||||
with:
|
||||
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
@@ -185,7 +185,7 @@ jobs:
|
||||
BENCH_TYPE: ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.bench_type }}_${{ matrix.command }}_benchmarks
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -143,7 +143,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -280,7 +280,7 @@ jobs:
|
||||
BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_regression_${{ env.RESULTS_FILE_SHA }} # RESULT_FILE_SHA is needed to avoid collision between matrix.command runs
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -387,7 +387,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_tfhe_fft.yml
vendored
6
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_tfhe_ntt.yml
vendored
6
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
with open(env_file, "a") as f:
|
||||
f.write(f"""BROWSER=["{'", "'.join(split_browser)}"]\n""")
|
||||
|
||||
- name: Set matrix arguments output
|
||||
- name: Set martix arguments output
|
||||
id: set_matrix_arg
|
||||
run: | # zizmor: ignore[template-injection] this env variable is safe
|
||||
echo "browser=${{ toJSON(env.BROWSER) }}" >> "${GITHUB_OUTPUT}"
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -158,9 +158,9 @@ jobs:
|
||||
env:
|
||||
BROWSER: ${{ matrix.browser }}
|
||||
|
||||
- name: Run benchmarks (cross origin)
|
||||
- name: Run benchmarks (unsafe coop)
|
||||
run: |
|
||||
make bench_web_js_api_cross_origin_"${BROWSER}"_ci
|
||||
make bench_web_js_api_unsafe_coop_"${BROWSER}"_ci
|
||||
env:
|
||||
BROWSER: ${{ matrix.browser }}
|
||||
|
||||
@@ -180,7 +180,7 @@ jobs:
|
||||
REF_NAME: ${{ github.ref_name }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -218,7 +218,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/cargo_build_common.yml
vendored
6
.github/workflows/cargo_build_common.yml
vendored
@@ -94,7 +94,7 @@ jobs:
|
||||
with open(env_file, "a") as f:
|
||||
f.write(f"""RUNNERS=["{'", "'.join(runners)}"]\n""")
|
||||
|
||||
- name: Set matrix runners outputs
|
||||
- name: Set martix runners outputs
|
||||
id: set_matrix_runners
|
||||
run: | # zizmor: ignore[template-injection] these env variable are safe
|
||||
echo "runners=${{ toJSON(env.RUNNERS) }}" >> "${GITHUB_OUTPUT}"
|
||||
@@ -138,7 +138,7 @@ jobs:
|
||||
- name: Node cache restoration
|
||||
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -151,7 +151,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2' && steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
|
||||
4
.github/workflows/cargo_test_ntt.yml
vendored
4
.github/workflows/cargo_test_ntt.yml
vendored
@@ -63,7 +63,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/ci_lint.yml
vendored
4
.github/workflows/ci_lint.yml
vendored
@@ -43,14 +43,14 @@ jobs:
|
||||
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Check workflows security
|
||||
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
|
||||
uses: zizmorcore/zizmor-action@0dce2577a4760a2749d8cfb7a84b7d5585ebcb7d # v0.5.0
|
||||
with:
|
||||
advanced-security: 'false' # Print results directly in logs
|
||||
persona: pedantic
|
||||
version: ${{ steps.get_zizmor.outputs.version }}
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@ca46236c6ce584ae24bc6283ba8dcf4b3ec8a066 # v5.0.4
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@70c4af2ed5282c51ba40566d026d6647852ffa3e # v5.0.1
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
|
||||
4
.github/workflows/code_coverage.yml
vendored
4
.github/workflows/code_coverage.yml
vendored
@@ -74,7 +74,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2
|
||||
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
4
.github/workflows/generate_svg_common.yml
vendored
4
.github/workflows/generate_svg_common.yml
vendored
@@ -87,7 +87,7 @@ jobs:
|
||||
|
||||
- name: Upload tables
|
||||
if: inputs.backend_comparison == false
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_subset_${{inputs.bench_subset}}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
|
||||
# This will upload all the generated files
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
|
||||
- name: Upload comparison tables
|
||||
if: inputs.backend_comparison == true
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
|
||||
with:
|
||||
name: ${{ github.sha }}_backends_comparison_tables
|
||||
# This will upload all the generated files
|
||||
|
||||
76
.github/workflows/generate_svgs.yml
vendored
76
.github/workflows/generate_svgs.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
if: inputs.generate-cpu-svgs
|
||||
with:
|
||||
backend: cpu
|
||||
hardware_name: hpc7a.96xlarge
|
||||
hardware_name: hpc8a.96xlarge
|
||||
layer: integer
|
||||
pbs_kind: classical
|
||||
bench_type: latency
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
if: inputs.generate-cpu-svgs
|
||||
with:
|
||||
backend: cpu
|
||||
hardware_name: hpc7a.96xlarge
|
||||
hardware_name: hpc8a.96xlarge
|
||||
layer: integer
|
||||
pbs_kind: classical
|
||||
bench_type: throughput
|
||||
@@ -160,7 +160,7 @@ jobs:
|
||||
if: inputs.generate-cpu-svgs
|
||||
with:
|
||||
backend: cpu
|
||||
hardware_name: hpc7a.96xlarge
|
||||
hardware_name: hpc8a.96xlarge
|
||||
layer: integer
|
||||
bench_subset: zk
|
||||
pbs_kind: classical
|
||||
@@ -178,7 +178,7 @@ jobs:
|
||||
if: inputs.generate-cpu-svgs
|
||||
with:
|
||||
backend: cpu
|
||||
hardware_name: hpc7a.96xlarge
|
||||
hardware_name: hpc8a.96xlarge
|
||||
layer: integer
|
||||
bench_subset: zk
|
||||
pbs_kind: classical
|
||||
@@ -209,98 +209,60 @@ jobs:
|
||||
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
|
||||
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
|
||||
|
||||
gpu-zk-server-latency-table:
|
||||
name: generate_documentation_svgs/gpu-zk-server-latency-table
|
||||
uses: ./.github/workflows/generate_svg_common.yml
|
||||
if: inputs.generate-gpu-svgs
|
||||
with:
|
||||
backend: gpu
|
||||
hardware_name: n3-H100-SXM5x8
|
||||
layer: integer
|
||||
bench_subset: zk
|
||||
pbs_kind: multi_bit
|
||||
grouping_factor: 4
|
||||
bench_type: latency
|
||||
time_span_days: ${{ inputs.time_span_days }}
|
||||
output_filename: gpu-zk-benchmark-latency
|
||||
secrets:
|
||||
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
|
||||
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
|
||||
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
|
||||
|
||||
gpu-zk-server-throughput-table:
|
||||
name: generate_documentation_svgs/gpu-zk-server-throughput-table
|
||||
uses: ./.github/workflows/generate_svg_common.yml
|
||||
if: inputs.generate-gpu-svgs
|
||||
with:
|
||||
backend: gpu
|
||||
hardware_name: n3-H100-SXM5x8
|
||||
layer: integer
|
||||
bench_subset: zk
|
||||
pbs_kind: multi_bit
|
||||
grouping_factor: 4
|
||||
bench_type: throughput
|
||||
time_span_days: ${{ inputs.time_span_days }}
|
||||
output_filename: gpu-zk-benchmark-throughput
|
||||
secrets:
|
||||
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
|
||||
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
|
||||
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
|
||||
|
||||
# -----------------------------------------------------------
|
||||
# ERC7984 benchmarks tables
|
||||
# ERC20 benchmarks tables
|
||||
# -----------------------------------------------------------
|
||||
|
||||
cpu-erc7984-latency-throughput-table:
|
||||
name: generate_documentation_svgs/cpu-erc7984-latency-throughput-table
|
||||
cpu-erc20-latency-throughput-table:
|
||||
name: generate_documentation_svgs/cpu-erc20-latency-throughput-table
|
||||
uses: ./.github/workflows/generate_svg_common.yml
|
||||
if: inputs.generate-cpu-svgs
|
||||
with:
|
||||
backend: cpu
|
||||
hardware_name: hpc7a.96xlarge
|
||||
hardware_name: hpc8a.96xlarge
|
||||
layer: hlapi
|
||||
bench_subset: erc7984
|
||||
bench_subset: erc20
|
||||
pbs_kind: classical
|
||||
bench_type: both
|
||||
time_span_days: ${{ inputs.time_span_days }}
|
||||
output_filename: cpu-hlapi-erc7984-benchmark-latency-throughput
|
||||
output_filename: cpu-hlapi-erc20-benchmark-latency-throughput
|
||||
secrets:
|
||||
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
|
||||
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
|
||||
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
|
||||
|
||||
gpu-erc7984-latency-throughput-table:
|
||||
name: generate_documentation_svgs/gpu-erc7984-latency-throughput-table
|
||||
gpu-erc20-latency-throughput-table:
|
||||
name: generate_documentation_svgs/gpu-erc20-latency-throughput-table
|
||||
uses: ./.github/workflows/generate_svg_common.yml
|
||||
if: inputs.generate-gpu-svgs
|
||||
with:
|
||||
backend: gpu
|
||||
hardware_name: n3-H100-SXM5x8
|
||||
layer: hlapi
|
||||
bench_subset: erc7984
|
||||
bench_subset: erc20
|
||||
pbs_kind: multi_bit
|
||||
grouping_factor: 4
|
||||
bench_type: both
|
||||
time_span_days: ${{ inputs.time_span_days }}
|
||||
output_filename: gpu-hlapi-erc7984-benchmark-h100x8-sxm5-latency-throughput
|
||||
output_filename: gpu-hlapi-erc20-benchmark-h100x8-sxm5-latency-throughput
|
||||
secrets:
|
||||
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
|
||||
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
|
||||
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
|
||||
|
||||
hpu-erc7984-latency-throughput-table:
|
||||
name: generate_documentation_svgs/hpu-erc7984-latency-throughput-table
|
||||
hpu-erc20-latency-throughput-table:
|
||||
name: generate_documentation_svgs/hpu-erc20-latency-throughput-table
|
||||
uses: ./.github/workflows/generate_svg_common.yml
|
||||
if: inputs.generate-hpu-svgs
|
||||
with:
|
||||
backend: hpu
|
||||
hardware_name: hpu_x1
|
||||
layer: hlapi
|
||||
bench_subset: erc7984
|
||||
bench_subset: erc20
|
||||
pbs_kind: classical
|
||||
bench_type: both
|
||||
time_span_days: ${{ inputs.time_span_days }}
|
||||
output_filename: hpu-hlapi-erc7984-benchmark-hpux1-latency-throughput.svg
|
||||
output_filename: hpu-hlapi-erc20-benchmark-hpux1-latency-throughput.svg
|
||||
secrets:
|
||||
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
|
||||
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
|
||||
@@ -316,7 +278,7 @@ jobs:
|
||||
if: inputs.generate-cpu-svgs
|
||||
with:
|
||||
backend: cpu
|
||||
hardware_name: hpc7a.96xlarge
|
||||
hardware_name: hpc8a.96xlarge
|
||||
layer: core_crypto
|
||||
pbs_kind: any
|
||||
grouping_factor: 4
|
||||
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -149,7 +149,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
32
.github/workflows/gpu_core_h100_tests.yml
vendored
32
.github/workflows/gpu_core_h100_tests.yml
vendored
@@ -23,7 +23,7 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled, opened, synchronize ]
|
||||
types: [ labeled ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -38,7 +38,6 @@ jobs:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
@@ -63,24 +62,29 @@ jobs:
|
||||
- tfhe/src/integer/server_key/radix_parallel/tests_cases_unsigned.rs
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_core_h100_tests.yml'
|
||||
core_crypto:
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
|
||||
setup-instance:
|
||||
name: gpu_core_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fall back on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -89,6 +93,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This allows falling back on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance is spawned specifically for pull requests from forked repositories
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
@@ -121,6 +132,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -164,14 +176,14 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: gpu_core_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/gpu_fast_tests.yml
vendored
4
.github/workflows/gpu_fast_tests.yml
vendored
@@ -77,7 +77,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -182,7 +182,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
26
.github/workflows/gpu_full_h100_tests.yml
vendored
26
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -25,11 +25,17 @@ jobs:
|
||||
name: gpu_full_h100_tests/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fall back on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -38,6 +44,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This allows falling back on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: gpu_full_h100_tests/cuda-tests-linux
|
||||
needs: [ setup-instance ]
|
||||
@@ -61,6 +74,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -104,13 +118,13 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: gpu_full_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -80,7 +80,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -186,7 +186,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
31
.github/workflows/gpu_hlapi_h100_tests.yml
vendored
31
.github/workflows/gpu_hlapi_h100_tests.yml
vendored
@@ -23,7 +23,7 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled, opened, synchronize ]
|
||||
types: [ labeled ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -38,7 +38,6 @@ jobs:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
@@ -66,23 +65,27 @@ jobs:
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_hlapi_h100_tests.yml'
|
||||
core_crypto:
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
|
||||
setup-instance:
|
||||
name: gpu_hlapi_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,6 +94,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
@@ -123,6 +133,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -173,14 +184,14 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: gpu_hlapi_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
43
.github/workflows/gpu_integer_long_run_tests.yml
vendored
43
.github/workflows/gpu_integer_long_run_tests.yml
vendored
@@ -17,8 +17,8 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered every Monday at 8p.m.
|
||||
- cron: "0 20 * * 1"
|
||||
# Nightly tests will be triggered each evening 8p.m.
|
||||
- cron: "0 20 * * *"
|
||||
pull_request:
|
||||
|
||||
|
||||
@@ -28,48 +28,17 @@ permissions:
|
||||
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_integer_long_run_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
is_needed_in_gpu_ci: ${{ env.IS_PR == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- '.github/workflows/gpu_integer_long_run_tests.yml'
|
||||
|
||||
setup-instance:
|
||||
name: gpu_integer_long_run_tests/setup-instance
|
||||
needs: [should-run]
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
needs.should-run.outputs.is_needed_in_gpu_ci == 'true'
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -143,7 +112,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/gpu_memory_sanitizer.yml
vendored
4
.github/workflows/gpu_memory_sanitizer.yml
vendored
@@ -74,7 +74,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -166,7 +166,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -166,7 +166,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
12
.github/workflows/gpu_pcc.yml
vendored
12
.github/workflows/gpu_pcc.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -131,10 +131,6 @@ jobs:
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Run semgrep and lint checks on CUDA code
|
||||
run: |
|
||||
make semgrep_and_lint_gpu_code
|
||||
|
||||
- name: Run fmt checks
|
||||
run: |
|
||||
make check_fmt_gpu
|
||||
@@ -143,6 +139,10 @@ jobs:
|
||||
run: |
|
||||
make pcc_gpu
|
||||
|
||||
- name: Run semgrep and lint checks on CUDA code
|
||||
run: |
|
||||
make semgrep_and_lint_gpu_code
|
||||
|
||||
- name: Run semver checks on tfhe-cuda-backend
|
||||
run: |
|
||||
make semver_check_cuda_backend
|
||||
@@ -176,7 +176,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -63,6 +63,7 @@ jobs:
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_signed_integer_classic_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
|
||||
@@ -79,7 +80,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -168,7 +169,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -23,7 +23,7 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled, opened, synchronize ]
|
||||
types: [ labeled ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -38,7 +38,6 @@ jobs:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
@@ -64,25 +63,30 @@ jobs:
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_signed_integer_h100_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
core_crypto:
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
|
||||
setup-instance:
|
||||
name: gpu_signed_integer_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,6 +95,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
@@ -123,6 +134,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -164,14 +176,14 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: gpu_signed_integer_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -64,6 +64,7 @@ jobs:
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_signed_integer_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
|
||||
@@ -80,7 +81,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -177,7 +178,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -63,6 +63,7 @@ jobs:
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
|
||||
@@ -79,7 +80,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -168,7 +169,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -23,7 +23,7 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled, opened, synchronize ]
|
||||
types: [ labeled ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -38,7 +38,6 @@ jobs:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
@@ -64,25 +63,30 @@ jobs:
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
core_crypto:
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
|
||||
setup-instance:
|
||||
name: gpu_unsigned_integer_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,6 +95,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
@@ -123,6 +134,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -164,14 +176,14 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: gpu_unsigned_integer_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -64,6 +64,7 @@ jobs:
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_unsigned_integer_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
|
||||
@@ -80,7 +81,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -177,7 +178,7 @@ jobs:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
13
.github/workflows/gpu_zk_tests.yml
vendored
13
.github/workflows/gpu_zk_tests.yml
vendored
@@ -51,13 +51,7 @@ jobs:
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- backends/zk-cuda-backend/**
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/zk/**
|
||||
- tfhe-zk-pok/**
|
||||
- '.github/workflows/gpu_zk_tests.yml'
|
||||
- ci/slab.toml
|
||||
|
||||
@@ -73,7 +67,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -132,9 +126,6 @@ jobs:
|
||||
- name: Run zk-cuda-backend integration tests
|
||||
run: |
|
||||
make test_zk_cuda_backend
|
||||
make test_zk_pok_experimental_gpu
|
||||
make test_integer_zk_gpu
|
||||
make test_integer_zk_experimental_gpu
|
||||
|
||||
slack-notify:
|
||||
name: gpu_zk_tests/slack-notify
|
||||
@@ -167,7 +158,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/make_release_common.yml
vendored
6
.github/workflows/make_release_common.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
run: |
|
||||
cargo package -p "${PACKAGE}"
|
||||
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package/*.crate
|
||||
@@ -101,13 +101,13 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
path: target/package
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
|
||||
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
|
||||
@@ -1,36 +1,12 @@
|
||||
# Common workflow to make crate release for CUDA backend
|
||||
name: make_release_common_cuda
|
||||
name: make_release_cuda
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
package-name:
|
||||
type: string
|
||||
required: true
|
||||
dry-run:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
secrets:
|
||||
REPO_CHECKOUT_TOKEN:
|
||||
required: true
|
||||
SLAB_ACTION_TOKEN:
|
||||
required: true
|
||||
SLAB_BASE_URL:
|
||||
required: true
|
||||
SLAB_URL:
|
||||
required: true
|
||||
JOB_SECRET:
|
||||
required: true
|
||||
SLACK_CHANNEL:
|
||||
required: true
|
||||
BOT_USERNAME:
|
||||
required: true
|
||||
SLACK_WEBHOOK:
|
||||
required: true
|
||||
ALLOWED_TEAM:
|
||||
required: true
|
||||
READ_ORG_TOKEN:
|
||||
required: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
@@ -45,15 +21,15 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
verify-triggering-actor:
|
||||
name: make_release_common_cuda/verify-triggering-actor
|
||||
name: make_release_cuda/verify-triggering-actor
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: ./.github/workflows/verify_triggering_actor.yml
|
||||
secrets:
|
||||
ALLOWED_TEAM: ${{ secrets.ALLOWED_TEAM }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
setup-instance:
|
||||
name: make_release_common_cuda/setup-instance
|
||||
name: make_release_cuda/setup-instance
|
||||
needs: verify-triggering-actor
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
@@ -61,7 +37,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -71,7 +47,7 @@ jobs:
|
||||
profile: gpu-build
|
||||
|
||||
package:
|
||||
name: make_release_common_cuda/package
|
||||
name: make_release_cuda/package
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
outputs:
|
||||
@@ -100,6 +76,7 @@ jobs:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
{
|
||||
@@ -112,6 +89,7 @@ jobs:
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
@@ -123,14 +101,12 @@ jobs:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Prepare package
|
||||
env:
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
run: |
|
||||
cargo package -p "${PACKAGE}"
|
||||
cargo package -p tfhe-cuda-backend
|
||||
|
||||
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
name: crate-tfhe-cuda-backend
|
||||
path: target/package/*.crate
|
||||
|
||||
- name: generate hash
|
||||
@@ -138,8 +114,8 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
name: make_release_common_cuda/provenance
|
||||
if: ${{ !inputs.dry-run }}
|
||||
name: make_release_cuda/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
# This action cannot be pinned to a specific commit (see https://github.com/slsa-framework/slsa-github-generator/blob/main/README.md#referencing-slsa-builders-and-generators)
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0 # zizmor: ignore[unpinned-uses] as said above SLSA cannot be pinned by tag today
|
||||
@@ -152,7 +128,7 @@ jobs:
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish-cuda-release:
|
||||
name: make_release_common_cuda/publish-cuda-release
|
||||
name: make_release_cuda/publish-cuda-release
|
||||
needs: [setup-instance, package] # for comparing hashes
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
permissions:
|
||||
@@ -174,6 +150,7 @@ jobs:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
{
|
||||
@@ -186,6 +163,7 @@ jobs:
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
|
||||
@@ -196,33 +174,25 @@ jobs:
|
||||
env:
|
||||
GCC_VERSION: ${{ matrix.gcc }}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0
|
||||
with:
|
||||
name: crate-${{ inputs.package-name }}
|
||||
name: crate-tfhe-cuda-backend
|
||||
path: target/package
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
|
||||
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
PACKAGE: ${{ inputs.package-name }}
|
||||
DRY_RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
|
||||
# would fail. This is safe since DRY_RUN is handled in the env section above.
|
||||
# shellcheck disable=SC2086
|
||||
cargo publish -p "${PACKAGE}" ${DRY_RUN}
|
||||
cargo publish -p tfhe-cuda-backend ${DRY_RUN}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
@@ -234,7 +204,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_MESSAGE: "SLSA ${{ inputs.package-name }} crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "SLSA tfhe-cuda-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
|
||||
@@ -242,17 +212,17 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "${{ inputs.package-name }} release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: make_release_common_cuda/teardown-instance
|
||||
name: make_release_cuda/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [setup-instance, publish-cuda-release]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -262,7 +232,7 @@ jobs:
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (${{ inputs.package-name }} release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (publish-cuda-release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
21
.github/workflows/make_release_tfhe.yml
vendored
21
.github/workflows/make_release_tfhe.yml
vendored
@@ -16,10 +16,6 @@ on:
|
||||
description: "Push web js package"
|
||||
type: boolean
|
||||
default: true
|
||||
push_web_compat_package:
|
||||
description: "Push web compat (cross-origin) js package"
|
||||
type: boolean
|
||||
default: true
|
||||
push_node_package:
|
||||
description: "Push node js package"
|
||||
type: boolean
|
||||
@@ -103,23 +99,6 @@ jobs:
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
|
||||
- name: Build web compat (cross-origin) package
|
||||
if: ${{ inputs.push_web_compat_package }}
|
||||
run: |
|
||||
rm -rf tfhe/pkg
|
||||
|
||||
make build_web_js_api
|
||||
sed -i 's/"tfhe"/"tfhe-compat"/g' tfhe/pkg/package.json
|
||||
|
||||
- name: Publish web compat (cross-origin) package
|
||||
if: ${{ inputs.push_web_compat_package }}
|
||||
uses: JS-DevTools/npm-publish@0fd2f4369c5d6bcfcde6091a7c527d810b9b5c3f
|
||||
with:
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
|
||||
- name: Build Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
run: |
|
||||
|
||||
44
.github/workflows/make_release_tfhe_cuda.yml
vendored
44
.github/workflows/make_release_tfhe_cuda.yml
vendored
@@ -1,44 +0,0 @@
|
||||
# Publish new release of tfhe-rs CUDA backend on crates.io.
|
||||
name: make_release_tfhe_cuda
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
|
||||
|
||||
jobs:
|
||||
make-release:
|
||||
name: make_release_tfhe_cuda/make-release
|
||||
uses: ./.github/workflows/make_release_common_cuda.yml
|
||||
with:
|
||||
package-name: "tfhe-cuda-backend"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
permissions:
|
||||
actions: read # Needed to detect the GitHub Actions environment
|
||||
id-token: write # Needed to create the provenance via GitHub OIDC
|
||||
contents: write # Needed to upload assets/artifacts
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
@@ -1,32 +0,0 @@
|
||||
name: make_release_tfhe_safe_serialize
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
|
||||
|
||||
jobs:
|
||||
make-release:
|
||||
name: make_release_tfhe_safe_serialize/make-release
|
||||
uses: ./.github/workflows/make_release_common.yml
|
||||
with:
|
||||
package-name: "tfhe-safe-serialize"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
permissions:
|
||||
actions: read # Needed to detect the GitHub Actions environment
|
||||
id-token: write # Needed to create the provenance via GitHub OIDC
|
||||
contents: write # Needed to upload assets/artifacts
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
44
.github/workflows/make_release_zk_cuda.yml
vendored
44
.github/workflows/make_release_zk_cuda.yml
vendored
@@ -1,44 +0,0 @@
|
||||
# Publish new release of CUDA Zero-Knowledge primitives on crates.io.
|
||||
name: make_release_zk_cuda
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
permissions: {}
|
||||
|
||||
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
|
||||
|
||||
jobs:
|
||||
make-release:
|
||||
name: make_release_zk_cuda/make-release
|
||||
uses: ./.github/workflows/make_release_common_cuda.yml
|
||||
with:
|
||||
package-name: "zk-cuda-backend"
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
permissions:
|
||||
actions: read # Needed to detect the GitHub Actions environment
|
||||
id-token: write # Needed to create the provenance via GitHub OIDC
|
||||
contents: write # Needed to upload assets/artifacts
|
||||
secrets:
|
||||
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
|
||||
SLAB_URL: ${{ secrets.SLAB_URL }}
|
||||
JOB_SECRET: ${{ secrets.JOB_SECRET }}
|
||||
4
.github/workflows/parameters_check.yml
vendored
4
.github/workflows/parameters_check.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
||||
|
||||
- name: Restore Sagemath image from cache
|
||||
id: docker-cache
|
||||
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: /tmp/sagemath_image
|
||||
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
- name: Store Sagemath image in cache
|
||||
if: steps.docker-cache.outputs.cache-hit != 'true'
|
||||
continue-on-error: true
|
||||
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
|
||||
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
|
||||
with:
|
||||
path: /tmp/sagemath_image
|
||||
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
|
||||
|
||||
47
.github/workflows/sync_on_push.yml
vendored
47
.github/workflows/sync_on_push.yml
vendored
@@ -24,8 +24,6 @@ jobs:
|
||||
SOURCE_REPO: "zama-ai/tfhe-rs"
|
||||
SOURCE_BRANCH: "main"
|
||||
DESTINATION_BRANCH: "main"
|
||||
SOURCE_TAGS: "refs/tags/*"
|
||||
DESTINATION_TAGS: "refs/tags/*"
|
||||
USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
TOKEN: ${{ secrets.SYNC_REPO_TOKEN }}
|
||||
DEST_REPO: ${{ secrets.SYNC_DEST_REPO }}
|
||||
@@ -35,16 +33,49 @@ jobs:
|
||||
git clone --quiet "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs --origin source && cd ./tfhe-rs
|
||||
git remote add destination "https://${USERNAME}:${TOKEN}@github.com/${DEST_REPO}.git"
|
||||
|
||||
# The LFS config disables pulling files by default, so remove it
|
||||
# TODO: see if we need to more precisely fetch LFS files or if git is smart
|
||||
rm .lfsconfig
|
||||
|
||||
echo ">>> Fetching all branches references down locally so subsequent commands can see them..."
|
||||
git fetch --all --tags --update-head-ok --quiet
|
||||
git fetch source '+refs/heads/*:refs/heads/*' --update-head-ok
|
||||
|
||||
echo ">>> Sync LFS items from source..."
|
||||
./scripts/lfs_sync.sh source destination "${SOURCE_BRANCH}"
|
||||
echo ">>> Print out all branches"
|
||||
git --no-pager branch -a -vv
|
||||
|
||||
echo ">>> Pushing git changes for ${SOURCE_BRANCH}..."
|
||||
echo ">>> Pull LFS items from source..."
|
||||
git lfs pull source "${SOURCE_BRANCH}"
|
||||
|
||||
echo ">>> Pushing git changes and LFS content..."
|
||||
git push destination "${SOURCE_BRANCH}:${DESTINATION_BRANCH}" -f
|
||||
|
||||
echo ">>> Pushing git tags..."
|
||||
git push destination "${SOURCE_TAGS}:${DESTINATION_TAGS}" -f
|
||||
shred --remove .git/config
|
||||
|
||||
- name: git-sync-tags
|
||||
env:
|
||||
SOURCE_REPO: "zama-ai/tfhe-rs"
|
||||
SOURCE_BRANCH: "refs/tags/*"
|
||||
DESTINATION_BRANCH: "refs/tags/*"
|
||||
USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
TOKEN: ${{ secrets.SYNC_REPO_TOKEN }}
|
||||
DEST_REPO: ${{ secrets.SYNC_DEST_REPO }}
|
||||
run: |
|
||||
echo ">>> Cloning source repo..."
|
||||
git lfs install
|
||||
git clone --quiet "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs-tag --origin source && cd ./tfhe-rs-tag
|
||||
git remote add destination "https://${USERNAME}:${TOKEN}@github.com/${DEST_REPO}.git"
|
||||
|
||||
# The LFS config disables pulling files by default, so remove it
|
||||
# TODO: see if we need to more precisely fetch LFS files for new tags or if git is smart
|
||||
rm .lfsconfig
|
||||
|
||||
echo ">>> Fetching all branches references down locally so subsequent commands can see them..."
|
||||
git fetch source '+refs/heads/*:refs/heads/*' --update-head-ok
|
||||
|
||||
echo ">>> Print out all branches"
|
||||
git --no-pager branch -a -vv
|
||||
|
||||
echo ">>> Pushing git changes and LFS content..."
|
||||
git push destination "${SOURCE_BRANCH}:${DESTINATION_BRANCH}" -f
|
||||
|
||||
shred --remove .git/config
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -25,7 +25,6 @@ dieharder_run.log
|
||||
|
||||
# Cuda local build
|
||||
backends/tfhe-cuda-backend/cuda/cmake-build-debug/
|
||||
backends/tfhe-cuda-backend/cuda/build/
|
||||
|
||||
# WASM tests
|
||||
tfhe/web_wasm_parallel_tests/server.PID
|
||||
@@ -35,9 +34,6 @@ node_modules/
|
||||
package-lock.json
|
||||
utils/wasm-par-mq/examples/*/pkg/
|
||||
|
||||
# Commit lock files of backward data generation crates
|
||||
!utils/tfhe-backward-compat-data/crates/generate_*/Cargo.lock
|
||||
|
||||
# Python .env
|
||||
.env
|
||||
__pycache__
|
||||
|
||||
@@ -12,7 +12,6 @@ ignore:
|
||||
- utils/tfhe-lints/**/main.stderr
|
||||
- utils/tfhe-lints/**/*.json
|
||||
- utils/tfhe-backward-compat-data/**/*.ron # ron files are autogenerated
|
||||
- tests/corrupted_inputs_deserialization/data/proven_compact_list/**/metadata.txt
|
||||
|
||||
rules:
|
||||
# checks if file ends in a newline character
|
||||
|
||||
@@ -14,12 +14,10 @@ members = [
|
||||
"tfhe-fft",
|
||||
"tfhe-ntt",
|
||||
"tfhe-zk-pok",
|
||||
"utils/benchmark_spec",
|
||||
"utils/param_dedup",
|
||||
"utils/tfhe-backward-compat-checker",
|
||||
"utils/tfhe-backward-compat-data",
|
||||
"utils/tfhe-backward-compat-data/crates/add_new_version",
|
||||
"utils/tfhe-safe-serialize",
|
||||
"utils/tfhe-versionable",
|
||||
"utils/tfhe-versionable-derive",
|
||||
"utils/wasm-par-mq",
|
||||
@@ -27,7 +25,7 @@ members = [
|
||||
"utils/wasm-par-mq/web_tests",
|
||||
]
|
||||
|
||||
exclude = ["utils/tfhe-lints", "apps/trivium", "apps/princev2"]
|
||||
exclude = ["utils/tfhe-lints", "apps/trivium"]
|
||||
|
||||
[workspace.package]
|
||||
rust-version = "1.91.1"
|
||||
@@ -45,7 +43,6 @@ rand = "0.8"
|
||||
rayon = "1.11"
|
||||
serde = { version = "1.0", default-features = false }
|
||||
wasm-bindgen = { version = "0.2.114" }
|
||||
wasm-bindgen-futures = { version = "0.4.56" }
|
||||
# js-sys (at this point in time) automatically enables the unsafe-eval feature which we do not want
|
||||
# this does not prevent other deps from enabling it, but it at least conveys our need to not have it
|
||||
# we still enable std, which was part of default before
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
BSD 3-Clause Clear License
|
||||
|
||||
Copyright © 2026 ZAMA.
|
||||
Copyright © 2025 ZAMA.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
||||
285
Makefile
285
Makefile
@@ -26,7 +26,6 @@ BENCH_CUSTOM_COMMAND:=
|
||||
NODE_VERSION=24.12
|
||||
BACKWARD_COMPAT_DATA_DIR=utils/tfhe-backward-compat-data
|
||||
BACKWARD_COMPAT_DATA_GEN_VERSION:=$(TFHE_VERSION)
|
||||
CORRUPTED_INPUTS_TEST=tests/corrupted_inputs_deserialization
|
||||
TEST_VECTORS_DIR=apps/test-vectors
|
||||
CURRENT_TFHE_VERSION:=$(shell grep '^version[[:space:]]*=' tfhe/Cargo.toml | cut -d '=' -f 2 | xargs)
|
||||
WASM_PACK_VERSION="0.13.1"
|
||||
@@ -122,12 +121,6 @@ install_build_wasm32_target:
|
||||
( echo "Unable to install wasm32-unknown-unknown target toolchain, check your rustup installation. \
|
||||
Rustup can be downloaded at https://rustup.rs/" && exit 1 )
|
||||
|
||||
.PHONY: install_check_wasm32_target # Install the wasm32 toolchain used for checks
|
||||
install_check_wasm32_target:
|
||||
rustup target add wasm32-unknown-unknown --toolchain "$(RS_CHECK_TOOLCHAIN)" || \
|
||||
( echo "Unable to install wasm32-unknown-unknown target toolchain, check your rustup installation. \
|
||||
Rustup can be downloaded at https://rustup.rs/" && exit 1 )
|
||||
|
||||
.PHONY: install_cargo_nextest # Install cargo nextest used for shortint tests
|
||||
install_cargo_nextest:
|
||||
@cargo nextest --version > /dev/null 2>&1 || \
|
||||
@@ -288,7 +281,6 @@ fmt_internal: install_rs_check_toolchain
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt $(FMT_CHECK)
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C utils/tfhe-lints fmt $(FMT_CHECK)
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C apps/trivium fmt $(FMT_CHECK)
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C apps/princev2 fmt $(FMT_CHECK)
|
||||
for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
|
||||
echo "fmt $$crate"; \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate fmt $(FMT_CHECK); \
|
||||
@@ -313,7 +305,7 @@ semgrep_and_lint_gpu_code: semgrep_lint_setup_venv
|
||||
find "$(TFHECUDA_SRC)" -name '*.h' -o -name '*.cuh' -o -name '*.cu' \
|
||||
| grep -v '/cmake-build-debug/' \
|
||||
| grep -v '/build/' \
|
||||
| xargs venv/bin/semgrep --error --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
|
||||
| xargs venv/bin/semgrep --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
|
||||
venv/bin/python3 "scripts/check_scratch_cleanup.py"
|
||||
|
||||
.PHONY: semver_check_cuda_backend # Run semver checks on tfhe-cuda-backend
|
||||
@@ -357,23 +349,23 @@ check_fmt_js: check_nvm_installed
|
||||
.PHONY: check_fmt_toml # Check TOML files format
|
||||
check_fmt_toml: install_taplo
|
||||
@RUST_LOG=warn taplo fmt --check || \
|
||||
{ echo "TOML files format check failed. Please run 'make fmt_toml'"; exit 1; }
|
||||
echo "TOML files format check failed. Please run 'make fmt_toml'"
|
||||
|
||||
.PHONY: check_typos # Check for typos in codebase
|
||||
check_typos: install_typos_checker
|
||||
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" | typos --file-list - && echo "No typos found"
|
||||
@typos && echo "No typos found"
|
||||
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats,extended-types,zk-pok \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types,zk-pok \
|
||||
--all-targets \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
|
||||
check_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
|
||||
--all-targets \
|
||||
-p tfhe
|
||||
|
||||
@@ -387,7 +379,7 @@ clippy_hpu: install_rs_check_toolchain
|
||||
.PHONY: clippy_gpu_hpu # Run clippy lints on tfhe with "gpu" and "hpu" enabled
|
||||
clippy_gpu_hpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,hpu,pbs-stats,extended-types,zk-pok \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,hpu,pbs-stats,extended-types,zk-pok \
|
||||
--all-targets \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
|
||||
@@ -480,7 +472,7 @@ clippy_rustdoc_gpu: install_rs_check_toolchain
|
||||
fi && \
|
||||
CARGO_TERM_QUIET=true CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
|
||||
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu,gpu-experimental-zk \
|
||||
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
|
||||
-p tfhe -- --nocapture
|
||||
|
||||
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
|
||||
@@ -491,17 +483,11 @@ clippy_c_api: install_rs_check_toolchain
|
||||
|
||||
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
|
||||
clippy_js_wasm_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,extended-types \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types,parallel-wasm-api \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types,cross-origin-wasm-api \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,extended-types \
|
||||
-p tfhe -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
|
||||
@@ -514,15 +500,10 @@ clippy_trivium: install_rs_check_toolchain
|
||||
cd apps/trivium; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-trivium -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_princev2 # Run clippy lints on PRINCEv2 app
|
||||
clippy_princev2: install_rs_check_toolchain
|
||||
cd apps/princev2; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-princev2 -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_ws_tests # Run clippy on the workspace level tests
|
||||
clippy_ws_tests: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --tests \
|
||||
-p tests --features=shortint,integer,zk-pok,strings -- --no-deps -D warnings
|
||||
-p tests --features=shortint,integer,zk-pok -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
@@ -547,15 +528,6 @@ clippy_zk_pok: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-zk-pok --features=experimental -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_zk_pok_wasm # Run clippy lints on tfhe-zk-pok for wasm32 target
|
||||
clippy_zk_pok_wasm: install_rs_check_toolchain install_check_wasm32_target
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--target wasm32-unknown-unknown \
|
||||
-p tfhe-zk-pok -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--target wasm32-unknown-unknown \
|
||||
-p tfhe-zk-pok --features cross-origin-wasm -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_versionable # Run clippy lints on tfhe-versionable
|
||||
clippy_versionable: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
@@ -563,11 +535,6 @@ clippy_versionable: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-versionable -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_safe_serialize # Run clippy lints on tfhe-safe-serialize
|
||||
clippy_safe_serialize: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-safe-serialize -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_param_dedup # Run clippy lints on param_dedup tool
|
||||
clippy_param_dedup: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
@@ -593,28 +560,15 @@ clippy_backward_compat_data: install_rs_check_toolchain # the toolchain is selec
|
||||
echo "Cannot run clippy for backward compat crate on non x86 platform for now."; \
|
||||
fi
|
||||
|
||||
.PHONY: check_backward_compat_locks_did_not_change # Check backward compat Cargo.lock files are up to date
|
||||
check_backward_compat_locks_did_not_change: install_rs_check_toolchain
|
||||
@for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
|
||||
echo "checking Cargo.lock for $$crate"; \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
|
||||
-C $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate metadata --locked --format-version 1 > /dev/null || \
|
||||
( echo "Cargo.lock for $$crate is out of date. Update it with:" && \
|
||||
echo " cd $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate && cargo metadata --format-version 1 > /dev/null" && \
|
||||
echo "then commit the updated Cargo.lock." && exit 1 ); \
|
||||
done
|
||||
|
||||
.PHONY: clippy_test_vectors # Run clippy lints on the test vectors app
|
||||
clippy_test_vectors: install_rs_check_toolchain
|
||||
cd apps/test-vectors; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-test-vectors -- --no-deps -D warnings
|
||||
|
||||
# WARNING: This target is not directly run in CI. When adding a subtarget here,
|
||||
# MAKE SURE TO ALSO ADD IT TO A PCC BATCH BELOW
|
||||
.PHONY: clippy_all # Run all clippy targets
|
||||
clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
|
||||
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_zk_pok_wasm clippy_trivium clippy_princev2 \
|
||||
clippy_versionable clippy_safe_serialize clippy_tfhe_lints clippy_ws_tests clippy_bench clippy_param_dedup \
|
||||
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
|
||||
clippy_versionable clippy_tfhe_lints clippy_ws_tests clippy_bench clippy_param_dedup \
|
||||
clippy_test_vectors clippy_backward_compat_data clippy_wasm_par_mq
|
||||
|
||||
.PHONY: clippy_fast # Run main clippy targets
|
||||
@@ -711,7 +665,7 @@ build_c_api: install_rs_check_toolchain
|
||||
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
|
||||
build_c_api_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu,gpu-experimental-zk \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu \
|
||||
-p tfhe
|
||||
|
||||
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
|
||||
@@ -720,14 +674,11 @@ build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
|
||||
-p tfhe
|
||||
|
||||
.PHONY: build_web_js_api # Build the js API targeting the web browser, in sequential or cross origin parallelism modes.
|
||||
.PHONY: build_web_js_api # Build the js API targeting the web browser
|
||||
build_web_js_api: install_wasm_pack
|
||||
cd tfhe && \
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS)" wasm-pack build --release --target=web \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok,extended-types,cross-origin-wasm-api && \
|
||||
find pkg/snippets -type f -iname worker_helpers.js -exec sed -i 's|import("../../..")|import("../../../tfhe.js")|g' {} \;
|
||||
cp utils/wasm-par-mq/js/coordinator.js tfhe/pkg/
|
||||
jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok,extended-types
|
||||
|
||||
.PHONY: build_web_js_api_parallel # Build the js API targeting the web browser with parallelism support
|
||||
# parallel wasm requires specific build options, see https://github.com/rust-lang/rust/pull/147225
|
||||
@@ -813,7 +764,7 @@ test_zk_cuda_backend:
|
||||
|
||||
|
||||
.PHONY: test_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
|
||||
test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend test_zk_cuda_backend
|
||||
test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
|
||||
|
||||
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
|
||||
test_core_crypto_gpu:
|
||||
@@ -1249,31 +1200,12 @@ test_tfhe_csprng_big_endian: install_cargo_cross
|
||||
RUSTFLAGS="" cross test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-csprng --target=powerpc64-unknown-linux-gnu
|
||||
|
||||
|
||||
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
|
||||
test_zk_pok:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-zk-pok --features experimental
|
||||
|
||||
.PHONY: test_zk_pok_experimental_gpu # Run tfhe-zk-pok GPU-accelerated tests
|
||||
test_zk_pok_experimental_gpu:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-zk-pok --features experimental,gpu-experimental -- gpu
|
||||
|
||||
.PHONY: test_integer_zk_gpu # Run tfhe-zk-pok tests
|
||||
test_integer_zk_gpu:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,zk-pok,gpu -p tfhe -- \
|
||||
integer::gpu::zk::
|
||||
|
||||
.PHONY: test_integer_zk_experimental_gpu # Run tfhe-zk-pok tests
|
||||
test_integer_zk_experimental_gpu:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,zk-pok,gpu,gpu-experimental-zk -p tfhe -- \
|
||||
integer::gpu::zk::
|
||||
|
||||
.PHONY: test_zk_cuda # Run all GPU MSM integration tests (CPU vs GPU comparison + integration test)
|
||||
test_zk_cuda: test_zk_cuda_backend test_zk_pok_experimental_gpu test_integer_zk_gpu test_integer_zk_experimental_gpu
|
||||
|
||||
.PHONY: test_zk_wasm_x86_compat_ci
|
||||
test_zk_wasm_x86_compat_ci: check_nvm_installed
|
||||
source ~/.nvm/nvm.sh && \
|
||||
@@ -1292,11 +1224,6 @@ test_versionable:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
--all-targets -p tfhe-versionable
|
||||
|
||||
.PHONY: test_safe_serialize # Run tests for tfhe-safe-serialize subcrate
|
||||
test_safe_serialize:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
--all-targets -p tfhe-safe-serialize
|
||||
|
||||
# The backward compat data folder holds historical binary data but also rust code to generate and load them.
|
||||
.PHONY: gen_backward_compat_data # Re-generate backward compatibility data
|
||||
gen_backward_compat_data:
|
||||
@@ -1310,19 +1237,11 @@ new_backward_compat_crate:
|
||||
.PHONY: test_backward_compatibility_ci
|
||||
test_backward_compatibility_ci:
|
||||
TFHE_BACKWARD_COMPAT_DATA_DIR="../$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
--features=shortint,integer,zk-pok,strings -p tests test_backward_compatibility -- --nocapture
|
||||
--features=shortint,integer,zk-pok -p tests test_backward_compatibility -- --nocapture
|
||||
|
||||
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
|
||||
test_backward_compatibility: pull_backward_compat_data test_backward_compatibility_ci
|
||||
|
||||
.PHONY: test_corrupted_inputs_ci
|
||||
test_corrupted_inputs_ci:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,zk-pok,strings -p tests test_corrupted_inputs_deserialization -- --nocapture
|
||||
|
||||
.PHONY: test_corrupted_inputs # Same as test_corrupted_inputs_ci but pulls data first
|
||||
test_corrupted_inputs: pull_corrupted_inputs_data test_corrupted_inputs_ci
|
||||
|
||||
# Generate the test vectors and update the hash file
|
||||
.PHONY: gen_test_vectors
|
||||
gen_test_vectors:
|
||||
@@ -1431,19 +1350,6 @@ test_nodejs_wasm_api_ci: build_node_js_api
|
||||
|
||||
# This is an internal target, not meant to be called on its own.
|
||||
run_web_js_api_parallel: build_web_js_api_parallel setup_venv
|
||||
cd $(WEB_SERVER_DIR) && npm install && npm run build
|
||||
source venv/bin/activate && \
|
||||
python ci/webdriver.py \
|
||||
--browser-path $(browser_path) \
|
||||
--driver-path $(driver_path) \
|
||||
--browser-kind $(browser_kind) \
|
||||
--server-cmd $(server_cmd) \
|
||||
--server-workdir "$(WEB_SERVER_DIR)" \
|
||||
--id-pattern $(filter) \
|
||||
--id-exclude-pattern asyncMainThread
|
||||
|
||||
# This is an internal target, not meant to be called on its own.
|
||||
run_web_js_api_cross_origin: build_web_js_api setup_venv
|
||||
cd $(WEB_SERVER_DIR) && npm install && npm run build
|
||||
source venv/bin/activate && \
|
||||
python ci/webdriver.py \
|
||||
@@ -1486,38 +1392,6 @@ test_web_js_api_parallel_firefox_ci: setup_venv
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) test_web_js_api_parallel_firefox
|
||||
|
||||
test_web_js_api_cross_origin_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
|
||||
test_web_js_api_cross_origin_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
|
||||
test_web_js_api_cross_origin_chrome: browser_kind = chrome
|
||||
test_web_js_api_cross_origin_chrome: server_cmd = "npm run server:cross-origin"
|
||||
test_web_js_api_cross_origin_chrome: filter = ZeroKnowledgeTest # Only run zk proof tests in cross-origin mode
|
||||
|
||||
.PHONY: test_web_js_api_cross_origin_chrome # Run tests for the web wasm api in cross-origin mode on Chrome
|
||||
test_web_js_api_cross_origin_chrome: run_web_js_api_cross_origin
|
||||
|
||||
.PHONY: test_web_js_api_cross_origin_chrome_ci # Run tests for the web wasm api in cross-origin mode on Chrome
|
||||
test_web_js_api_cross_origin_chrome_ci: setup_venv
|
||||
source ~/.nvm/nvm.sh && \
|
||||
nvm install $(NODE_VERSION) && \
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) test_web_js_api_cross_origin_chrome
|
||||
|
||||
test_web_js_api_cross_origin_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
|
||||
test_web_js_api_cross_origin_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
|
||||
test_web_js_api_cross_origin_firefox: browser_kind = firefox
|
||||
test_web_js_api_cross_origin_firefox: server_cmd = "npm run server:cross-origin"
|
||||
test_web_js_api_cross_origin_firefox: filter = ZeroKnowledgeTest # Only run zk proof tests in cross-origin mode
|
||||
|
||||
.PHONY: test_web_js_api_cross_origin_firefox # Run tests for the web wasm api in cross-origin mode on Firefox
|
||||
test_web_js_api_cross_origin_firefox: run_web_js_api_cross_origin
|
||||
|
||||
.PHONY: test_web_js_api_cross_origin_firefox_ci # Run tests for the web wasm api in cross-origin mode on Firefox
|
||||
test_web_js_api_cross_origin_firefox_ci: setup_venv
|
||||
source ~/.nvm/nvm.sh && \
|
||||
nvm install $(NODE_VERSION) && \
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) test_web_js_api_cross_origin_firefox
|
||||
|
||||
WASM_PAR_MQ_TEST_DIR=utils/wasm-par-mq/web_tests
|
||||
|
||||
.PHONY: build_wasm_par_mq_tests # Build the wasm-par-mq test WASM package
|
||||
@@ -1674,57 +1548,27 @@ bench_integer_rerand: install_rs_check_toolchain
|
||||
--bench integer-rerand \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_integer_rerand_gpu # Run benchmarks for integer rerand on GPU backend
|
||||
bench_integer_rerand_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-rerand \
|
||||
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_msm_zk
|
||||
bench_msm_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-msm \
|
||||
--features=zk-pok -p tfhe-benchmark --profile release --
|
||||
|
||||
# GPU benchmarks need --profile release for correct measurements
|
||||
.PHONY: bench_msm_zk_gpu
|
||||
bench_msm_zk_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-msm \
|
||||
--features=gpu,gpu-experimental-zk,zk-pok -p tfhe-benchmark --profile release -- zk::cuda::msm
|
||||
|
||||
# GPU benchmarks need --profile release for correct measurements
|
||||
.PHONY: bench_integer_zk_gpu
|
||||
bench_integer_zk_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --profile release --
|
||||
|
||||
# GPU benchmarks need --profile release for correct measurements
|
||||
.PHONY: bench_integer_zk_experimental_gpu
|
||||
bench_integer_zk_experimental_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats,zk-pok -p tfhe-benchmark --profile release --
|
||||
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_integer_aes_gpu # Run benchmarks for AES on GPU backend
|
||||
bench_integer_aes_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-aes \
|
||||
--features=integer,internal-keycache,gpu -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_integer_aes256_gpu # Run benchmarks for AES256 on GPU backend
|
||||
bench_integer_aes256_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-aes256 \
|
||||
--features=integer,internal-keycache,gpu -p tfhe-benchmark --profile release_lto_off --
|
||||
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_integer_trivium_gpu # Run benchmarks for trivium on GPU backend
|
||||
bench_integer_trivium_gpu: install_rs_check_toolchain
|
||||
@@ -1888,37 +1732,37 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) bench_web_js_api_parallel_firefox
|
||||
|
||||
bench_web_js_api_cross_origin_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
|
||||
bench_web_js_api_cross_origin_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
|
||||
bench_web_js_api_cross_origin_chrome: browser_kind = chrome
|
||||
bench_web_js_api_cross_origin_chrome: server_cmd = "npm run server:cross-origin"
|
||||
bench_web_js_api_cross_origin_chrome: filter = ZeroKnowledgeBench # Only bench zk with cross-origin workers
|
||||
bench_web_js_api_unsafe_coop_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
|
||||
bench_web_js_api_unsafe_coop_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
|
||||
bench_web_js_api_unsafe_coop_chrome: browser_kind = chrome
|
||||
bench_web_js_api_unsafe_coop_chrome: server_cmd = "npm run server:unsafe-coop"
|
||||
bench_web_js_api_unsafe_coop_chrome: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
|
||||
|
||||
.PHONY: bench_web_js_api_cross_origin_chrome # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_cross_origin_chrome: run_web_js_api_cross_origin
|
||||
.PHONY: bench_web_js_api_unsafe_coop_chrome # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_unsafe_coop_chrome: run_web_js_api_parallel
|
||||
|
||||
.PHONY: bench_web_js_api_cross_origin_chrome_ci # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_cross_origin_chrome_ci: setup_venv
|
||||
.PHONY: bench_web_js_api_unsafe_coop_chrome_ci # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_unsafe_coop_chrome_ci: setup_venv
|
||||
source ~/.nvm/nvm.sh && \
|
||||
nvm install $(NODE_VERSION) && \
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) bench_web_js_api_cross_origin_chrome
|
||||
$(MAKE) bench_web_js_api_unsafe_coop_chrome
|
||||
|
||||
bench_web_js_api_cross_origin_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
|
||||
bench_web_js_api_cross_origin_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
|
||||
bench_web_js_api_cross_origin_firefox: browser_kind = firefox
|
||||
bench_web_js_api_cross_origin_firefox: server_cmd = "npm run server:cross-origin"
|
||||
bench_web_js_api_cross_origin_firefox: filter = ZeroKnowledgeBench # Only bench zk with cross-origin workers
|
||||
bench_web_js_api_unsafe_coop_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
|
||||
bench_web_js_api_unsafe_coop_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
|
||||
bench_web_js_api_unsafe_coop_firefox: browser_kind = firefox
|
||||
bench_web_js_api_unsafe_coop_firefox: server_cmd = "npm run server:unsafe-coop"
|
||||
bench_web_js_api_unsafe_coop_firefox: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
|
||||
|
||||
.PHONY: bench_web_js_api_cross_origin_firefox # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_cross_origin_firefox: run_web_js_api_cross_origin
|
||||
.PHONY: bench_web_js_api_unsafe_coop_firefox # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_unsafe_coop_firefox: run_web_js_api_parallel
|
||||
|
||||
.PHONY: bench_web_js_api_cross_origin_firefox_ci # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_cross_origin_firefox_ci: setup_venv
|
||||
.PHONY: bench_web_js_api_unsafe_coop_firefox_ci # Run benchmarks for the web wasm api without cross-origin isolation
|
||||
bench_web_js_api_unsafe_coop_firefox_ci: setup_venv
|
||||
source ~/.nvm/nvm.sh && \
|
||||
nvm install $(NODE_VERSION) && \
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) bench_web_js_api_cross_origin_firefox
|
||||
$(MAKE) bench_web_js_api_unsafe_coop_firefox
|
||||
|
||||
.PHONY: bench_hlapi_unsigned # Run benchmarks for integer operations
|
||||
bench_hlapi_unsigned: install_rs_check_toolchain
|
||||
@@ -1951,25 +1795,25 @@ bench_hlapi_hpu: install_rs_check_toolchain
|
||||
--bench hlapi \
|
||||
--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_erc7984 # Run benchmarks for ERC7984 operations
|
||||
bench_hlapi_erc7984: install_rs_check_toolchain
|
||||
.PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations
|
||||
bench_hlapi_erc20: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc7984 \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_erc7984_gpu # Run benchmarks for ERC7984 operations on GPU
|
||||
bench_hlapi_erc7984_gpu: install_rs_check_toolchain
|
||||
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
|
||||
bench_hlapi_erc20_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc7984 \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_hlapi_erc7984_gpu_classical # Run benchmarks for ERC7984 operations on GPU with classical parameters
|
||||
bench_hlapi_erc7984_gpu_classical: install_rs_check_toolchain
|
||||
.PHONY: bench_hlapi_erc20_gpu_classical # Run benchmarks for ERC20 operations on GPU with classical parameters
|
||||
bench_hlapi_erc20_gpu_classical: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=classical \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc7984 \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_hlapi_dex # Run benchmarks for DEX operations
|
||||
@@ -1993,13 +1837,13 @@ bench_hlapi_dex_gpu_classical: install_rs_check_toolchain
|
||||
--bench hlapi-dex \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_hlapi_erc7984_hpu # Run benchmarks for ERC7984 operations on HPU
|
||||
bench_hlapi_erc7984_hpu: install_rs_check_toolchain
|
||||
.PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ERC20 operations on HPU
|
||||
bench_hlapi_erc20_hpu: install_rs_check_toolchain
|
||||
source ./setup_hpu.sh --config $(HPU_CONFIG); \
|
||||
export V80_PCIE_DEV=${V80_PCIE_DEV}; \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc7984 \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
|
||||
@@ -2007,13 +1851,6 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench -p tfhe-zk-pok --
|
||||
|
||||
.PHONY: bench_tfhe_zk_pok_gpu # Run benchmarks for the tfhe_zk_pok crate using GPU acceleration
|
||||
bench_tfhe_zk_pok_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--package tfhe-zk-pok \
|
||||
--features=gpu-experimental --profile release
|
||||
|
||||
.PHONY: bench_hlapi_noise_squash # Run benchmarks for noise squash operation
|
||||
bench_hlapi_noise_squash: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
|
||||
@@ -2055,10 +1892,10 @@ bench_summary: install_rs_check_toolchain
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::decomp_noise_squash_comp::'
|
||||
|
||||
# ERC7984
|
||||
# ERC20
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc7984 \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache -p tfhe-benchmark -- '::transfer::overflow'
|
||||
|
||||
# DEX
|
||||
@@ -2100,10 +1937,10 @@ bench_summary_gpu: install_rs_check_toolchain
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::decomp_noise_squash_comp::'
|
||||
|
||||
# ERC7984
|
||||
# ERC20
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc7984 \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off -- '::transfer::overflow'
|
||||
|
||||
# DEX
|
||||
@@ -2196,10 +2033,6 @@ write_params_to_file: install_rs_check_toolchain
|
||||
pull_backward_compat_data:
|
||||
./scripts/pull_lfs_data.sh $(BACKWARD_COMPAT_DATA_DIR)
|
||||
|
||||
.PHONY: pull_corrupted_inputs_data # Pull the data files needed for corrupted inputs deserialization tests
|
||||
pull_corrupted_inputs_data:
|
||||
./scripts/pull_lfs_data.sh $(CORRUPTED_INPUTS_TEST)
|
||||
|
||||
.PHONY: pull_hpu_files # Pull the hpu files
|
||||
pull_hpu_files:
|
||||
./scripts/pull_lfs_data.sh backends/tfhe-hpu-backend/
|
||||
@@ -2282,7 +2115,6 @@ pcc_batch_5:
|
||||
$(call run_recipe_with_details,clippy_tfhe_lints)
|
||||
$(call run_recipe_with_details,check_compile_tests)
|
||||
$(call run_recipe_with_details,clippy_backward_compat_data)
|
||||
$(call run_recipe_with_details,check_backward_compat_locks_did_not_change)
|
||||
|
||||
.PHONY: pcc_batch_6 # duration: 6'32''
|
||||
pcc_batch_6:
|
||||
@@ -2291,11 +2123,8 @@ pcc_batch_6:
|
||||
$(call run_recipe_with_details,clippy_tasks)
|
||||
$(call run_recipe_with_details,clippy_tfhe_csprng)
|
||||
$(call run_recipe_with_details,clippy_zk_pok)
|
||||
$(call run_recipe_with_details,clippy_zk_pok_wasm)
|
||||
$(call run_recipe_with_details,clippy_trivium)
|
||||
$(call run_recipe_with_details,clippy_princev2)
|
||||
$(call run_recipe_with_details,clippy_versionable)
|
||||
$(call run_recipe_with_details,clippy_safe_serialize)
|
||||
$(call run_recipe_with_details,clippy_param_dedup)
|
||||
$(call run_recipe_with_details,docs)
|
||||
|
||||
|
||||
@@ -15,3 +15,12 @@ extend-ignore-identifiers-re = [
|
||||
"0x[0-9a-fA-F]+",
|
||||
"xrt_coreutil",
|
||||
]
|
||||
|
||||
[files]
|
||||
extend-exclude = [
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu",
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu",
|
||||
"backends/tfhe-hpu-backend/config_store/**/*.link_summary",
|
||||
"*.cbor",
|
||||
"*.bcode",
|
||||
]
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
[package]
|
||||
name = "tfhe-princev2"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
rayon = "1.11.0"
|
||||
tfhe = { path = "../../tfhe", features = ["shortint"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.5.1"
|
||||
|
||||
[features]
|
||||
verbose-timings = []
|
||||
|
||||
[profile.release]
|
||||
lto = "fat"
|
||||
opt-level = 3
|
||||
codegen-units = 1
|
||||
|
||||
[[bench]]
|
||||
name = "princev2"
|
||||
harness = false
|
||||
@@ -1,79 +0,0 @@
|
||||
# FHE implementation of PRINCEv2 using TFHE-rs
|
||||
|
||||
This crate implements homomorphic encryption and decryption of the PRINCEv2 block cipher [BEK+20] using TFHE-rs's shortint API. It takes FHE ciphertexts representing the plaintext (resp. ciphertext) block and the two halves of the PRINCEv2 key and produces FHE ciphertexts of the encrypted (resp. decrypted) block.
|
||||
|
||||
Inputs and outputs encrypt 64-bit integers that are represented as vectors of 2-bit nibbles, most significant nibble first, stacked in the lower part of the FHE message space of each ciphertext.
|
||||
|
||||
The cipher itself (a succession of S-box, Linear, Permutation, Xor layers) is evaluated under FHE using the `shortint` API, systematically operating on 4-bit lookup tables. More details on the FHE design can be found in [BJ26, Section 6].
|
||||
|
||||
## References
|
||||
|
||||
PRINCEv2 is specified in:
|
||||
> [BEK+20] Dusan Božilov, Maria Eichlseder, Miroslav Kneževic, Baptiste Lambin, Gregor Leander, Thorben Moos, Ventzislav Nikov, Shahram Rasoolzadeh, Yosuke Todo, and Friedrich Wiemer. *PRINCEv2: More security for (almost) no overhead.* In Selected Areas in Cryptography (SAC 2020), volume 12804 of LNCS, pp.483--511, Springer, 2020. DOI:10.1007/978-3-030-81652-0_19.
|
||||
|
||||
Test vectors are those of Appendix B of the paper.
|
||||
|
||||
More details on the FHE implementation design can be found in Section 6 of:
|
||||
> [BJ26] Olivier Bernard and Marc Joye. *Hash function constructions from lightweight block ciphers for fully homomorphic encryption*. Cryptology ePrint Archive, ePrint:2026/309, 2026.
|
||||
|
||||
## Layout
|
||||
|
||||
- `src/u64_conv.rs` — plaintext-side conversions between `u64` and the 32-element 2-bit-nibble vectors used on the FHE side; it exposes `u64_to_vec_u2` and `vec_u2_to_u64` as part of the encoding contract for the underlying plaintexts of the inputs and outputs.
|
||||
- `src/permute.rs` — generic permutation helper over ciphertext arrays.
|
||||
- `src/pv2_lut.rs` — precomputed S-box, inverse S-box, M-layer and round-constant lookup tables.
|
||||
- `src/pv2_cipher.rs` — the homomorphic round functions and the public `pv2_encrypt` / `pv2_decrypt` entry points.
|
||||
- `tests/pv2_kat.rs` — known-answer tests against the paper vectors.
|
||||
- `benches/princev2.rs` — benchmarks for a full call of `pv2_encrypt` (`pv2_decrypt` has exactly the same performance characteristics).
|
||||
|
||||
## Usage
|
||||
|
||||
```rust,no_run
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe_princev2::{pv2_encrypt, u64_to_vec_u2, vec_u2_to_u64};
|
||||
|
||||
let (s_key, ev_key) = tfhe::shortint::gen_keys(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let encode = |x: u64| -> [Ciphertext; 32] {
|
||||
let nibbles = u64_to_vec_u2(x);
|
||||
let v: Vec<_> = nibbles.into_iter().map(|n| s_key.encrypt(n as u64)).collect();
|
||||
v.try_into().unwrap()
|
||||
};
|
||||
|
||||
let ct_m = encode(0x0123456789abcdef);
|
||||
let ct_k0 = encode(0x0123456789abcdef);
|
||||
let ct_k1 = encode(0xfedcba9876543210);
|
||||
|
||||
let mut ct_out: [Ciphertext; 32] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
pv2_encrypt(&ev_key, &mut ct_out, &ct_m, &ct_k0, &ct_k1);
|
||||
|
||||
let out_nibbles: [u8; 32] =
|
||||
std::array::from_fn(|i| s_key.decrypt_message_and_carry(&ct_out[i]) as u8);
|
||||
assert_eq!(vec_u2_to_u64(out_nibbles), 0x603cd95fa72a8704);
|
||||
```
|
||||
|
||||
## Running tests
|
||||
|
||||
```bash
|
||||
RAYON_NUM_THREADS=64 cargo test --release --test pv2_kat -- --test-threads=1
|
||||
```
|
||||
|
||||
Each KAT should take approximately 5 seconds (resp. 800ms) on 8 cores (resp. 64 cores) of an Amazon AWS hpc7a.96xlarge machine. There are currently 10 KATs (5 for encryption and 5 for decryption). Optimal timings depend on the hardware, but they are structurally better when the number of threads is a power of 2 up to 64; the best possible latency is obtained with 64 threads.
|
||||
|
||||
|
||||
## Optional verbose timings
|
||||
|
||||
```bash
|
||||
RAYON_NUM_THREADS=64 cargo test --release --test pv2_kat --features verbose-timings -- --test-threads=1 --nocapture
|
||||
```
|
||||
|
||||
This times each internal round function call and emits one `eprintln!` per such call.
|
||||
|
||||
|
||||
## Running benchmarks
|
||||
|
||||
```bash
|
||||
RAYON_NUM_THREADS=64 cargo bench --bench princev2
|
||||
```
|
||||
|
||||
Timings obtained on up to 64 cores of an `Amazon AWS hpc7a.96xlarge` machine can also be found in [BJ26, Table 6.1].
|
||||
@@ -1,86 +0,0 @@
|
||||
//! Benchmarks for homomorphic PRINCEv2 encryption (and decryption)
|
||||
//!
|
||||
//! Times one full call of `pv2_encrypt`, i.e., transciphering one block of 64 bits.
|
||||
//! Note that decryption `pv2_decrypt` follows exactly the same logic as encryption with different
|
||||
//! constants, hence it is not benched separately.
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
|
||||
use tfhe::shortint::prelude::*;
|
||||
|
||||
use tfhe_princev2::{pv2_encrypt, u64_to_vec_u2, vec_u2_to_u64};
|
||||
|
||||
// [NB] We don't expect pv2_decrypt() to behave differently from pv2_encrypt()
|
||||
criterion_group!(pv2_benches, bench_pv2_encrypt);
|
||||
criterion_main!(pv2_benches);
|
||||
|
||||
// KAT structure for Pv2 cipher
|
||||
struct Pv2Kat {
|
||||
name: &'static str,
|
||||
ptxt: u64,
|
||||
k0: u64,
|
||||
k1: u64,
|
||||
ctxt: u64,
|
||||
}
|
||||
|
||||
static PV2_KAT_LN2: Pv2Kat = Pv2Kat {
|
||||
// ptxt, k0, k1 are the first three u64 words of ln(2) fractional part.
|
||||
// ctxt was computed with the SageMath reference implementation and cross-checked here.
|
||||
name: "PRINCEv2 KAT from ln(2)",
|
||||
ptxt: 0xb17217f7d1cf79ab,
|
||||
k0: 0xc9e3b39803f2f6af,
|
||||
k1: 0x40f343267298b62d,
|
||||
ctxt: 0x40ac916b4598216d,
|
||||
};
|
||||
|
||||
/// Encrypt a u64 as 32 ciphertexts, each holding a 2-bit nibble in the low bits of the FHE message
|
||||
/// space. Most significant bits of the input are at index 0 in the output
|
||||
fn encrypt_u64_as_vec_u2l(s_key: &ClientKey, x: u64) -> [Ciphertext; 32] {
|
||||
let x_u2: [u8; 32] = u64_to_vec_u2(x);
|
||||
let ct: Vec<Ciphertext> = x_u2
|
||||
.into_iter()
|
||||
.map(|u2| s_key.encrypt(u2 as u64))
|
||||
.collect();
|
||||
ct.try_into().unwrap()
|
||||
}
|
||||
|
||||
/// Reverse of function encrypt_u64_as_vec_u2l()
|
||||
fn decrypt_vec_u2l_as_u64(s_key: &ClientKey, v: &[Ciphertext; 32]) -> u64 {
|
||||
let x_u2: [u8; 32] = std::array::from_fn(|n| s_key.decrypt_message_and_carry(&v[n]) as u8);
|
||||
let x: u64 = vec_u2_to_u64(x_u2);
|
||||
x
|
||||
}
|
||||
|
||||
/// Run benches for PRINCEv2 transciphering.
|
||||
fn bench_pv2_encrypt(c: &mut Criterion) {
|
||||
let (s_key, ev_key): (ClientKey, ServerKey) = // Params: Need 4-bits msg + nu >= 4
|
||||
tfhe::shortint::gen_keys(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
// Encryptions of inputs (k0,k1,m)
|
||||
let ct_k0: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, PV2_KAT_LN2.k0);
|
||||
let ct_k1: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, PV2_KAT_LN2.k1);
|
||||
let ct_m: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, PV2_KAT_LN2.ptxt);
|
||||
|
||||
// PRINCEv2 Enc in FHE
|
||||
let mut ct_out: [Ciphertext; 32] = std::array::from_fn(|_| ev_key.create_trivial(0)); // [NB] shortint::create_trivial() vs boolean::trivial_encrypt()
|
||||
c.bench_function("PRINCEv2 Trans-Encryption of one message block", |b| {
|
||||
b.iter(|| {
|
||||
pv2_encrypt(&ev_key, &mut ct_out, &ct_m, &ct_k0, &ct_k1);
|
||||
});
|
||||
});
|
||||
|
||||
// Testing the (de-)encrypted result
|
||||
let pt_out: u64 = decrypt_vec_u2l_as_u64(&s_key, &ct_out);
|
||||
assert_eq!(
|
||||
pt_out,
|
||||
PV2_KAT_LN2.ctxt,
|
||||
"{} failed: ptxt={:#018x}, k0={:#018x}, k1={:#018x}, expected={:#018x}, got={:#018x}",
|
||||
PV2_KAT_LN2.name,
|
||||
PV2_KAT_LN2.ptxt,
|
||||
PV2_KAT_LN2.k0,
|
||||
PV2_KAT_LN2.k1,
|
||||
PV2_KAT_LN2.ctxt,
|
||||
pt_out
|
||||
);
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
// Pure Rust Helpers
|
||||
mod u64_conv;
|
||||
pub use u64_conv::{u64_to_vec_u2, vec_u2_to_u64}; // For tests, part of the encoding contract for pv2_encrypt()/pv2_decrypt()
|
||||
mod permute;
|
||||
|
||||
// Cipher internals: pre-computed constants, s-box and perms
|
||||
mod pv2_cipher;
|
||||
mod pv2_lut;
|
||||
pub use pv2_cipher::{pv2_decrypt, pv2_encrypt};
|
||||
@@ -1,68 +0,0 @@
|
||||
/*
|
||||
* Apply Permutations on arrays
|
||||
* --------------------------------------------------------------------------------------------- */
|
||||
|
||||
/// Permutes `list` in place so that, after the call, `list[i]` holds the element that was at
/// index `order[i]` before the call.
///
/// The permutation is applied one cycle at a time using swaps only, so `T` needs neither
/// `Clone` nor `Default`.
///
/// # Panics
///
/// Panics if `list` and `order` have different lengths, or if `order` is not a permutation of
/// `0..order.len()` (an index is out of range or appears more than once). Without this check a
/// repeated index would make the cycle walk below loop forever.
pub fn apply_perm_assign<T>(list: &mut [T], order: &[usize]) {
    assert_eq!(list.len(), order.len());
    let n: usize = order.len();
    // done[k] is set once position k has been handled as the source side of a swap.
    let mut done: Vec<bool> = vec![false; n];

    for start in 0..n {
        // Positions already handled as part of an earlier cycle are skipped.
        if done[start] {
            continue;
        }
        let mut from = start;
        let mut to = order[start];

        // Walk the cycle containing `start`; a cycle holds at most n positions.
        while to != start {
            // In a genuine permutation the walk only meets fresh, in-range positions before
            // coming back to `start`. Anything else means `order` is malformed.
            assert!(
                to < n && !done[to],
                "`order` is not a permutation of 0..{}",
                n
            );
            list.swap(from, to);
            done[from] = true;
            from = to;
            to = order[to];
        }
        done[from] = true;
    }
}
|
||||
|
||||
/// Checks `apply_perm_assign` on two fixed vectors: a 16-entry permutation and a 64-entry one
/// (labelled `FHE_MP_PERM_FW` by the original author) applied to arbitrary bytes.
#[test]
fn test_apply_perm_assign() {
    // Case 1: 16 entries.
    {
        let order: [usize; 16] = [
            0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3, 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
        ];
        let mut data: [u8; 16] = [
            0x8, 0x3, 0x2, 0xb, 0xd, 0x4, 0x6, 0xf, 0x1, 0x0, 0x8, 0xe, 0x7, 0x8, 0x5, 0x7,
        ];
        let expected: [u8; 16] = [
            0x8, 0x4, 0x8, 0x7, 0xd, 0x0, 0x5, 0xb, 0x1, 0x8, 0x2, 0xf, 0x7, 0x3, 0x6, 0xe,
        ];

        apply_perm_assign(&mut data, &order);
        assert_eq!(expected, data);
    }

    // Case 2: 64 entries.
    {
        #[rustfmt::skip]
        let order: [usize; 64] = [ // FHE_MP_PERM_FW
            0x00, 0x05, 0x0a, 0x0f, 0x12, 0x17, 0x18, 0x1d, 0x21, 0x26, 0x2b, 0x2c, 0x31, 0x36, 0x3b, 0x3c,
            0x13, 0x14, 0x19, 0x1e, 0x22, 0x27, 0x28, 0x2d, 0x32, 0x37, 0x38, 0x3d, 0x01, 0x06, 0x0b, 0x0c,
            0x23, 0x24, 0x29, 0x2e, 0x33, 0x34, 0x39, 0x3e, 0x02, 0x07, 0x08, 0x0d, 0x10, 0x15, 0x1a, 0x1f,
            0x30, 0x35, 0x3a, 0x3f, 0x03, 0x04, 0x09, 0x0e, 0x11, 0x16, 0x1b, 0x1c, 0x20, 0x25, 0x2a, 0x2f,
        ];
        #[rustfmt::skip]
        let mut data: [u8; 64] = [ // Random
            0xc2, 0x0d, 0x97, 0xd6, 0xb0, 0x79, 0x1b, 0x43, 0xcd, 0x03, 0x33, 0xfe, 0x4b, 0x1c, 0x7f, 0xa9,
            0xc0, 0xc2, 0xa7, 0x17, 0x88, 0xbf, 0xa6, 0x49, 0x5d, 0xcd, 0x11, 0xee, 0xdc, 0xc4, 0x17, 0x90,
            0x12, 0x7e, 0x0d, 0xb0, 0x1f, 0x58, 0xf5, 0xf4, 0x9f, 0xcc, 0xdd, 0xca, 0x49, 0x5a, 0x0e, 0xd2,
            0xf6, 0x37, 0xe9, 0x40, 0x6d, 0x56, 0x79, 0x53, 0xd6, 0x63, 0x6f, 0x8a, 0xf5, 0xaa, 0x5b, 0x08,
        ];
        #[rustfmt::skip]
        let expected: [u8; 64] = [
            0xc2, 0x79, 0x33, 0xa9, 0xa7, 0x49, 0x5d, 0xc4, 0x7e, 0xf5, 0xca, 0x49, 0x37, 0x79, 0x8a, 0xf5,
            0x17, 0x88, 0xcd, 0x17, 0x0d, 0xf4, 0x9f, 0x5a, 0xe9, 0x53, 0xd6, 0xaa, 0x0d, 0x1b, 0xfe, 0x4b,
            0xb0, 0x1f, 0xcc, 0x0e, 0x40, 0x6d, 0x63, 0x5b, 0x97, 0x43, 0xcd, 0x1c, 0xc0, 0xbf, 0x11, 0x90,
            0xf6, 0x56, 0x6f, 0x08, 0xd6, 0xb0, 0x03, 0x7f, 0xc2, 0xa6, 0xee, 0xdc, 0x12, 0x58, 0xdd, 0xd2,
        ];

        apply_perm_assign(&mut data, &order);
        assert_eq!(expected, data);
    }
}
|
||||
@@ -1,537 +0,0 @@
|
||||
use rayon::prelude::*;
|
||||
use tfhe::shortint::prelude::*;
|
||||
|
||||
use crate::permute; // permute/shuffle/swap arrays
|
||||
use crate::pv2_lut; // fhe luts and constants for prince v2
|
||||
|
||||
/* Wraps a call `func(args...)` so that it can be timed.
 * The call itself is always executed. Only when the "verbose-timings" feature is enabled,
 * a timer is started before the call and one line "<function name>:\t<elapsed>" is printed
 * to stderr after it. */
macro_rules! monitor {
    ($func:ident($( $arg:expr ),*)) => {
        #[cfg(feature = "verbose-timings")]
        let started = std::time::Instant::now();
        $func($( $arg ),*);
        #[cfg(feature = "verbose-timings")]
        eprintln!("{}:\t{:.4?}", stringify!($func), started.elapsed())
    }
}
|
||||
|
||||
/* out_u4 = (in_u2q xor ct_k) as vec_u4
|
||||
* [Parallel:(32)/32/(16)] XOR stage -> u4 */
|
||||
fn pv2_xor_to_u4(
|
||||
ev_key: &ServerKey,
|
||||
out_u4: &mut [Ciphertext; 16],
|
||||
in_u2q: &[Ciphertext; 32],
|
||||
ct_k: &[Ciphertext; 32],
|
||||
) {
|
||||
// xor alternatively to pair of high/low bits
|
||||
let zlut_xor_fw = [
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_FW[0][x as usize] as u64),
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_FW[1][x as usize] as u64),
|
||||
];
|
||||
|
||||
/* "Bivariate" xor ------------------------------------------------------------------
|
||||
* Sum in_u2q + ct_k, apply xor LUT to high or low bit */
|
||||
/* [Sequential]
|
||||
let mut ct_hl: [Ciphertext; 32] = std::array::from_fn(|n| ev_key.unchecked_add(&in_u2q[n], &ct_k[n]));
|
||||
for n in 0..32 {
|
||||
ev_key.apply_lookup_table_assign(&mut ct_hl[n], &zlut_xor_fw[n & 0x1]);
|
||||
}
|
||||
// */
|
||||
//* [Parallel:32]
|
||||
let ct_hl: [Ciphertext; 32] = (0..32)
|
||||
.into_par_iter()
|
||||
.map(|n| {
|
||||
let both_n: Ciphertext = ev_key.unchecked_add(&in_u2q[n], &ct_k[n]);
|
||||
ev_key.apply_lookup_table(&both_n, &zlut_xor_fw[n & 0x1]) // Combined version faster?
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap();
|
||||
// */
|
||||
// [Parallel:16] Sum by pairs
|
||||
/* (*out_u4) = (0..16).into_par_iter().map(|w| {
|
||||
ev_key.unchecked_add(&ct_hl[2*w], &ct_hl[2*w+1])
|
||||
}).collect::<Vec<_>>().try_into().unwrap();*/
|
||||
for w in 0..16 {
|
||||
out_u4[w] = ev_key.unchecked_add(&ct_hl[2 * w], &ct_hl[2 * w + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
/* out_b = (in_u2q xor ct_k) as vec_b
|
||||
* [Parallel:(32)/64] -> drifted bits */
|
||||
fn pv2_xor_to_b(
|
||||
ev_key: &ServerKey,
|
||||
out_b: &mut [Ciphertext; 64],
|
||||
in_u2q: &[Ciphertext; 32],
|
||||
ct_k: &[Ciphertext; 32],
|
||||
) {
|
||||
let zlut_xor_bh = [
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BH[0][x as usize] as u64),
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BH[1][x as usize] as u64),
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BH[2][x as usize] as u64),
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BH[3][x as usize] as u64),
|
||||
];
|
||||
let zlut_xor_bl = [
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BL[0][x as usize] as u64),
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BL[1][x as usize] as u64),
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BL[2][x as usize] as u64),
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_BL[3][x as usize] as u64),
|
||||
];
|
||||
|
||||
// [Parallel:32] Sum in_u2q + ct_k --> could stay as iter? and assign in the following loop
|
||||
let ct_hl: [Ciphertext; 32] =
|
||||
std::array::from_fn(|n| ev_key.unchecked_add(&in_u2q[n], &ct_k[n]));
|
||||
|
||||
// Apply xor (incl. bit_extract) luts on each nibble
|
||||
/* [Sequential]
|
||||
for w in 0..16 {
|
||||
let b_pos = w & 0x3; // w mod 4 (b_pos:0123_b, so b_pos=0 is for (b << 3))
|
||||
out_b[4*w] = ev_key.apply_lookup_table(&ct_hl[2*w], &zlut_xor_bh[b_pos]);
|
||||
out_b[4*w+1] = ev_key.apply_lookup_table(&ct_hl[2*w], &zlut_xor_bl[b_pos]);
|
||||
out_b[4*w+2] = ev_key.apply_lookup_table(&ct_hl[2*w+1], &zlut_xor_bh[b_pos]);
|
||||
out_b[4*w+3] = ev_key.apply_lookup_table(&ct_hl[2*w+1], &zlut_xor_bl[b_pos]);
|
||||
} // */
|
||||
//* [Parallel:64] Apply xor (incl. bit_extract) luts on each nibble
|
||||
(*out_b) = (0..64)
|
||||
.into_par_iter()
|
||||
.map(|idx| {
|
||||
let n: usize = idx >> 1; // 2*w or 2*w+1
|
||||
let w: usize = idx >> 2;
|
||||
let b_pos: usize = w & 0x3;
|
||||
let zlut_bhl_pos = if (idx & 0x1) == 1 {
|
||||
&zlut_xor_bl[b_pos]
|
||||
} else {
|
||||
&zlut_xor_bh[b_pos]
|
||||
};
|
||||
ev_key.apply_lookup_table(&ct_hl[n], zlut_bhl_pos)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap(); // */
|
||||
}
|
||||
|
||||
// [Parallel:(32)/32]
|
||||
fn pv2_xor_to_u2(
|
||||
ev_key: &ServerKey,
|
||||
out_u2: &mut [Ciphertext; 32],
|
||||
in_u2q: &[Ciphertext; 32],
|
||||
ct_k: &[Ciphertext; 32],
|
||||
) {
|
||||
let zlut_xor =
|
||||
ev_key.generate_lookup_table(|x: u64| pv2_lut::PV2_XOR_TO_LOW[x as usize] as u64);
|
||||
|
||||
/* [Sequential]
|
||||
for n in 0..32 {
|
||||
out_u2[n] = ev_key.unchecked_add(&in_u2q[n], &ct_k[n]);
|
||||
ev_key.apply_lookup_table_assign(&mut out_u2[n], &zlut_xor);
|
||||
} // */
|
||||
// [Parallel:32] Apply xor luts on each nibble
|
||||
//* [Parallel:32]
|
||||
(*out_u2) = (0..32)
|
||||
.into_par_iter()
|
||||
.map(|n| {
|
||||
let both_n: Ciphertext = ev_key.unchecked_add(&in_u2q[n], &ct_k[n]);
|
||||
ev_key.apply_lookup_table(&both_n, &zlut_xor)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap(); // */
|
||||
}
|
||||
|
||||
// [Parallel:64/(32/16)/64/(32)] Fw Round
|
||||
// Forward round receives full 4-bit nibbles (16) and returns 2-bit nibbles (32) packed on high bits
|
||||
fn pv2_fw_round(
|
||||
ev_key: &ServerKey,
|
||||
out_u2q: &mut [Ciphertext; 32], // out: 2-bits (high)
|
||||
in_u4: &[Ciphertext; 16], // in: 4-bits (full)
|
||||
zlut: &[[u8; 16]; 16],
|
||||
) {
|
||||
/* S-Boxes ------------------------------------------------------------------------------------
|
||||
* . each 4-bit nibbles requires 4 applications of (same LUT + Bit extraction)
|
||||
* . extracted bits for word w go at position 3-w mod 4 (w=0 --> b000, w=1 --> 0b00, etc) */
|
||||
/* [Sequential]
|
||||
let mut ct_tmp: [Ciphertext; 64] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
for w in 0..16 {
|
||||
for b in 0..4 { // use apply_many_lookup_tables ?
|
||||
let zlut_b = ev_key.generate_lookup_table(
|
||||
|x:u64| (((zlut[w][x as usize] >> (3-b)) & 0x1) << ((3-w) % 4)) as u64
|
||||
);
|
||||
ct_tmp[b + 4*w] = ev_key.apply_lookup_table(&in_u4[w], &zlut_b);
|
||||
}
|
||||
} // */
|
||||
/* [Sequential::array]
|
||||
let mut ct_tmp: [Ciphertext; 64] = std::array::from_fn(|idx| {
|
||||
let w: usize = idx >> 2;
|
||||
let b: usize = idx & 0x3;
|
||||
let zlut_b = ev_key.generate_lookup_table(
|
||||
|x:u64| (((zlut[w][x as usize] >> (3-b)) & 0x1) << ((3-w) % 4)) as u64
|
||||
);
|
||||
ev_key.apply_lookup_table(&in_u4[w], &zlut_b) // ct_tmp[idx]
|
||||
}); // */
|
||||
//* [Parallel:64]
|
||||
let ct_tmp: [Ciphertext; 64] = (0..64)
|
||||
.into_par_iter()
|
||||
.map(|idx| {
|
||||
// idx = 4*w + b
|
||||
let w: usize = idx >> 2;
|
||||
let b: usize = idx & 0x3;
|
||||
let zlut_b = ev_key.generate_lookup_table(|x: u64| {
|
||||
// [Nb] w=0..15
|
||||
(((zlut[w][x as usize] >> (3 - b)) & 0x1) << (3 - (w % 4))) as u64
|
||||
});
|
||||
ev_key.apply_lookup_table(&in_u4[w], &zlut_b)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap();
|
||||
// */
|
||||
/* Bridging Sbox --> MLayer ----------------------------------------------------------
|
||||
* So as to obtain 4-bit enc nibbles with: 048c, 159d, etc */
|
||||
// TODO(?): [Parallel:32/16]
|
||||
for w in 0..16 {
|
||||
// this uses u2q for some u4 ahead of time (as temporary holder)
|
||||
let oo: usize = 16 * (w / 4) + (w % 4);
|
||||
out_u2q[w] = ev_key.unchecked_add(&ct_tmp[oo], &ct_tmp[oo + 4]);
|
||||
out_u2q[w + 1] = ev_key.unchecked_add(&ct_tmp[oo + 8], &ct_tmp[oo + 12]);
|
||||
out_u2q[w] = ev_key.unchecked_add(&out_u2q[w], &out_u2q[w + 1]);
|
||||
}
|
||||
|
||||
/* M-layer: Apply exor matrices ------------------------------------------------------ */
|
||||
/* [Sequential]
|
||||
for w in 0..16 {
|
||||
for b in 0..4 {
|
||||
let zlut_ex = ev_key.generate_lookup_table(
|
||||
|x:u64| pv2_lut::PV2_EXOR_FW[w % 2][b][x as usize] as u64
|
||||
);
|
||||
ct_tmp[b + 4*w] = ev_key.apply_lookup_table(&out_u2q[w], &zlut_ex);
|
||||
}
|
||||
} // */
|
||||
//* [Parallel:64]
|
||||
let mut ct_tmp: [Ciphertext; 64] = (0..64)
|
||||
.into_par_iter()
|
||||
.map(|idx| {
|
||||
// idx = 4*w + b
|
||||
let w: usize = idx >> 2;
|
||||
let b: usize = idx & 0x3;
|
||||
let zlut_ex = ev_key
|
||||
.generate_lookup_table(|x: u64| pv2_lut::PV2_EXOR_FW[w % 2][b][x as usize] as u64);
|
||||
ev_key.apply_lookup_table(&out_u2q[w], &zlut_ex)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap(); // */
|
||||
// Apply Fhe Perm permutation + Permutation Layer
|
||||
// --> Directly assign correctly in above loop? as ct_tmp[INV_FHE_MP_PERM_FW[b + 4*w]] = ...
|
||||
permute::apply_perm_assign(&mut ct_tmp, &pv2_lut::FHE_MP_PERM_FW);
|
||||
|
||||
/* Bridging M-Layer --> Xor --------------------------------------------------------- */
|
||||
// [Parallel:32] Combine pairs
|
||||
for n in 0..32 {
|
||||
out_u2q[n] = ev_key.unchecked_add(&ct_tmp[2 * n], &ct_tmp[2 * n + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
// [Parallel:64/(32)/64/(32/16)/64/(32)/32/(32/16)/32]
|
||||
fn pv2_mid_round(
|
||||
ev_key: &ServerKey,
|
||||
out_u2q: &mut [Ciphertext; 32], // out: 2-bits (high)
|
||||
in_u4: &[Ciphertext; 16], // in: 4-bits (full)
|
||||
ct_k_fst: &[Ciphertext; 32],
|
||||
ct_k_scd: &[Ciphertext; 32],
|
||||
zlut_fst: &[[u8; 16]; 16],
|
||||
zlut_scd: &[[u8; 16]; 16],
|
||||
) {
|
||||
/* S-Boxes ------------------------------------------------------------------------------------
|
||||
* /!\ output for xor */
|
||||
/* [Sequential]
|
||||
for w in 0..16 {
|
||||
for b in 0..2 {
|
||||
let zlut_u2q = ev_key.generate_lookup_table(
|
||||
|x:u64| (((pv2_lut::PV2_5_S_M[w][x as usize] >> (2-2*b)) & 0x3) << 2) as u64
|
||||
);
|
||||
out_u2q[b + 2*w] = ev_key.apply_lookup_table(&in_u4[w], &zlut_u2q);
|
||||
}
|
||||
} // */
|
||||
//* [Parallel:64]
|
||||
(*out_u2q) = (0..32)
|
||||
.into_par_iter()
|
||||
.map(|n| {
|
||||
let w: usize = n >> 1;
|
||||
let b: usize = n & 0x1;
|
||||
let zlut_u2q = ev_key.generate_lookup_table(|x: u64| {
|
||||
(((zlut_fst[w][x as usize] >> (2 - 2 * b)) & 0x3) << 2) as u64
|
||||
});
|
||||
ev_key.apply_lookup_table(&in_u4[w], &zlut_u2q)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap(); // */
|
||||
/* XOR K0 [Parallel:(32)/64] --------------------------------------------------------- */
|
||||
let mut ct_tmp_b: [Ciphertext; 64] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
pv2_xor_to_b(ev_key, &mut ct_tmp_b, out_u2q, ct_k_fst);
|
||||
|
||||
/* Bridging to M-Layer --------------------------------------------------------------- */
|
||||
// [Parallel:32/16] Comb sum (048c,...)
|
||||
let mut ct_tmp_u4: [Ciphertext; 16] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
for w in 0..16 {
|
||||
// mm, use u2q for some u4 ahead of time
|
||||
let oo: usize = 16 * (w / 4) + (w % 4);
|
||||
out_u2q[w] = ev_key.unchecked_add(&ct_tmp_b[oo], &ct_tmp_b[oo + 4]);
|
||||
out_u2q[w + 1] = ev_key.unchecked_add(&ct_tmp_b[oo + 8], &ct_tmp_b[oo + 12]);
|
||||
ct_tmp_u4[w] = ev_key.unchecked_add(&out_u2q[w], &out_u2q[w + 1]);
|
||||
}
|
||||
|
||||
/* M-layer: Apply exor matrices ------------------------------------------------------ */
|
||||
/* [Sequential]
|
||||
for w in 0..16 {
|
||||
for b in 0..4 {
|
||||
let zlut_ex = ev_key.generate_lookup_table(
|
||||
|x:u64| pv2_lut::PV2_EXOR_FW[w % 2][b][x as usize] as u64
|
||||
);
|
||||
ct_tmp_b[b + 4*w] = ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_ex);
|
||||
}
|
||||
} // */
|
||||
//* [Parallel:64]
|
||||
ct_tmp_b = (0..64)
|
||||
.into_par_iter()
|
||||
.map(|idx| {
|
||||
// idx = 4*w + b
|
||||
let w: usize = idx >> 2;
|
||||
let b: usize = idx & 0x3;
|
||||
let zlut_ex = ev_key
|
||||
.generate_lookup_table(|x: u64| pv2_lut::PV2_EXOR_FW[w % 2][b][x as usize] as u64);
|
||||
ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_ex)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap();
|
||||
// */
|
||||
// Apply Fhe Perm permutation
|
||||
permute::apply_perm_assign(&mut ct_tmp_b, &pv2_lut::FHE_M_PERM);
|
||||
|
||||
/* Bridging M-Layer --> Xor --------------------------------------------------------- */
|
||||
// [Parallel:32] Combine pairs
|
||||
for n in 0..32 {
|
||||
out_u2q[n] = ev_key.unchecked_add(&ct_tmp_b[2 * n], &ct_tmp_b[2 * n + 1]);
|
||||
}
|
||||
|
||||
/* XOR k1 [Parallel:(32)/32/(16)] --------------------------------------------------- */
|
||||
pv2_xor_to_u4(ev_key, &mut ct_tmp_u4, out_u2q, ct_k_scd);
|
||||
|
||||
/* S-Boxes ------------------------------------------------------------------------------------
|
||||
* . output 2,2 bits on position (32)00 */
|
||||
/* [Sequential]
|
||||
for w in 0..16 {
|
||||
for b in 0..2 {
|
||||
let zlut_u2q = ev_key.generate_lookup_table(
|
||||
|x:u64| (((pv2_lut::PV2_0_IS_0[w][x as usize] >> (2-2*b)) & 0x3) << 2) as u64
|
||||
);
|
||||
out_u2q[b + 2*w] = ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_u2q);
|
||||
}
|
||||
}
|
||||
// */
|
||||
//* [Parallel:32]
|
||||
(*out_u2q) = (0..32)
|
||||
.into_par_iter()
|
||||
.map(|n| {
|
||||
let w: usize = n >> 1;
|
||||
let b: usize = n & 0x1;
|
||||
let zlut_u2q = ev_key.generate_lookup_table(|x: u64| {
|
||||
(((zlut_scd[w][x as usize] >> (2 - 2 * b)) & 0x3) << 2) as u64
|
||||
});
|
||||
ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_u2q)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap(); // */
|
||||
}
|
||||
|
||||
// [Parallel:(32/16)/64/(32/16)/32]
|
||||
fn pv2_bw_round(
|
||||
ev_key: &ServerKey,
|
||||
out_u2q: &mut [Ciphertext; 32], // out: 2-bits (high)
|
||||
in_b: &[Ciphertext; 64], // in: 1-bits (<< w%4 = 333322221111000033...)
|
||||
zlut: &[[u8; 16]; 16],
|
||||
) {
|
||||
let mut ct_tmp_u4: [Ciphertext; 16] = std::array::from_fn(|_| ev_key.create_trivial(0)); // ...
|
||||
|
||||
// iPerm + M-Layer
|
||||
// [Parallel:32/16] Combined iPerm + comb sum (048c,etc)
|
||||
for w in 0..16 {
|
||||
let idx: [usize; 4] =
|
||||
std::array::from_fn(|b| (w & 0x3) + 4 * pv2_lut::IPERM[4 * (w >> 2) + b]);
|
||||
out_u2q[2 * w] = ev_key.unchecked_add(&in_b[idx[0]], &in_b[idx[1]]);
|
||||
out_u2q[2 * w + 1] = ev_key.unchecked_add(&in_b[idx[2]], &in_b[idx[3]]);
|
||||
ct_tmp_u4[w] = ev_key.unchecked_add(&out_u2q[2 * w], &out_u2q[2 * w + 1]);
|
||||
}
|
||||
|
||||
/* M-layer: Apply exor matrices ------------------------------------------------------ */
|
||||
/* [Sequential]
|
||||
let mut ct_tmp_b: [Ciphertext; 64] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
for w in 0..16 {
|
||||
for b in 0..4 {
|
||||
let zlut_ex = ev_key.generate_lookup_table(
|
||||
|x:u64| pv2_lut::PV2_EXOR_BW[w % 4][b][x as usize] as u64
|
||||
);
|
||||
ct_tmp_b[b + 4*w] = ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_ex);
|
||||
}
|
||||
} // */
|
||||
//* [Parallel:64]
|
||||
let mut ct_tmp_b: [Ciphertext; 64] = (0..64)
|
||||
.into_par_iter()
|
||||
.map(|idx| {
|
||||
// idx = 4*w + b
|
||||
let w: usize = idx >> 2;
|
||||
let b: usize = idx & 0x3;
|
||||
let zlut_ex = ev_key
|
||||
.generate_lookup_table(|x: u64| pv2_lut::PV2_EXOR_BW[w % 4][b][x as usize] as u64);
|
||||
ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_ex)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap();
|
||||
// */
|
||||
// FHE Perm permutation
|
||||
permute::apply_perm_assign(&mut ct_tmp_b, &pv2_lut::FHE_M_PERM);
|
||||
|
||||
/* Bridging MLayer --> SBox ----------------------------------------------------------- */
|
||||
// [Parallel:32/16] Combine to u4 = sum[4*i:4*i+4] for i in range(16)
|
||||
for w in 0..16 {
|
||||
out_u2q[2 * w] = ev_key.unchecked_add(&ct_tmp_b[4 * w], &ct_tmp_b[4 * w + 1]);
|
||||
out_u2q[2 * w + 1] = ev_key.unchecked_add(&ct_tmp_b[4 * w + 2], &ct_tmp_b[4 * w + 3]);
|
||||
ct_tmp_u4[w] = ev_key.unchecked_add(&out_u2q[2 * w], &out_u2q[2 * w + 1]);
|
||||
}
|
||||
|
||||
/* S-Boxes ------------------------------------------------------------------------------------
|
||||
* . output 2,2 bits on position (32)00 */
|
||||
/* [Sequential]
|
||||
for w in 0..16 {
|
||||
for b in 0..2 {
|
||||
let zlut_u2q = ev_key.generate_lookup_table(
|
||||
|x:u64| (((zlut[w][x as usize] >> (2-2*b)) & 0x3) << 2) as u64
|
||||
);
|
||||
out_u2q[b + 2*w] = ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_u2q);
|
||||
}
|
||||
}
|
||||
// */
|
||||
//* [Parallel:32]
|
||||
(*out_u2q) = (0..32)
|
||||
.into_par_iter()
|
||||
.map(|n| {
|
||||
let w: usize = n >> 1;
|
||||
let b: usize = n & 0x1;
|
||||
let zlut_u2q = ev_key.generate_lookup_table(|x: u64| {
|
||||
(((zlut[w][x as usize] >> (2 - 2 * b)) & 0x3) << 2) as u64
|
||||
});
|
||||
ev_key.apply_lookup_table(&ct_tmp_u4[w], &zlut_u2q)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap(); // */
|
||||
}
|
||||
|
||||
/* Encryption -----------------------------------------------------------------------------------
|
||||
* (Whitening + Fw Rounds + Mid Round + Bw Rounds + Whitening)
|
||||
*/
|
||||
|
||||
#[rustfmt::skip] // [skip] Each of 22 monitor! calls get split on 5 lines which destroys readability
|
||||
pub fn pv2_encrypt(
|
||||
ev_key: &ServerKey,
|
||||
ct_enc: &mut [Ciphertext; 32],
|
||||
ct_m: &[Ciphertext; 32],
|
||||
ct_k0: &[Ciphertext; 32],
|
||||
ct_k1: &[Ciphertext; 32],
|
||||
) {
|
||||
// Work buffers: u4, u2q, b (depending on the inner nibbles format, u2q = u2 <<2)
|
||||
let mut ct_u4: [Ciphertext; 16] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
let mut ct_b: [Ciphertext; 64] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
// [Parallel] + Init: ct_m << 2
|
||||
let mut ct_u2q: [Ciphertext; 32] =
|
||||
std::array::from_fn(|n| ev_key.unchecked_scalar_mul(&ct_m[n], 4));
|
||||
|
||||
// Whitening
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k0));
|
||||
// Forward rounds
|
||||
//*
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_0_S_0));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_1_S_2));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_0_S_0));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_3_S_4));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_0_S_0));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k1)); // */
|
||||
// Middle round
|
||||
//*
|
||||
monitor!(pv2_mid_round(ev_key, &mut ct_u2q, &ct_u4,
|
||||
ct_k0, ct_k1, &pv2_lut::PV2_5_S_M, &pv2_lut::PV2_0_IS_0)); // */
|
||||
// Backward rounds
|
||||
//*
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_6_IS_7));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_0_IS_0));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_8_IS_9));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_0_IS_0));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_A_IS_B));
|
||||
// Last Xor to u2l
|
||||
monitor!(pv2_xor_to_u2(ev_key, ct_enc, &ct_u2q, ct_k1)); // */
|
||||
}
|
||||
|
||||
/* Decryption -----------------------------------------------------------------------------------
|
||||
* Inverse of pv2_encrypt().
|
||||
*/
|
||||
|
||||
#[rustfmt::skip] // [skip] Each of 22 monitor! calls get split on 5 lines which destroys readability
|
||||
pub fn pv2_decrypt(
|
||||
ev_key: &ServerKey,
|
||||
ct_dec: &mut [Ciphertext; 32],
|
||||
ct_c: &[Ciphertext; 32],
|
||||
ct_k0: &[Ciphertext; 32],
|
||||
ct_k1: &[Ciphertext; 32],
|
||||
) {
|
||||
// Work buffers: u4, u2q, b (depending on the inner nibbles format, u2q = u2 <<2)
|
||||
let mut ct_u4: [Ciphertext; 16] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
let mut ct_b: [Ciphertext; 64] = std::array::from_fn(|_| ev_key.create_trivial(0));
|
||||
// [Parallel] + Init: ct_m << 2
|
||||
let mut ct_u2q: [Ciphertext; 32] =
|
||||
std::array::from_fn(|n| ev_key.unchecked_scalar_mul(&ct_c[n], 4));
|
||||
|
||||
// Whitening
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k1));
|
||||
// Forward rounds
|
||||
//*
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_B_S_A));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_0_S_0));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_9_S_8));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_0_S_0));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_fw_round(ev_key, &mut ct_u2q, &ct_u4, &pv2_lut::PV2_7_S_6));
|
||||
monitor!(pv2_xor_to_u4(ev_key, &mut ct_u4, &ct_u2q, ct_k0)); // */
|
||||
// Middle round
|
||||
//*
|
||||
monitor!(pv2_mid_round(ev_key, &mut ct_u2q, &ct_u4,
|
||||
ct_k1, ct_k0, &pv2_lut::PV2_0_S_0, &pv2_lut::PV2_M_IS_5)); // */
|
||||
// Backward rounds
|
||||
//*
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_0_IS_0));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_4_IS_3));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_0_IS_0));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k0));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_2_IS_1));
|
||||
monitor!(pv2_xor_to_b(ev_key, &mut ct_b, &ct_u2q, ct_k1));
|
||||
monitor!(pv2_bw_round(ev_key, &mut ct_u2q, &ct_b, &pv2_lut::PV2_0_IS_0));
|
||||
// Last Xor to u2l
|
||||
monitor!(pv2_xor_to_u2(ev_key, ct_dec, &ct_u2q, ct_k0)); // */
|
||||
}
|
||||
@@ -1,331 +0,0 @@
|
||||
/*
|
||||
* Prince v2 constant definitions and Look-up tables for FHE
|
||||
* --------------------------------------------------------------------------------- */
|
||||
use crate::u64_conv;
|
||||
|
||||
/* Permutations -------------------------------------------------------------------- */
|
||||
// Prince permutation layer on nibbles (16 entries)
#[rustfmt::skip]
static PERM: [usize; 16] = [
    0x0, 0x5, 0xa, 0xf,
    0x4, 0x9, 0xe, 0x3,
    0x8, 0xd, 0x2, 0x7,
    0xc, 0x1, 0x6, 0xb,
];

// Prince inverse permutation on nibbles (16 entries)
#[rustfmt::skip]
pub static IPERM: [usize; 16] = [
    0x0, 0xd, 0xa, 0x7,
    0x4, 0x1, 0xe, 0xb,
    0x8, 0x5, 0x2, 0xf,
    0xc, 0x9, 0x6, 0x3,
];
|
||||
|
||||
// Permutation to apply on a 16-bit group after M0 if computed as exor( 0123 )
// ---> bits 0c84,51d9,a62e,fb73
// ---> TODO: put Perm in cycle notation so as to use swaps
// (u16 bits) 0123...def (as an array of bits from msb to lsb)
#[rustfmt::skip]
static FHE_M0_PERM: [usize; 16] = [
    0x0, 0x5, 0xa, 0xf,
    0x3, 0x4, 0x9, 0xe,
    0x2, 0x7, 0x8, 0xd,
    0x1, 0x6, 0xb, 0xc,
];

// Permutation to apply on a 16-bit group after M1 if computed as exor( 0123 )
// --> bits c840,1d95,62ea,b73f
#[rustfmt::skip]
static FHE_M1_PERM: [usize; 16] = [
    0x3, 0x4, 0x9, 0xe,
    0x2, 0x7, 0x8, 0xd,
    0x1, 0x6, 0xb, 0xc,
    0x0, 0x5, 0xa, 0xf,
];
|
||||
|
||||
// Combined overall bits permutation: (p0 | p1 | p1 | p0) with indexes 0..63
|
||||
// FHE_M_PERM = sum(( [_c + _n*16 for _c in _perm] for _n,_perm
|
||||
// in enumerate([FHE_M0_PERM,FHE_M1_PERM,FHE_M1_PERM,FHE_M0_PERM]) ), []);
|
||||
pub static FHE_M_PERM: [usize; 64] = {
|
||||
let mut n: usize = 0;
|
||||
let mut m_perm: [usize; 64] = [0; 64];
|
||||
|
||||
while n < 4 {
|
||||
let mut p_idx: usize = 0;
|
||||
while p_idx < 16 {
|
||||
m_perm[p_idx + n * 16] = n * 16
|
||||
+ match n {
|
||||
0 | 3 => FHE_M0_PERM[p_idx],
|
||||
1 | 2 => FHE_M1_PERM[p_idx],
|
||||
_ => unreachable!(),
|
||||
};
|
||||
p_idx += 1;
|
||||
}
|
||||
n += 1;
|
||||
}
|
||||
m_perm
|
||||
};
|
||||
|
||||
// Combined with Permutation layer (fw)
|
||||
// = [ fhe_M_Perm[ 4*Perm[_i >> 2] + (_i & 0x3) ] for _i in range(64) ]
|
||||
pub static FHE_MP_PERM_FW: [usize; 64] = {
|
||||
let mut b: usize = 0;
|
||||
let mut m_perm: [usize; 64] = [0; 64];
|
||||
|
||||
while b < 64 {
|
||||
// Unnatural, but just to see the same structure as above
|
||||
m_perm[b] = FHE_M_PERM[(PERM[b >> 2] << 2) + (b & 0x3)];
|
||||
b += 1;
|
||||
}
|
||||
m_perm
|
||||
};
|
||||
|
||||
/* Round constants ----------------------------------------------------------------- */
|
||||
const PRINCE_NRND: usize = 12; // Number of rounds (more precisely, nb of round constants / non-linear layers)
|
||||
|
||||
static _RC_ALPHA: u64 = 0xc0ac29b7c97c50dd; // see paper about symmetry of RC
|
||||
static _RC_BETA: u64 = 0x3f84d5b5b5470917; // see paper about symmetry of RC_V2
|
||||
#[rustfmt::skip]
|
||||
static RC_V2: [u64; PRINCE_NRND] = [
|
||||
0x0000000000000000, 0x13198a2e03707344, 0xa4093822299f31d0, 0x082efa98ec4e6c89,
|
||||
0x452821e638d01377, 0xbe5466cf34e90c6c, 0x7ef84f78fd955cb1, 0x7aacf4538d971a60,
|
||||
0xc882d32f25323c54, 0x9b8ded979cd838c7, 0xd3b5a399ca0c2399, 0x3f84d5b5b5470917,
|
||||
];
|
||||
#[rustfmt::skip]
|
||||
static RC_V2_IP_IM: [u64; PRINCE_NRND] = [ // iM . iP (RC) [from sage script]
|
||||
0x0000000000000000, 0x90ecdeb7cb7fc1ce, 0x81b2cb20a82a2928, 0x480cdfa91d749037,
|
||||
0xcb1a13467044d772, 0x9e8995b07a988c08, 0xe70338c395311a6a, 0x60dc22bf6e681c08,
|
||||
0x318672daf2dd0655, 0x2a74fad9b606e252, 0xe96673c424d657ac, 0xabc631f91e2ccb7a,
|
||||
];
|
||||
static RC_BETA_IM: u64 = 0x42f93b79daa0eea5; // iM (RC_BETA) [from sage script]
|
||||
|
||||
// Decomposed versions
|
||||
static ZRC_V2: [[u8; 64 / 4]; PRINCE_NRND] = array_u64_to_vec_u4(RC_V2);
|
||||
static ZRC_V2_IP_IM: [[u8; 64 / 4]; PRINCE_NRND] = array_u64_to_vec_u4(RC_V2_IP_IM);
|
||||
static ZRC_BETA_IM: [u8; 64 / 4] = u64_conv::u64_to_vec_u4(RC_BETA_IM);
|
||||
|
||||
// Emulating map on const for u64_to_vec_u4
|
||||
pub const fn array_u64_to_vec_u4<const N: usize>(tab: [u64; N]) -> [[u8; 64 / 4]; N] {
|
||||
let mut i: usize = 0;
|
||||
let mut mat: [[u8; 64 / 4]; N] = [[0; 64 / 4]; N];
|
||||
|
||||
while i < N {
|
||||
// for loop not allowed in const fn
|
||||
mat[i] = u64_conv::u64_to_vec_u4(tab[i]);
|
||||
i += 1;
|
||||
}
|
||||
mat
|
||||
}
|
||||
|
||||
/* (inv)SBox and derivatives ------------------------------------------------------- */
|
||||
// Forward SBox (4-bit input -> 4-bit output)
#[rustfmt::skip]
static PV2_S: [u8; 16] = [
    0xb, 0xf, 0x3, 0x2,
    0xa, 0xc, 0x9, 0x1,
    0x6, 0x7, 0x8, 0x0,
    0xe, 0x5, 0xd, 0x4,
];

// Backward SBox (inverse of the forward one)
#[rustfmt::skip]
static PV2_IS: [u8; 16] = [
    0xb, 0x7, 0x3, 0x2,
    0xf, 0xd, 0x8, 0x9,
    0xa, 0x6, 0x4, 0x0,
    0x5, 0xe, 0xc, 0x1,
];
|
||||
|
||||
// Combined RC+Sboxes for Encryption
|
||||
pub static PV2_0_S_0: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_S, [0_u8; 16], [0_u8; 16]); // Not ideal
|
||||
pub static PV2_1_S_2: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_S, ZRC_V2[1], ZRC_V2_IP_IM[2]);
|
||||
pub static PV2_3_S_4: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_S, ZRC_V2[3], ZRC_V2_IP_IM[4]);
|
||||
pub static PV2_5_S_M: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_S, ZRC_V2[5], ZRC_BETA_IM);
|
||||
pub static PV2_0_IS_0: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_IS, [0_u8; 16], [0_u8; 16]); // Not ideal
|
||||
pub static PV2_6_IS_7: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_IS, ZRC_V2_IP_IM[6], ZRC_V2[7]);
|
||||
pub static PV2_8_IS_9: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_IS, ZRC_V2_IP_IM[8], ZRC_V2[9]);
|
||||
pub static PV2_A_IS_B: [[u8; 1 << 4]; 64 / 4] =
|
||||
build_zlut_xsy(PV2_IS, ZRC_V2_IP_IM[10], ZRC_V2[11]);
|
||||
|
||||
// Additional RC+Sboxes for Decryption
|
||||
pub static PV2_B_S_A: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_S, ZRC_V2[11], ZRC_V2_IP_IM[10]);
|
||||
pub static PV2_9_S_8: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_S, ZRC_V2[9], ZRC_V2_IP_IM[8]);
|
||||
pub static PV2_7_S_6: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_S, ZRC_V2[7], ZRC_V2_IP_IM[6]);
|
||||
pub static PV2_M_IS_5: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_IS, ZRC_BETA_IM, ZRC_V2[5]);
|
||||
pub static PV2_4_IS_3: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_IS, ZRC_V2_IP_IM[4], ZRC_V2[3]);
|
||||
pub static PV2_2_IS_1: [[u8; 1 << 4]; 64 / 4] = build_zlut_xsy(PV2_IS, ZRC_V2_IP_IM[2], ZRC_V2[1]);
|
||||
|
||||
// Build special LUTs, one per word index w (16 words, 16 entries each):
//   result[w][x] = sbox[x ^ xor_inner[w]] ^ xor_outer[w]
//   sbox      : 16-entry substitution table
//   xor_inner : per-word value xor-ed with the input before the sbox lookup
//   xor_outer : per-word value xor-ed with the sbox output
const fn build_zlut_xsy(
    sbox: [u8; 1 << 4],
    xor_inner: [u8; 64 / 4],
    xor_outer: [u8; 64 / 4],
) -> [[u8; 1 << 4]; 64 / 4] {
    let mut table = [[0u8; 1 << 4]; 64 / 4];
    let mut cell: usize = 0;

    // Single flat `while` over the 16 x 16 cells (for loop not allowed in const fn),
    // filled word by word.
    while cell < (64 / 4) * (1 << 4) {
        let word = cell / (1 << 4);
        let input = cell % (1 << 4);
        let masked = (input as u8) ^ xor_inner[word];
        table[word][input] = sbox[masked as usize] ^ xor_outer[word];
        cell += 1;
    }
    table
}
|
||||
|
||||
/// Compares three of the `PV2_*` 16x16 nibble tables (`PV2_1_S_2`, `PV2_5_S_M`, `PV2_6_IS_7`)
/// with hard-coded reference values, one 16-entry row per table index.
#[test]
fn test_build_xsy() {
    #[rustfmt::skip]
    let expected_1s2: [[u8; 16]; 16] = [
        [0x7, 0x3, 0xa, 0xb, 0x4, 0x2, 0x9, 0x1, 0xf, 0xe, 0x8, 0x0, 0xd, 0x6, 0xc, 0x5],
        [0x3, 0x2, 0xe, 0xa, 0x0, 0x8, 0xd, 0xb, 0x1, 0x9, 0x6, 0x7, 0x5, 0xc, 0x4, 0xf],
        [0x4, 0x0, 0x9, 0x8, 0x7, 0x1, 0xa, 0x2, 0xc, 0xd, 0xb, 0x3, 0xe, 0x5, 0xf, 0x6],
        [0x5, 0x4, 0x2, 0xa, 0x7, 0xc, 0x6, 0xf, 0xd, 0x9, 0x0, 0x1, 0xe, 0x8, 0x3, 0xb],
        [0xa, 0xb, 0x4, 0xc, 0x2, 0x9, 0x1, 0x8, 0x7, 0x3, 0xf, 0xe, 0x6, 0x0, 0x5, 0xd],
        [0x3, 0xb, 0xd, 0xc, 0x6, 0xf, 0x5, 0xe, 0x8, 0x9, 0x0, 0x4, 0x2, 0xa, 0x1, 0x7],
        [0x1, 0x0, 0x9, 0xd, 0xb, 0x3, 0x8, 0xe, 0xa, 0x2, 0x4, 0x5, 0xf, 0x6, 0xc, 0x7],
        [0xd, 0x4, 0xe, 0x5, 0x8, 0x0, 0x6, 0x7, 0x9, 0x1, 0xa, 0xc, 0x3, 0x2, 0xb, 0xf],
        [0x1, 0x5, 0x9, 0x8, 0x0, 0x6, 0x3, 0xb, 0xc, 0xd, 0x2, 0xa, 0x4, 0xf, 0x7, 0xe],
        [0xa, 0xb, 0x7, 0x3, 0x9, 0x1, 0x4, 0x2, 0x8, 0x0, 0xf, 0xe, 0xc, 0x5, 0xd, 0x6],
        [0x3, 0xb, 0xe, 0x8, 0x0, 0x1, 0xd, 0x9, 0x6, 0xf, 0x7, 0xc, 0x2, 0xa, 0x5, 0x4],
        [0x1, 0x5, 0x9, 0x8, 0x0, 0x6, 0x3, 0xb, 0xc, 0xd, 0x2, 0xa, 0x4, 0xf, 0x7, 0xe],
        [0x3, 0xb, 0xe, 0x8, 0x0, 0x1, 0xd, 0x9, 0x6, 0xf, 0x7, 0xc, 0x2, 0xa, 0x5, 0x4],
        [0xb, 0xa, 0x6, 0x2, 0x8, 0x0, 0x5, 0x3, 0x9, 0x1, 0xe, 0xf, 0xd, 0x4, 0xc, 0x7],
        [0x8, 0xe, 0xb, 0x3, 0x9, 0xd, 0x1, 0x0, 0xc, 0x7, 0xf, 0x6, 0x4, 0x5, 0xa, 0x2],
        [0x2, 0x4, 0x1, 0x9, 0x3, 0x7, 0xb, 0xa, 0x6, 0xd, 0x5, 0xc, 0xe, 0xf, 0x0, 0x8],
    ];
    assert_eq!(expected_1s2, PV2_1_S_2);

    #[rustfmt::skip]
    let expected_5sm: [[u8; 16]; 16] = [
        [0x4, 0xc, 0x3, 0x2, 0x0, 0x9, 0x1, 0xa, 0x6, 0x7, 0xb, 0xf, 0x5, 0xd, 0x8, 0xe],
        [0xf, 0x6, 0xc, 0x7, 0xa, 0x2, 0x4, 0x5, 0xb, 0x3, 0x8, 0xe, 0x1, 0x0, 0x9, 0xd],
        [0x3, 0x5, 0xe, 0x6, 0x0, 0x4, 0xd, 0xc, 0xa, 0x1, 0xb, 0x2, 0x8, 0x9, 0xf, 0x7],
        [0x3, 0x5, 0x0, 0x8, 0x2, 0x6, 0xa, 0xb, 0x7, 0xc, 0x4, 0xd, 0xf, 0xe, 0x1, 0x9],
        [0xa, 0x2, 0x9, 0xf, 0x0, 0x1, 0x8, 0xc, 0xe, 0x7, 0xd, 0x6, 0xb, 0x3, 0x5, 0x4],
        [0x2, 0xa, 0x1, 0x7, 0x8, 0x9, 0x0, 0x4, 0x6, 0xf, 0x5, 0xe, 0x3, 0xb, 0xd, 0xc],
        [0x9, 0x2, 0xa, 0x3, 0x1, 0x0, 0xf, 0x7, 0xd, 0xb, 0xe, 0x6, 0xc, 0x8, 0x4, 0x5],
        [0xd, 0x4, 0xc, 0x7, 0x9, 0x1, 0xe, 0xf, 0x8, 0x0, 0x5, 0x3, 0xb, 0xa, 0x6, 0x2],
        [0xf, 0xe, 0x2, 0x6, 0xc, 0x4, 0x1, 0x7, 0xd, 0x5, 0xa, 0xb, 0x9, 0x0, 0x8, 0x3],
        [0x0, 0x6, 0x3, 0xb, 0x1, 0x5, 0x9, 0x8, 0x4, 0xf, 0x7, 0xe, 0xc, 0xd, 0x2, 0xa],
        [0x7, 0xe, 0x4, 0xf, 0x2, 0xa, 0xc, 0xd, 0x3, 0xb, 0x0, 0x6, 0x9, 0x8, 0x1, 0x5],
        [0x7, 0x6, 0x0, 0x8, 0x5, 0xe, 0x4, 0xd, 0xf, 0xb, 0x2, 0x3, 0xc, 0xa, 0x1, 0x9],
        [0x5, 0x1, 0xd, 0xc, 0x4, 0x2, 0x7, 0xf, 0x8, 0x9, 0x6, 0xe, 0x0, 0xb, 0x3, 0xa],
        [0x0, 0xb, 0x3, 0xa, 0x8, 0x9, 0x6, 0xe, 0x4, 0x2, 0x7, 0xf, 0x5, 0x1, 0xd, 0xc],
        [0x3, 0xb, 0x0, 0x6, 0x9, 0x8, 0x1, 0x5, 0x7, 0xe, 0x4, 0xf, 0x2, 0xa, 0xc, 0xd],
        [0xb, 0x0, 0x8, 0x1, 0x3, 0x2, 0xd, 0x5, 0xf, 0x9, 0xc, 0x4, 0xe, 0xa, 0x6, 0x7],
    ];
    assert_eq!(expected_5sm, PV2_5_S_M);

    #[rustfmt::skip]
    let expected_6is7: [[u8; 16]; 16] = [
        [0xb, 0x6, 0x2, 0x9, 0x3, 0x7, 0xd, 0x1, 0xf, 0xe, 0x8, 0xa, 0x4, 0x5, 0xc, 0x0],
        [0x3, 0x2, 0x7, 0x5, 0x8, 0x9, 0xd, 0x1, 0xb, 0x6, 0x4, 0xf, 0xa, 0xe, 0xc, 0x0],
        [0x1, 0xd, 0x9, 0x8, 0x5, 0x7, 0x2, 0x3, 0x0, 0xc, 0xe, 0xa, 0xf, 0x4, 0x6, 0xb],
        [0xe, 0xf, 0xb, 0x7, 0x5, 0x4, 0x1, 0x3, 0xc, 0x8, 0xa, 0x6, 0xd, 0x0, 0x2, 0x9],
        [0xd, 0xc, 0x8, 0x4, 0x6, 0x7, 0x2, 0x0, 0xf, 0xb, 0x9, 0x5, 0xe, 0x3, 0x1, 0xa],
        [0xe, 0x2, 0x0, 0x4, 0x1, 0xa, 0x8, 0x5, 0xf, 0x3, 0x7, 0x6, 0xb, 0x9, 0xc, 0xd],
        [0x0, 0xb, 0x9, 0x4, 0xf, 0x3, 0x1, 0x5, 0xa, 0x8, 0xd, 0xc, 0xe, 0x2, 0x6, 0x7],
        [0x1, 0x0, 0x4, 0x8, 0xa, 0xb, 0xe, 0xc, 0x3, 0x7, 0x5, 0x9, 0x2, 0xf, 0xd, 0x6],
        [0xe, 0x2, 0x8, 0xc, 0x6, 0xd, 0x9, 0x4, 0xf, 0x3, 0xa, 0xb, 0x5, 0x7, 0x1, 0x0],
        [0x0, 0x2, 0x4, 0x5, 0xa, 0x6, 0xf, 0xe, 0x3, 0x8, 0xc, 0x1, 0xb, 0x7, 0xd, 0x9],
        [0xb, 0xa, 0xe, 0x2, 0x0, 0x1, 0x4, 0x6, 0x9, 0xd, 0xf, 0x3, 0x8, 0x5, 0x7, 0xc],
        [0x0, 0xc, 0x5, 0x4, 0xa, 0x8, 0xe, 0xf, 0x1, 0xd, 0x7, 0x3, 0x9, 0x2, 0x6, 0xb],
        [0x6, 0xa, 0x3, 0x2, 0xc, 0xe, 0x8, 0x9, 0x7, 0xb, 0x1, 0x5, 0xf, 0x4, 0x0, 0xd],
        [0xe, 0xa, 0x0, 0xc, 0x6, 0xb, 0xf, 0x4, 0x9, 0x8, 0x1, 0xd, 0x2, 0x3, 0x5, 0x7],
        [0xe, 0xf, 0x9, 0xb, 0x5, 0x4, 0xd, 0x1, 0xa, 0x7, 0x3, 0x8, 0x2, 0x6, 0xc, 0x0],
        [0x4, 0x0, 0xa, 0x6, 0xc, 0x1, 0x5, 0xe, 0x3, 0x2, 0xb, 0x7, 0x8, 0x9, 0xf, 0xd],
    ];
    assert_eq!(expected_6is7, PV2_6_IS_7);
}
|
||||
|
||||
/* LUTs for M-layer (exors) -------------------------------------------------------- */
|
||||
// Bits (msb) 0123 (lsb) in u4
|
||||
static PV2_EXOR_TO_0: [[u8; 1 << 4]; 4] = build_zlut_exor(0);
|
||||
static PV2_EXOR_TO_1: [[u8; 1 << 4]; 4] = build_zlut_exor(1);
|
||||
static PV2_EXOR_TO_2: [[u8; 1 << 4]; 4] = build_zlut_exor(2);
|
||||
static PV2_EXOR_TO_3: [[u8; 1 << 4]; 4] = build_zlut_exor(3);
|
||||
#[rustfmt::skip]
|
||||
pub static PV2_EXOR_FW: [&[[u8; 1 << 4]; 4]; 2] = [
|
||||
&PV2_EXOR_TO_3, &PV2_EXOR_TO_2,
|
||||
];
|
||||
#[rustfmt::skip]
|
||||
pub static PV2_EXOR_BW: [&[[u8; 1 << 4]; 4]; 4] = [
|
||||
&PV2_EXOR_TO_3, &PV2_EXOR_TO_2, &PV2_EXOR_TO_1, &PV2_EXOR_TO_0,
|
||||
];
|
||||
|
||||
// e-xor(b) = xor of all bits except b. [Ex: e-xor(1010,0) = 1, e-xor(1010,1) = 0.]
|
||||
// Bits (msb) 0123 (lsb) in u4 [more convenient for describing the M-layer?]
|
||||
/// Returns the xor (parity) of the three bits of the nibble `x` that remain once bit `b` is
/// masked out. Bit index 0 designates the most significant bit of the nibble, index 3 the least
/// significant one. The result is always 0 or 1.
///
/// Panics (or fails const evaluation) unless `b < 4` and `x < 16`.
const fn u4_exor(x: u8, b: u8) -> u8 {
    assert!(b < 4 && x < 16);
    // 0x8 >> b is the single bit to exclude; keep the other three bits of the nibble.
    let kept_bits: u8 = x & (0xf & !(0x8u8 >> b));
    // The xor of a set of bits is the parity of how many of them are set.
    (kept_bits.count_ones() & 1) as u8
}
|
||||
|
||||
/// Checks `u4_exor` on two nibbles, for every excluded-bit index 0..=3.
#[test]
fn test_u4_exor() {
    // (input nibble, expected results for b = 0, 1, 2, 3)
    let cases: [(u8, [u8; 4]); 2] = [
        (0b1010, [1, 0, 1, 0]),
        (0b1110, [0, 0, 0, 1]),
    ];

    for (x, expected) in cases.iter() {
        for (b, want) in expected.iter().enumerate() {
            assert_eq!(u4_exor(*x, b as u8), *want);
        }
    }
}
|
||||
|
||||
// e-xor(b) = xor of all bits except b. [Ex: e-xor(1010,0) = 1, e-xor(1010,1) = 0.]
|
||||
// Bits (msb) 0123 (lsb) in u4 [more convenient for describing the M-layer?]
|
||||
const fn build_zlut_exor(to_b: u8) -> [[u8; 1 << 4]; 4] {
|
||||
let mut zlut_exor_to_b: [[u8; 1 << 4]; 4] = [[0; 1 << 4]; 4];
|
||||
let mut b: usize = 0;
|
||||
|
||||
while b < 4 {
|
||||
// for loop not allowed in const fn
|
||||
let mut x: usize = 0;
|
||||
while x < (1 << 4) {
|
||||
zlut_exor_to_b[b][x] = u4_exor(x as u8, b as u8) << to_b;
|
||||
x += 1;
|
||||
}
|
||||
b += 1;
|
||||
}
|
||||
zlut_exor_to_b
|
||||
}
|
||||
|
||||
/// Compares `PV2_EXOR_TO_1` with hard-coded reference rows (one row per excluded bit).
#[test]
fn test_build_exor() {
    #[rustfmt::skip]
    let expected_to_1: [[u8; 16]; 4] = [
        [0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0, 0, 2],
        [0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 2, 0, 0, 2],
        [0, 2, 0, 2, 2, 0, 2, 0, 2, 0, 2, 0, 0, 2, 0, 2],
        [0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 2, 2],
    ];
    assert_eq!(expected_to_1, PV2_EXOR_TO_1);
}
|
||||
|
||||
/* LUTs for xoring to high / low bits ---------------------------------------------- */
|
||||
// [Nb] Probably overkill in this case
|
||||
pub static PV2_XOR_TO_LOW: [u8; 1 << 4] = [
|
||||
// (01) ^ (23): (x & 3) ^^ (x >> 2) for x in range(16)
|
||||
0x0, 0x1, 0x2, 0x3, 0x1, 0x0, 0x3, 0x2, 0x2, 0x3, 0x0, 0x1, 0x3, 0x2, 0x1, 0x0,
|
||||
];
|
||||
static PV2_XOR_TO_HIGH: [u8; 1 << 4] = [
|
||||
// <above> << 2
|
||||
0x0, 0x4, 0x8, 0xc, 0x4, 0x0, 0xc, 0x8, 0x8, 0xc, 0x0, 0x4, 0xc, 0x8, 0x4, 0x0,
|
||||
];
|
||||
pub static PV2_XOR_FW: [&[u8; 1 << 4]; 2] = [&PV2_XOR_TO_HIGH, &PV2_XOR_TO_LOW];
|
||||
|
||||
// xoring + extract bits (low/high of 2 output bits)
|
||||
pub static PV2_XOR_BL: [[u8; 1 << 4]; 4] = build_zlut_xor_bhl(0);
|
||||
pub static PV2_XOR_BH: [[u8; 1 << 4]; 4] = build_zlut_xor_bhl(1);
|
||||
|
||||
const fn build_zlut_xor_bhl(hl: u8) -> [[u8; 1 << 4]; 4] {
|
||||
// [(((01) ^ (23) >> hl) << (3 - b)
|
||||
let mut zlut_xor_hl_to_b: [[u8; 1 << 4]; 4] = [[0; 1 << 4]; 4];
|
||||
let mut b: usize = 0;
|
||||
|
||||
while b < 4 {
|
||||
// for loop not allowed in const fn
|
||||
let mut x: usize = 0;
|
||||
while x < (1 << 4) {
|
||||
zlut_xor_hl_to_b[b][x] = ((PV2_XOR_TO_LOW[x] >> hl) & 0x1) << (3 - b);
|
||||
x += 1;
|
||||
}
|
||||
b += 1;
|
||||
}
|
||||
zlut_xor_hl_to_b
|
||||
}
|
||||
|
||||
/// Compares `PV2_XOR_BH` with hard-coded reference rows (one row per output bit position).
#[test]
fn test_build_xor_bhl() {
    #[rustfmt::skip]
    let expected_bh: [[u8; 16]; 4] = [
        [0, 0, 8, 8, 0, 0, 8, 8, 8, 8, 0, 0, 8, 8, 0, 0],
        [0, 0, 4, 4, 0, 0, 4, 4, 4, 4, 0, 0, 4, 4, 0, 0],
        [0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 0, 0],
        [0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0],
    ];
    assert_eq!(expected_bh, PV2_XOR_BH);
}
|
||||
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Some bit manipulation converting u64 to vectors of 2/4-bit nibbles
|
||||
* ----------------------------------------------------------------------------------------------- */
|
||||
|
||||
/// Splits a `u64` into its sixteen 4-bit nibbles, most significant nibble first:
/// element 0 holds bits 63..60 of `u`, element 15 holds bits 3..0.
pub const fn u64_to_vec_u4(u: u64) -> [u8; 64 / 4] {
    let mut nibbles: [u8; 64 / 4] = [0; 64 / 4];
    let mut remaining: u64 = u;
    let mut slot: usize = 64 / 4;

    // `for` is unusable inside a const fn: peel nibbles off the low end and fill from the back.
    while slot > 0 {
        slot -= 1;
        nibbles[slot] = (remaining & 0xf) as u8;
        remaining >>= 4;
    }
    nibbles
}
|
||||
|
||||
/// Reassembles a `u64` from sixteen values, most significant first: element `i` is added at
/// bit offset `60 - 4 * i`. Inverse of `u64_to_vec_u4` when every element is below 16.
#[allow(dead_code)] // kept for symmetry with u64_to_vec_u4(); might be useful to convert back decomposed constants
pub const fn vec_u4_to_u64(v: [u8; 64 / 4]) -> u64 {
    let mut acc: u64 = 0;
    let mut shift: u32 = 64;
    let mut idx: usize = 0;

    // `for` is unusable inside a const fn: walk the shifts 60, 56, ..., 0.
    while shift > 0 {
        shift -= 4;
        acc += (v[idx] as u64) << shift;
        idx += 1;
    }
    acc
}
|
||||
|
||||
/// Round-trips two 64-bit words through `u64_to_vec_u4` / `vec_u4_to_u64`.
#[test]
fn test_u64_conv_vec_u4() {
    #[rustfmt::skip]
    let cases: [(u64, [u8; 16]); 2] = [
        (
            0x3f84d5b5b5470917,
            [0x3, 0xf, 0x8, 0x4, 0xd, 0x5, 0xb, 0x5, 0xb, 0x5, 0x4, 0x7, 0x0, 0x9, 0x1, 0x7],
        ),
        (
            0x0ac6f9cd6e6f275d,
            [0x0, 0xa, 0xc, 0x6, 0xf, 0x9, 0xc, 0xd, 0x6, 0xe, 0x6, 0xf, 0x2, 0x7, 0x5, 0xd],
        ),
    ];

    for (word, nibbles) in cases.iter() {
        assert_eq!(*nibbles, u64_to_vec_u4(*word));
        assert_eq!(*word, vec_u4_to_u64(*nibbles));
    }
}
|
||||
|
||||
/// Splits a `u64` into its thirty-two 2-bit digits, most significant digit first:
/// element 0 holds bits 63..62 of `u`, element 31 holds bits 1..0.
pub const fn u64_to_vec_u2(u: u64) -> [u8; 64 / 2] {
    let mut digits: [u8; 64 / 2] = [0; 64 / 2];
    let mut remaining: u64 = u;
    let mut slot: usize = 64 / 2;

    // `for` is unusable inside a const fn: peel digits off the low end and fill from the back.
    while slot > 0 {
        slot -= 1;
        digits[slot] = (remaining & 0x3) as u8;
        remaining >>= 2;
    }
    digits
}
|
||||
|
||||
/// Reassembles a `u64` from thirty-two values, most significant first: element `i` is added at
/// bit offset `62 - 2 * i`. Inverse of `u64_to_vec_u2` when every element is below 4.
pub const fn vec_u2_to_u64(v: [u8; 64 / 2]) -> u64 {
    let mut acc: u64 = 0;
    let mut shift: u32 = 64;
    let mut idx: usize = 0;

    // `for` is unusable inside a const fn: walk the shifts 62, 60, ..., 0.
    while shift > 0 {
        shift -= 2;
        acc += (v[idx] as u64) << shift;
        idx += 1;
    }
    acc
}
|
||||
|
||||
/// Round-trips two 64-bit words through `u64_to_vec_u2` / `vec_u2_to_u64`.
#[test]
fn test_u64_conv_vec_u2() {
    #[rustfmt::skip]
    let cases: [(u64, [u8; 32]); 2] = [
        (
            0x603cd95fa72a8704,
            [
                0x1, 0x2, 0x0, 0x0, 0x0, 0x3, 0x3, 0x0, 0x3, 0x1, 0x2, 0x1, 0x1, 0x1, 0x3, 0x3,
                0x2, 0x2, 0x1, 0x3, 0x0, 0x2, 0x2, 0x2, 0x2, 0x0, 0x1, 0x3, 0x0, 0x0, 0x1, 0x0,
            ],
        ),
        (
            0xee873b2ec447944d,
            [
                0x3, 0x2, 0x3, 0x2, 0x2, 0x0, 0x1, 0x3, 0x0, 0x3, 0x2, 0x3, 0x0, 0x2, 0x3, 0x2,
                0x3, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x3, 0x2, 0x1, 0x1, 0x0, 0x1, 0x0, 0x3, 0x1,
            ],
        ),
    ];

    for (word, digits) in cases.iter() {
        assert_eq!(*digits, u64_to_vec_u2(*word));
        assert_eq!(*word, vec_u2_to_u64(*digits));
    }
}
|
||||
@@ -1,132 +0,0 @@
|
||||
//! Known-answer tests against the PRINCEv2 paper test vectors.
|
||||
//!
|
||||
//! These tests run a full homomorphic PRINCEv2 encryption/decryption and assert that the decrypted
|
||||
//! ciphertext matches the values from PRINCEv2 specifications [BEK+20, Appendix B].
|
||||
//!
|
||||
//! [BEK+20] Dusan Božilov, Maria Eichlseder, Miroslav Kneževic, Baptiste Lambin, Gregor Leander,
|
||||
//! Thorben Moos, Ventzislav Nikov, Shahram Rasoolzadeh, Yosuke Todo, and Friedrich Wiemer.
|
||||
//! PRINCEv2: More security for (almost) no overhead. In Selected Areas in Cryptography (SAC 2020),
|
||||
//! volume 12804 of LNCS, pp.483--511, Springer, 2020. DOI:10.1007/978-3-030-81652-0_19.
|
||||
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
|
||||
use tfhe::shortint::prelude::*;
|
||||
|
||||
use tfhe_princev2::{pv2_decrypt, pv2_encrypt, u64_to_vec_u2, vec_u2_to_u64};
|
||||
|
||||
/// One known-answer test for the PRINCEv2 cipher.
struct Pv2Kat {
    /// Label printed when the test case fails
    name: &'static str,
    /// Plaintext block
    ptxt: u64,
    /// First key word
    k0: u64,
    /// Second key word
    k1: u64,
    /// Expected ciphertext block
    ctxt: u64,
}

impl Pv2Kat {
    /// Positional constructor, only there to keep the table below to one line per vector.
    const fn new(name: &'static str, ptxt: u64, k0: u64, k1: u64, ctxt: u64) -> Self {
        Self { name, ptxt, k0, k1, ctxt }
    }
}

/// Test vectors from [BEK+20, Appendix B]
#[rustfmt::skip]
static PV2_KATS_TABLE: [Pv2Kat; 5] = [
    //           name               ptxt                k0                  k1                  ctxt
    Pv2Kat::new("PRINCEv2 KAT #1", 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0125fc7359441690),
    Pv2Kat::new("PRINCEv2 KAT #2", 0xffffffffffffffff, 0x0000000000000000, 0x0000000000000000, 0x832bd46f108e7857),
    Pv2Kat::new("PRINCEv2 KAT #3", 0x0000000000000000, 0xffffffffffffffff, 0x0000000000000000, 0xee873b2ec447944d),
    Pv2Kat::new("PRINCEv2 KAT #4", 0x0000000000000000, 0x0000000000000000, 0xffffffffffffffff, 0x0ac6f9cd6e6f275d),
    Pv2Kat::new("PRINCEv2 KAT #5", 0x0123456789abcdef, 0x0123456789abcdef, 0xfedcba9876543210, 0x603cd95fa72a8704),
];
|
||||
|
||||
/// Encrypt a u64 as 32 ciphertexts, each holding a 2-bit nibble in the low bits of the FHE message
|
||||
/// space. Most significant bits of the input are at index 0 in the output
|
||||
fn encrypt_u64_as_vec_u2l(s_key: &ClientKey, x: u64) -> [Ciphertext; 32] {
|
||||
let x_u2: [u8; 32] = u64_to_vec_u2(x);
|
||||
let ct: Vec<Ciphertext> = x_u2
|
||||
.into_iter()
|
||||
.map(|u2| s_key.encrypt(u2 as u64))
|
||||
.collect();
|
||||
ct.try_into().unwrap()
|
||||
}
|
||||
|
||||
/// Reverse of function encrypt_u64_as_vec_u2l()
|
||||
fn decrypt_vec_u2l_as_u64(s_key: &ClientKey, v: &[Ciphertext; 32]) -> u64 {
|
||||
let x_u2: [u8; 32] = std::array::from_fn(|n| s_key.decrypt_message_and_carry(&v[n]) as u8);
|
||||
let x: u64 = vec_u2_to_u64(x_u2);
|
||||
x
|
||||
}
|
||||
|
||||
/// Runs `pv2_encrypt` homomorphically on every entry of `PV2_KATS_TABLE` and checks that the
/// decrypted output equals the expected ciphertext of the entry.
/// [Note] Takes approximately 21s / KAT on 8 cores.
#[test]
fn pv2_enc_kat() {
    // Params: Need 4-bits msg + nu >= 4
    let keys: (ClientKey, ServerKey) =
        tfhe::shortint::gen_keys(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
    let (s_key, ev_key) = keys;

    for tkat in PV2_KATS_TABLE.iter() {
        // Encrypt the inputs, in the order k0, k1, message
        let ct_k0: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, tkat.k0);
        let ct_k1: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, tkat.k1);
        let ct_m: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, tkat.ptxt);

        // Output buffer, pre-filled with trivial encryptions of 0
        // [NB] shortint::create_trivial() vs boolean::trivial_encrypt()
        let mut ct_out: [Ciphertext; 32] = [(); 32].map(|()| ev_key.create_trivial(0));

        // PRINCEv2 Enc in FHE
        pv2_encrypt(&ev_key, &mut ct_out, &ct_m, &ct_k0, &ct_k1);

        // Decrypt the homomorphic result and compare it with the reference ciphertext
        let pt_out: u64 = decrypt_vec_u2l_as_u64(&s_key, &ct_out);
        let expected: u64 = tkat.ctxt;
        assert_eq!(
            pt_out, expected,
            "{} failed: ptxt={:#018x}, k0={:#018x}, k1={:#018x}, expected={:#018x}, got={:#018x}",
            tkat.name, tkat.ptxt, tkat.k0, tkat.k1, expected, pt_out
        );
    }
}
|
||||
|
||||
/// Runs `pv2_decrypt` homomorphically on every entry of `PV2_KATS_TABLE` and checks that the
/// decrypted output equals the plaintext of the entry.
#[test]
fn pv2_dec_kat() {
    // Params: Need 4-bits msg + nu >= 4
    let keys: (ClientKey, ServerKey) =
        tfhe::shortint::gen_keys(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128);
    let (s_key, ev_key) = keys;

    for tkat in PV2_KATS_TABLE.iter() {
        // Encrypt the inputs, in the order k0, k1, ciphertext
        let ct_k0: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, tkat.k0);
        let ct_k1: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, tkat.k1);
        let ct_c: [Ciphertext; 32] = encrypt_u64_as_vec_u2l(&s_key, tkat.ctxt);

        // Output buffer, pre-filled with trivial encryptions of 0
        // [NB] shortint::create_trivial() vs boolean::trivial_encrypt()
        let mut ct_out: [Ciphertext; 32] = [(); 32].map(|()| ev_key.create_trivial(0));

        // PRINCEv2 Dec in FHE
        pv2_decrypt(&ev_key, &mut ct_out, &ct_c, &ct_k0, &ct_k1);

        // Decrypt the homomorphic result and compare it with the reference plaintext
        let pt_out: u64 = decrypt_vec_u2l_as_u64(&s_key, &ct_out);
        let expected: u64 = tkat.ptxt;
        assert_eq!(
            pt_out, expected,
            "{} failed: ctxt={:#018x}, k0={:#018x}, k1={:#018x}, expected={:#018x}, got={:#018x}",
            tkat.name, tkat.ctxt, tkat.k0, tkat.k1, expected, pt_out
        );
    }
}
|
||||
@@ -1,6 +1,6 @@
|
||||
BSD 3-Clause Clear License
|
||||
|
||||
Copyright © 2026 ZAMA.
|
||||
Copyright © 2025 ZAMA.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
|
||||
@@ -1,14 +1,5 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Reads `/etc/os-release` and returns the value of its first line starting with `NAME=`,
/// with leading/trailing double quotes removed. Returns `None` when the file cannot be read
/// or has no such line.
fn get_linux_distribution_name() -> Option<String> {
    let os_release = std::fs::read_to_string("/etc/os-release").ok()?;
    os_release
        .lines()
        .find_map(|line| line.strip_prefix("NAME="))
        .map(|raw_name| raw_name.trim_matches('"').to_string())
}
|
||||
use std::process::Command;
|
||||
|
||||
fn main() {
|
||||
if let Ok(val) = std::env::var("DOCS_RS") {
|
||||
@@ -37,7 +28,9 @@ fn main() {
|
||||
println!("cargo::rerun-if-changed=src");
|
||||
|
||||
if std::env::consts::OS == "linux" {
|
||||
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
|
||||
let output = Command::new("./get_os_name.sh").output().unwrap();
|
||||
let distribution = String::from_utf8(output.stdout).unwrap();
|
||||
if distribution != "Ubuntu\n" {
|
||||
println!(
|
||||
"cargo:warning=This Linux distribution is not officially supported. \
|
||||
Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
|
||||
|
||||
@@ -62,29 +62,3 @@ rules:
|
||||
cuda_synchronize_stream(...);
|
||||
...
|
||||
}
|
||||
|
||||
- id: tfhe-cuda-unwrapped-cuda-runtime-call
|
||||
message: "CUDA runtime API call is not wrapped in `check_cuda_error(...)`."
|
||||
severity: WARNING
|
||||
languages: [c, cpp]
|
||||
options:
|
||||
generic_ellipsis_max_span: 500
|
||||
paths:
|
||||
include:
|
||||
- "*.cu"
|
||||
- "*.cuh"
|
||||
- "*.cpp"
|
||||
- "*.h"
|
||||
exclude:
|
||||
- backends/tfhe-cuda-backend/cuda/check_cuda.cu # contains cuda checking functions
|
||||
- backends/tfhe-cuda-backend/cuda/include/device.h # contains the cuda_check_error macro (and others)
|
||||
patterns:
|
||||
- pattern: $FUNC(...)
|
||||
- metavariable-regex:
|
||||
metavariable: $FUNC
|
||||
regex: "^cuda[A-Z][A-Za-z0-9]*$" # matches cudaMalloc/cudaMemcpy/... (not project helpers like cuda_set_device)
|
||||
- pattern-not-inside: check_cuda_error(...)
|
||||
- pattern-not-inside: |
|
||||
$FUNC(...);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
- pattern-not-inside: $FUNC(...) == $VAL
|
||||
|
||||
@@ -36,19 +36,5 @@ void cuda_glwe_sample_extract_128_async(
|
||||
void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths,
|
||||
uint32_t num_lwes_to_extract_per_glwe, uint32_t num_lwes_stored_per_glwe,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size);
|
||||
|
||||
void cuda_modulus_switch_multi_bit_64_async(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in, uint32_t size,
|
||||
uint32_t log_modulus,
|
||||
uint32_t degree,
|
||||
uint32_t grouping_factor);
|
||||
|
||||
void cuda_modulus_switch_multi_bit_128_async(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in, uint32_t size,
|
||||
uint32_t log_modulus,
|
||||
uint32_t degree,
|
||||
uint32_t grouping_factor);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -382,17 +382,14 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
->use_sequential_algorithm_to_resolve_group_carries;
|
||||
|
||||
cuda_set_device(0);
|
||||
check_cuda_error(
|
||||
cudaEventCreateWithFlags(&create_indexes_done, cudaEventDisableTiming));
|
||||
cudaEventCreateWithFlags(&create_indexes_done, cudaEventDisableTiming);
|
||||
create_indexes_for_overflow_sub(streams.get_ith(0), num_blocks, group_size,
|
||||
use_seq, allocate_gpu_memory, size_tracker);
|
||||
check_cuda_error(cudaEventRecord(create_indexes_done, streams.stream(0)));
|
||||
cudaEventRecord(create_indexes_done, streams.stream(0));
|
||||
cuda_set_device(1);
|
||||
check_cuda_error(
|
||||
cudaStreamWaitEvent(streams.stream(1), create_indexes_done, 0));
|
||||
cudaStreamWaitEvent(streams.stream(1), create_indexes_done, 0);
|
||||
cuda_set_device(2);
|
||||
check_cuda_error(
|
||||
cudaStreamWaitEvent(streams.stream(2), create_indexes_done, 0));
|
||||
cudaStreamWaitEvent(streams.stream(2), create_indexes_done, 0);
|
||||
|
||||
scatter_indexes_for_overflowing_sub(
|
||||
streams.stream(1), streams.gpu_index(1),
|
||||
@@ -845,7 +842,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
free(second_indexes_for_overflow_sub_gpu_2);
|
||||
free(scalars_for_overflow_sub_gpu_2);
|
||||
|
||||
check_cuda_error(cudaEventDestroy(create_indexes_done));
|
||||
cudaEventDestroy(create_indexes_done);
|
||||
|
||||
// release sub streams
|
||||
sub_streams_1.release();
|
||||
|
||||
@@ -153,13 +153,7 @@ void cuda_full_propagation_64_inplace_async(
|
||||
void cleanup_cuda_full_propagation_64_inplace(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cuda_integer_mult_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_inout,
|
||||
bool const is_bool_left, CudaRadixCiphertextFFI const *radix_lwe_right,
|
||||
bool const is_bool_right, void *const *bsks, void *const *ksks,
|
||||
int8_t *mem_ptr, uint32_t polynomial_size, uint32_t num_blocks);
|
||||
|
||||
uint64_t scratch_cuda_integer_mult_inplace_64_async(
|
||||
uint64_t scratch_cuda_integer_mult_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, bool const is_boolean_left,
|
||||
bool const is_boolean_right, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
|
||||
@@ -168,8 +162,17 @@ uint64_t scratch_cuda_integer_mult_inplace_64_async(
|
||||
uint32_t num_blocks, PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cleanup_cuda_integer_mult_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
void cuda_integer_mult_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI const *radix_lwe_left,
|
||||
bool const is_bool_left,
|
||||
CudaRadixCiphertextFFI const *radix_lwe_right,
|
||||
bool const is_bool_right, void *const *bsks,
|
||||
void *const *ksks, int8_t *mem_ptr,
|
||||
uint32_t polynomial_size, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_mult_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cuda_negate_ciphertext_64(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
@@ -270,12 +273,7 @@ void cleanup_cuda_integer_comparison_64(CudaStreamsFFI streams,
|
||||
void cleanup_cuda_integer_scalar_comparison_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cuda_boolean_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
|
||||
uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
|
||||
uint64_t scratch_cuda_boolean_bitop_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
@@ -285,8 +283,15 @@ uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
|
||||
bool is_unchecked, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cleanup_cuda_boolean_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
void cuda_boolean_bitop_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
|
||||
void cleanup_cuda_boolean_bitop_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_boolean_bitnot_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
@@ -311,40 +316,42 @@ void cuda_bitnot_ciphertext_64(CudaStreamsFFI streams,
|
||||
uint32_t param_message_modulus,
|
||||
uint32_t param_carry_modulus);
|
||||
|
||||
void cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
uint64_t scratch_cuda_integer_bitop_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_bitop_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_integer_scalar_bitop_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
|
||||
void const *h_clear_blocks, uint32_t num_clear_blocks, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
|
||||
void cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
void const *clear_blocks, void const *h_clear_blocks,
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
void cuda_integer_bitop_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
|
||||
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
void cleanup_cuda_integer_bitop_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cleanup_cuda_integer_scalar_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
void cleanup_cuda_integer_scalar_bitop_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
uint64_t scratch_cuda_cmux_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
@@ -721,7 +728,7 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
|
||||
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
|
||||
void *const *compute_bsks, void *const *ksks);
|
||||
void *const *ksks);
|
||||
|
||||
void cleanup_cuda_integer_grouped_oprf_custom_range_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
@@ -2,17 +2,12 @@
|
||||
|
||||
#include "integer.h"
|
||||
|
||||
enum RERAND_MODE {
|
||||
RERAND_WITH_KS = 0,
|
||||
RERAND_WITHOUT_KS = 1,
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
uint64_t scratch_cuda_rerand_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory, RERAND_MODE rerand_type);
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_rerand_64_async(
|
||||
CudaStreamsFFI streams, void *lwe_array,
|
||||
|
||||
@@ -3,18 +3,16 @@
|
||||
#include "checked_arithmetic.h"
|
||||
#include "integer_utilities.h"
|
||||
#include "keyswitch/ks_enums.h"
|
||||
#include "rerand.h"
|
||||
#include "zk/expand.cuh"
|
||||
#include "zk/zk_utilities.h"
|
||||
|
||||
template <typename Torus> struct int_rerand_mem {
|
||||
int_radix_params params;
|
||||
|
||||
Torus *tmp_expanded_zero_lwes = nullptr;
|
||||
Torus *tmp_ksed_expanded_zero_lwes = nullptr;
|
||||
Torus *lwe_trivial_indexes = nullptr;
|
||||
Torus *tmp_zero_lwes;
|
||||
Torus *tmp_ksed_zero_lwes;
|
||||
Torus *lwe_trivial_indexes;
|
||||
uint32_t num_lwes;
|
||||
RERAND_MODE rerand_mode;
|
||||
|
||||
bool gpu_memory_allocated;
|
||||
|
||||
@@ -22,20 +20,24 @@ template <typename Torus> struct int_rerand_mem {
|
||||
ks_tmp_buf_vec; // not allocated, ReRand not using GEMM KS for now
|
||||
// kept empty to pass to the KS function indicating GEMM KS disabled
|
||||
|
||||
expand_job<Torus> *d_expand_jobs = nullptr;
|
||||
expand_job<Torus> *h_expand_jobs = nullptr;
|
||||
expand_job<Torus> *d_expand_jobs;
|
||||
expand_job<Torus> *h_expand_jobs;
|
||||
|
||||
int_rerand_mem(CudaStreams streams, int_radix_params params,
|
||||
const uint32_t num_lwes, const RERAND_MODE rerand_mode,
|
||||
const bool allocate_gpu_memory, uint64_t &size_tracker)
|
||||
: params(params), num_lwes(num_lwes), rerand_mode(rerand_mode),
|
||||
const uint32_t num_lwes, const bool allocate_gpu_memory,
|
||||
uint64_t &size_tracker)
|
||||
: params(params), num_lwes(num_lwes),
|
||||
gpu_memory_allocated(allocate_gpu_memory) {
|
||||
|
||||
tmp_expanded_zero_lwes =
|
||||
static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes, params.big_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory));
|
||||
tmp_zero_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes, params.big_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
tmp_ksed_zero_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes, params.small_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
d_expand_jobs =
|
||||
static_cast<expand_job<Torus> *>(cuda_malloc_with_size_tracking_async(
|
||||
@@ -44,63 +46,47 @@ template <typename Torus> struct int_rerand_mem {
|
||||
|
||||
h_expand_jobs = static_cast<expand_job<Torus> *>(
|
||||
malloc(safe_mul_sizeof<expand_job<Torus>>(num_lwes)));
|
||||
PANIC_IF_FALSE(h_expand_jobs != nullptr,
|
||||
"host allocation failed for h_expand_jobs");
|
||||
|
||||
if (rerand_mode == RERAND_MODE::RERAND_WITH_KS) {
|
||||
tmp_ksed_expanded_zero_lwes =
|
||||
static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes, params.small_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory));
|
||||
|
||||
auto h_lwe_trivial_indexes =
|
||||
static_cast<Torus *>(malloc(safe_mul_sizeof<Torus>(num_lwes)));
|
||||
PANIC_IF_FALSE(h_lwe_trivial_indexes != nullptr,
|
||||
"host allocation failed for h_lwe_trivial_indexes");
|
||||
for (uint32_t i = 0; i < num_lwes; ++i) {
|
||||
h_lwe_trivial_indexes[i] = i;
|
||||
}
|
||||
lwe_trivial_indexes =
|
||||
static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes), streams.stream(0),
|
||||
streams.gpu_index(0), size_tracker, allocate_gpu_memory));
|
||||
cuda_memcpy_async_to_gpu(lwe_trivial_indexes, h_lwe_trivial_indexes,
|
||||
safe_mul_sizeof<Torus>(num_lwes),
|
||||
streams.stream(0), streams.gpu_index(0));
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
free(h_lwe_trivial_indexes);
|
||||
} else {
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
auto h_lwe_trivial_indexes =
|
||||
static_cast<Torus *>(malloc(safe_mul_sizeof<Torus>(num_lwes)));
|
||||
for (auto i = 0; i < num_lwes; ++i) {
|
||||
h_lwe_trivial_indexes[i] = i;
|
||||
}
|
||||
lwe_trivial_indexes = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes), streams.stream(0),
|
||||
streams.gpu_index(0), size_tracker, allocate_gpu_memory);
|
||||
cuda_memcpy_async_to_gpu(lwe_trivial_indexes, h_lwe_trivial_indexes,
|
||||
safe_mul_sizeof<Torus>(num_lwes),
|
||||
streams.stream(0), streams.gpu_index(0));
|
||||
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
|
||||
free(h_lwe_trivial_indexes);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
cuda_drop_with_size_tracking_async(tmp_expanded_zero_lwes,
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
cuda_drop_with_size_tracking_async(tmp_zero_lwes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
tmp_expanded_zero_lwes = nullptr;
|
||||
tmp_zero_lwes = nullptr;
|
||||
cuda_drop_with_size_tracking_async(tmp_ksed_zero_lwes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
tmp_ksed_zero_lwes = nullptr;
|
||||
cuda_drop_with_size_tracking_async(lwe_trivial_indexes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
lwe_trivial_indexes = nullptr;
|
||||
cuda_drop_with_size_tracking_async(d_expand_jobs, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
d_expand_jobs = nullptr;
|
||||
|
||||
if (rerand_mode == RERAND_MODE::RERAND_WITH_KS) {
|
||||
cuda_drop_with_size_tracking_async(
|
||||
tmp_ksed_expanded_zero_lwes, streams.stream(0), streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
tmp_ksed_expanded_zero_lwes = nullptr;
|
||||
cuda_drop_with_size_tracking_async(lwe_trivial_indexes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
lwe_trivial_indexes = nullptr;
|
||||
|
||||
for (size_t i = 0; i < ks_tmp_buf_vec.size(); i++) {
|
||||
cleanup_cuda_keyswitch(streams.stream(i), streams.gpu_index(i),
|
||||
ks_tmp_buf_vec[i], gpu_memory_allocated);
|
||||
}
|
||||
ks_tmp_buf_vec.clear();
|
||||
for (auto i = 0; i < ks_tmp_buf_vec.size(); i++) {
|
||||
cleanup_cuda_keyswitch(streams.stream(i), streams.gpu_index(i),
|
||||
ks_tmp_buf_vec[i], gpu_memory_allocated);
|
||||
}
|
||||
ks_tmp_buf_vec.clear();
|
||||
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
free(h_expand_jobs);
|
||||
|
||||
@@ -64,12 +64,6 @@ void cuda_add_lwe_ciphertext_vector_plaintext_64(
|
||||
void const *lwe_array_in, const uint64_t plaintext_in,
|
||||
const uint32_t input_lwe_dimension,
|
||||
const uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_inplace_32(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *input_2);
|
||||
void cuda_add_lwe_ciphertext_vector_inplace_64(
|
||||
void *stream, uint32_t gpu_index, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *input_2);
|
||||
}
|
||||
|
||||
#endif // CUDA_LINALG_H_
|
||||
|
||||
@@ -39,28 +39,6 @@ void cleanup_cuda_multi_bit_programmable_bootstrap_64(void *stream,
|
||||
uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
// Noise-tests-namespaced wrappers for scratch/cleanup, so that callers
|
||||
// working with the noise-tests PBS variant use a consistent naming scheme.
|
||||
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_noise_tests_64_async(
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap_noise_tests_64(
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer);
|
||||
|
||||
// Noise tests variant: 64-bit torus, polynomial_size=2048 only. Uses the
|
||||
// NOISE_TESTS keybundle mode for noise analysis purposes.
|
||||
void cuda_multi_bit_programmable_bootstrap_noise_tests_64_async(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_output_indexes, void const *lut_vector,
|
||||
void const *lut_vector_indexes, void const *lwe_array_in,
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_128_async(
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
@@ -78,23 +56,6 @@ void cuda_multi_bit_programmable_bootstrap_128_async(
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap_128(void *stream,
|
||||
const uint32_t gpu_index,
|
||||
int8_t **buffer);
|
||||
|
||||
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_noise_tests_128_async(
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap_noise_tests_128(
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer);
|
||||
|
||||
void cuda_multi_bit_programmable_bootstrap_noise_tests_128_async(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_output_indexes, void const *lut_vector,
|
||||
void const *lwe_array_in, void const *lwe_input_indexes,
|
||||
void const *bootstrapping_key, int8_t *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
}
|
||||
|
||||
#endif // CUDA_MULTI_BIT_H
|
||||
|
||||
@@ -105,11 +105,11 @@ template <typename Torus> struct zk_expand_mem {
|
||||
uint32_t num_lwes;
|
||||
uint32_t num_compact_lists;
|
||||
|
||||
int_radix_lut<Torus> *message_and_carry_extract_luts = nullptr;
|
||||
int_radix_lut<Torus> *identity_lut = nullptr;
|
||||
int_radix_lut<Torus> *message_and_carry_extract_luts;
|
||||
int_radix_lut<Torus> *identity_lut;
|
||||
|
||||
Torus *tmp_expanded_lwes = nullptr;
|
||||
Torus *tmp_ksed_small_to_big_expanded_lwes = nullptr;
|
||||
Torus *tmp_expanded_lwes;
|
||||
Torus *tmp_ksed_small_to_big_expanded_lwes;
|
||||
|
||||
bool gpu_memory_allocated;
|
||||
|
||||
@@ -148,6 +148,66 @@ template <typename Torus> struct zk_expand_mem {
|
||||
PANIC("GPU backend requires carry_modulus equal to message_modulus")
|
||||
}
|
||||
|
||||
// We create the identity LUT only if we are doing a SANITY_CHECK
|
||||
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
identity_lut =
|
||||
new int_radix_lut<Torus>(streams, computing_params, 1, 2 * num_lwes,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
auto identity_lut_f = [](Torus x) -> Torus { return x; };
|
||||
|
||||
identity_lut->generate_and_broadcast_lut(streams, {0}, {identity_lut_f},
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
auto message_extract_lut_f = [casting_params](Torus x) -> Torus {
|
||||
return x % casting_params.message_modulus;
|
||||
};
|
||||
auto carry_extract_lut_f = [casting_params](Torus x) -> Torus {
|
||||
return (x / casting_params.carry_modulus) %
|
||||
casting_params.message_modulus;
|
||||
};
|
||||
|
||||
// Booleans have to be sanitized
|
||||
auto sanitize_bool_f = [](Torus x) -> Torus { return x == 0 ? 0 : 1; };
|
||||
auto message_extract_and_sanitize_bool_lut_f =
|
||||
[message_extract_lut_f, sanitize_bool_f](Torus x) -> Torus {
|
||||
return sanitize_bool_f(message_extract_lut_f(x));
|
||||
};
|
||||
auto carry_extract_and_sanitize_bool_lut_f =
|
||||
[carry_extract_lut_f, sanitize_bool_f](Torus x) -> Torus {
|
||||
return sanitize_bool_f(carry_extract_lut_f(x));
|
||||
};
|
||||
|
||||
/** In case the casting key casts from BIG to SMALL key we run a single KS
|
||||
to expand using the casting key as ksk. Otherwise, in case the casting key
|
||||
casts from SMALL to BIG key, we first keyswitch from SMALL to BIG using
|
||||
the casting key as ksk, then we keyswitch from BIG to SMALL using the
|
||||
computing ksk, and lastly we apply the PBS. The output is always on the
|
||||
BIG key.
|
||||
**/
|
||||
auto params = casting_params;
|
||||
if (casting_key_type == SMALL_TO_BIG) {
|
||||
params = computing_params;
|
||||
}
|
||||
message_and_carry_extract_luts = new int_radix_lut<Torus>(
|
||||
streams, params, 4, 2 * num_lwes, allocate_gpu_memory, size_tracker);
|
||||
|
||||
// We are always packing two LWEs. We just need to be sure we have enough
|
||||
// space in the carry part to store a message of the same size as is in the
|
||||
// message part.
|
||||
if (params.carry_modulus < params.message_modulus)
|
||||
PANIC("Carry modulus must be at least as large as message modulus");
|
||||
auto num_packed_msgs = 2;
|
||||
|
||||
// Adjust indexes to permute the output and access the correct LUT
|
||||
auto h_indexes_in = static_cast<Torus *>(
|
||||
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
|
||||
auto h_indexes_out = static_cast<Torus *>(
|
||||
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
|
||||
auto h_lut_indexes = static_cast<Torus *>(
|
||||
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
|
||||
|
||||
d_expand_jobs =
|
||||
static_cast<expand_job<Torus> *>(cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<expand_job<Torus>>(num_lwes), streams.stream(0),
|
||||
@@ -156,202 +216,144 @@ template <typename Torus> struct zk_expand_mem {
|
||||
h_expand_jobs = static_cast<expand_job<Torus> *>(
|
||||
malloc(safe_mul_sizeof<expand_job<Torus>>(num_lwes)));
|
||||
|
||||
// NO_CASTING expands directly into the output buffer — no LUTs, no PBS,
|
||||
// no intermediate buffers needed.
|
||||
if (expand_kind != EXPAND_KIND::NO_CASTING) {
|
||||
/** In case the casting key casts from BIG to SMALL key we run a single KS
|
||||
to expand using the casting key as ksk. Otherwise, in case the casting key
|
||||
casts from SMALL to BIG key, we first keyswitch from SMALL to BIG using
|
||||
the casting key as ksk, then we keyswitch from BIG to SMALL using the
|
||||
computing ksk, and lastly we apply the PBS. The output is always on the
|
||||
BIG key.
|
||||
**/
|
||||
auto params = casting_params;
|
||||
if (casting_key_type == SMALL_TO_BIG) {
|
||||
params = computing_params;
|
||||
/*
|
||||
* Each LWE contains encrypted data in both carry and message spaces
|
||||
* that needs to be extracted.
|
||||
*
|
||||
* The loop processes each compact list (k) and for each LWE within that
|
||||
* list:
|
||||
* 1. Sets input indexes to read each LWE twice (for carry and message
|
||||
* extraction)
|
||||
* 2. Creates output indexes to properly reorder the results
|
||||
* 3. Selects appropriate LUT index based on whether boolean sanitization is
|
||||
* needed
|
||||
*
|
||||
* We want the output to have always first the content of the message part
|
||||
* and then the content of the carry part of each LWE.
|
||||
*
|
||||
* i.e. msg_extract(LWE_0), carry_extract(LWE_0), msg_extract(LWE_1),
|
||||
* carry_extract(LWE_1), ...
|
||||
*
|
||||
* Aiming that behavior, with 4 LWEs we would have:
|
||||
*
|
||||
* // Each LWE is processed twice
|
||||
* h_indexes_in = {0, 1, 2, 3, 0, 1, 2, 3}
|
||||
*
|
||||
* // First 4 use message LUT, last 4 use carry LUT
|
||||
* h_lut_indexes = {0, 0, 0, 0, 1, 1, 1, 1}
|
||||
*
|
||||
* // Reorders output so message and carry for each LWE appear together
|
||||
* h_indexes_out = {0, 2, 4, 6, 1, 3, 5, 7}
|
||||
*
|
||||
* If an LWE contains a boolean value, its LUT index is shifted by
|
||||
* num_packed_msgs to use the sanitization LUT (which ensures output is
|
||||
* exactly 0 or 1).
|
||||
*/
|
||||
auto offset = 0;
|
||||
for (int k = 0; k < num_compact_lists; k++) {
|
||||
auto num_lwes_in_kth = this->num_lwes_per_compact_list[k];
|
||||
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
|
||||
auto lwe_index = i + num_packed_msgs * offset;
|
||||
auto lwe_index_in_list = i % num_lwes_in_kth;
|
||||
PANIC_IF_FALSE(lwe_index < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %d is beyond the max value %d",
|
||||
lwe_index, num_packed_msgs * num_lwes);
|
||||
h_indexes_in[lwe_index] = lwe_index_in_list + offset;
|
||||
h_indexes_out[lwe_index] =
|
||||
num_packed_msgs * h_indexes_in[lwe_index] + i / num_lwes_in_kth;
|
||||
PANIC_IF_FALSE(h_indexes_in[lwe_index] < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %lu is beyond the max value %lu",
|
||||
(unsigned long)h_indexes_in[lwe_index],
|
||||
(unsigned long)(num_packed_msgs * num_lwes));
|
||||
PANIC_IF_FALSE(h_indexes_out[lwe_index] < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %lu is beyond the max value %lu",
|
||||
(unsigned long)h_indexes_out[lwe_index],
|
||||
(unsigned long)(num_packed_msgs * num_lwes));
|
||||
// is_boolean_array tells us which input is a boolean and thus the
|
||||
// related output needs boolean sanitization. It naturally has
|
||||
// total_blocks entries, but h_indexes_out reaches
|
||||
// message_modulus * ceil(total_blocks/2) - 1. When total_blocks is odd,
|
||||
// the ceiling causes out-of-bounds access. Reading garbage "true" would
|
||||
// set h_lut_indexes to an invalid index pointing to uninitialized
|
||||
// memory instead of a real LUT. Rust pads is_boolean_array with FALSE
|
||||
// to match.
|
||||
PANIC_IF_FALSE(h_indexes_out[lwe_index] < is_boolean_array_len,
|
||||
"Cuda error: index %lu for is_boolean_array is out of "
|
||||
"bounds (len is %lu)",
|
||||
(unsigned long)h_indexes_out[lwe_index],
|
||||
(unsigned long)is_boolean_array_len);
|
||||
}
|
||||
offset += num_lwes_in_kth;
|
||||
}
|
||||
|
||||
// We always pack two LWEs (message and carry parts per LWE)
|
||||
auto num_packed_msgs = 2;
|
||||
message_and_carry_extract_luts->set_lwe_indexes(
|
||||
streams.stream(0), streams.gpu_index(0), h_indexes_in, h_indexes_out);
|
||||
|
||||
// Adjust indexes to permute the output and access the correct LUT.
|
||||
//
|
||||
// The loop below fills h_indexes_in and h_indexes_out so that the output
|
||||
// is ordered as: msg_extract(LWE_0), carry_extract(LWE_0),
|
||||
// msg_extract(LWE_1), carry_extract(LWE_1), ...
|
||||
//
|
||||
// With 4 LWEs the arrays look like:
|
||||
// h_indexes_in = {0, 1, 2, 3, 0, 1, 2, 3} (each LWE read twice)
|
||||
// h_lut_indexes = {0, 0, 0, 0, 1, 1, 1, 1} (msg LUT then carry LUT)
|
||||
// h_indexes_out = {0, 2, 4, 6, 1, 3, 5, 7} (interleaved output)
|
||||
//
|
||||
// If an LWE contains a boolean its LUT index is shifted by
|
||||
// num_packed_msgs to use the sanitization LUT (output clamped to {0, 1}).
|
||||
auto h_indexes_in = static_cast<Torus *>(
|
||||
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
|
||||
auto h_indexes_out = static_cast<Torus *>(
|
||||
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(2 * num_lwes, params.pbs_type);
|
||||
|
||||
// Index generator for message/carry extraction LUTs
|
||||
auto index_gen = [num_compact_lists,
|
||||
num_lwes_per_compact_list =
|
||||
this->num_lwes_per_compact_list,
|
||||
num_packed_msgs, is_boolean_array,
|
||||
h_indexes_out](Torus *h_lut_indexes, uint32_t) {
|
||||
auto offset = 0;
|
||||
for (int k = 0; k < num_compact_lists; k++) {
|
||||
auto num_lwes_in_kth = this->num_lwes_per_compact_list[k];
|
||||
auto num_lwes_in_kth = num_lwes_per_compact_list[k];
|
||||
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
|
||||
auto lwe_index = i + num_packed_msgs * offset;
|
||||
auto lwe_index_in_list = i % num_lwes_in_kth;
|
||||
PANIC_IF_FALSE(lwe_index < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %d is beyond the max value %d",
|
||||
lwe_index, num_packed_msgs * num_lwes);
|
||||
h_indexes_in[lwe_index] = lwe_index_in_list + offset;
|
||||
h_indexes_out[lwe_index] =
|
||||
num_packed_msgs * h_indexes_in[lwe_index] + i / num_lwes_in_kth;
|
||||
PANIC_IF_FALSE(h_indexes_in[lwe_index] < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %lu is beyond the max value %lu",
|
||||
(unsigned long)h_indexes_in[lwe_index],
|
||||
(unsigned long)(num_packed_msgs * num_lwes));
|
||||
PANIC_IF_FALSE(h_indexes_out[lwe_index] < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %lu is beyond the max value %lu",
|
||||
(unsigned long)h_indexes_out[lwe_index],
|
||||
(unsigned long)(num_packed_msgs * num_lwes));
|
||||
// is_boolean_array tells us which input is a boolean and thus the
|
||||
// related output needs boolean sanitization. It naturally has
|
||||
// total_blocks entries, but h_indexes_out reaches
|
||||
// message_modulus * ceil(total_blocks/2) - 1. When total_blocks is
|
||||
// odd, the ceiling causes out-of-bounds access. Reading garbage
|
||||
// "true" would set h_lut_indexes to an invalid index pointing to
|
||||
// uninitialized memory instead of a real LUT. Rust pads
|
||||
// is_boolean_array with FALSE to match.
|
||||
PANIC_IF_FALSE(h_indexes_out[lwe_index] < is_boolean_array_len,
|
||||
"Cuda error: index %lu for is_boolean_array is out of "
|
||||
"bounds (len is %lu)",
|
||||
(unsigned long)h_indexes_out[lwe_index],
|
||||
(unsigned long)is_boolean_array_len);
|
||||
auto boolean_offset =
|
||||
is_boolean_array[h_indexes_out[lwe_index]] ? num_packed_msgs : 0;
|
||||
h_lut_indexes[lwe_index] = i / num_lwes_in_kth + boolean_offset;
|
||||
}
|
||||
offset += num_lwes_in_kth;
|
||||
}
|
||||
};
|
||||
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(2 * num_lwes, params.pbs_type);
|
||||
message_and_carry_extract_luts->generate_and_broadcast_lut(
|
||||
active_streams, {0, 1, 2, 3},
|
||||
{message_extract_lut_f, carry_extract_lut_f,
|
||||
message_extract_and_sanitize_bool_lut_f,
|
||||
carry_extract_and_sanitize_bool_lut_f},
|
||||
index_gen, true, {}, h_lut_indexes);
|
||||
|
||||
// SANITY_CHECK uses identity_lut (skipping the full message/carry
|
||||
// extraction LUT and the SMALL_TO_BIG intermediate buffer).
|
||||
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
identity_lut =
|
||||
new int_radix_lut<Torus>(streams, casting_params, 1, 2 * num_lwes,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
message_and_carry_extract_luts->allocate_lwe_vector_for_non_trivial_indexes(
|
||||
active_streams, 2 * num_lwes, size_tracker, allocate_gpu_memory);
|
||||
// The expanded LWEs will always be on the casting key format
|
||||
tmp_expanded_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes, casting_params.big_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
auto identity_lut_f = [](Torus x) -> Torus { return x; };
|
||||
identity_lut->generate_and_broadcast_lut(streams, {0}, {identity_lut_f},
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
identity_lut->set_lwe_indexes(streams.stream(0), streams.gpu_index(0),
|
||||
h_indexes_in, h_indexes_out);
|
||||
identity_lut->allocate_lwe_vector_for_non_trivial_indexes(
|
||||
active_streams, 2 * num_lwes, size_tracker, allocate_gpu_memory);
|
||||
} else {
|
||||
// We are always packing two LWEs. We just need to be sure we have
|
||||
// enough space in the carry part to store a message of the same size
|
||||
// as is in the message part.
|
||||
if (params.carry_modulus < params.message_modulus)
|
||||
PANIC("Carry modulus must be at least as large as message modulus");
|
||||
|
||||
message_and_carry_extract_luts =
|
||||
new int_radix_lut<Torus>(streams, params, 4, 2 * num_lwes,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
message_and_carry_extract_luts->set_lwe_indexes(
|
||||
streams.stream(0), streams.gpu_index(0), h_indexes_in,
|
||||
h_indexes_out);
|
||||
|
||||
auto message_extract_lut_f = [casting_params](Torus x) -> Torus {
|
||||
return x % casting_params.message_modulus;
|
||||
};
|
||||
auto carry_extract_lut_f = [casting_params](Torus x) -> Torus {
|
||||
return (x / casting_params.carry_modulus) %
|
||||
casting_params.message_modulus;
|
||||
};
|
||||
auto sanitize_bool_f = [](Torus x) -> Torus { return x == 0 ? 0 : 1; };
|
||||
auto message_extract_and_sanitize_bool_lut_f =
|
||||
[message_extract_lut_f, sanitize_bool_f](Torus x) -> Torus {
|
||||
return sanitize_bool_f(message_extract_lut_f(x));
|
||||
};
|
||||
auto carry_extract_and_sanitize_bool_lut_f =
|
||||
[carry_extract_lut_f, sanitize_bool_f](Torus x) -> Torus {
|
||||
return sanitize_bool_f(carry_extract_lut_f(x));
|
||||
};
|
||||
|
||||
auto h_lut_indexes = static_cast<Torus *>(
|
||||
malloc(safe_mul_sizeof<Torus>(num_packed_msgs, num_lwes)));
|
||||
|
||||
auto index_gen = [num_compact_lists,
|
||||
num_lwes_per_compact_list =
|
||||
this->num_lwes_per_compact_list,
|
||||
num_packed_msgs, is_boolean_array,
|
||||
h_indexes_out](Torus *h_lut_indexes, uint32_t) {
|
||||
auto offset = 0;
|
||||
for (int k = 0; k < num_compact_lists; k++) {
|
||||
auto num_lwes_in_kth = num_lwes_per_compact_list[k];
|
||||
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
|
||||
auto lwe_index = i + num_packed_msgs * offset;
|
||||
auto boolean_offset = is_boolean_array[h_indexes_out[lwe_index]]
|
||||
? num_packed_msgs
|
||||
: 0;
|
||||
h_lut_indexes[lwe_index] = i / num_lwes_in_kth + boolean_offset;
|
||||
}
|
||||
offset += num_lwes_in_kth;
|
||||
}
|
||||
};
|
||||
|
||||
message_and_carry_extract_luts->generate_and_broadcast_lut(
|
||||
active_streams, {0, 1, 2, 3},
|
||||
{message_extract_lut_f, carry_extract_lut_f,
|
||||
message_extract_and_sanitize_bool_lut_f,
|
||||
carry_extract_and_sanitize_bool_lut_f},
|
||||
index_gen, true, {}, h_lut_indexes);
|
||||
message_and_carry_extract_luts
|
||||
->allocate_lwe_vector_for_non_trivial_indexes(
|
||||
active_streams, 2 * num_lwes, size_tracker,
|
||||
allocate_gpu_memory);
|
||||
free(h_lut_indexes);
|
||||
|
||||
// SANITY_CHECK panics on SMALL_TO_BIG, so this buffer is only needed
|
||||
// on the full casting path.
|
||||
tmp_ksed_small_to_big_expanded_lwes =
|
||||
(Torus *)cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes,
|
||||
casting_params.big_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
// The expanded LWEs will always be on the casting key format
|
||||
tmp_expanded_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes,
|
||||
casting_params.big_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
free(h_indexes_in);
|
||||
free(h_indexes_out);
|
||||
}
|
||||
tmp_ksed_small_to_big_expanded_lwes =
|
||||
(Torus *)cuda_malloc_with_size_tracking_async(
|
||||
safe_mul_sizeof<Torus>(num_lwes,
|
||||
casting_params.big_lwe_dimension + 1),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
free(h_indexes_in);
|
||||
free(h_indexes_out);
|
||||
free(h_lut_indexes);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
if (expand_kind != EXPAND_KIND::NO_CASTING) {
|
||||
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
identity_lut->release(streams);
|
||||
delete identity_lut;
|
||||
} else {
|
||||
message_and_carry_extract_luts->release(streams);
|
||||
delete message_and_carry_extract_luts;
|
||||
cuda_drop_with_size_tracking_async(
|
||||
tmp_ksed_small_to_big_expanded_lwes, streams.stream(0),
|
||||
streams.gpu_index(0), gpu_memory_allocated);
|
||||
}
|
||||
cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
message_and_carry_extract_luts->release(streams);
|
||||
delete message_and_carry_extract_luts;
|
||||
|
||||
if (expand_kind == EXPAND_KIND::SANITY_CHECK) {
|
||||
identity_lut->release(streams);
|
||||
delete identity_lut;
|
||||
}
|
||||
|
||||
cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(tmp_ksed_small_to_big_expanded_lwes,
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
cuda_drop_with_size_tracking_async(d_expand_jobs, streams.stream(0),
|
||||
streams.gpu_index(0),
|
||||
gpu_memory_allocated);
|
||||
|
||||
@@ -390,7 +390,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_a[6], &wires_a[15], &input_bits[7]);
|
||||
XOR(&wires_a[10], &wires_a[15], &wires_b[0]);
|
||||
XOR(&wires_a[11], &wires_a[20], &wires_a[9]);
|
||||
FLUSH(&wires_a[6], &wires_a[10], &wires_a[11]);
|
||||
FLUSH(&wires_a[6], &wires_a[10]);
|
||||
XOR(&wires_a[7], &input_bits[7], &wires_a[11]);
|
||||
FLUSH(&wires_a[7]);
|
||||
XOR(&wires_a[17], &wires_a[10], &wires_a[11]);
|
||||
@@ -426,7 +426,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_b[22], &wires_b[18], &wires_a[19]);
|
||||
XOR(&wires_b[23], &wires_b[19], &wires_a[21]);
|
||||
XOR(&wires_b[24], &wires_b[20], &wires_a[18]);
|
||||
FLUSH(&wires_b[21], &wires_b[22], &wires_b[23], &wires_b[24]);
|
||||
FLUSH(&wires_b[21], &wires_b[23], &wires_b[24]);
|
||||
XOR(&wires_b[25], &wires_b[21], &wires_b[22]);
|
||||
FLUSH(&wires_b[25]);
|
||||
|
||||
@@ -468,7 +468,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
|
||||
XOR(&wires_b[37], &wires_b[36], &wires_b[34]);
|
||||
XOR(&wires_b[38], &wires_b[27], &wires_b[36]);
|
||||
FLUSH(&wires_b[38], &wires_b[37]);
|
||||
FLUSH(&wires_b[38]);
|
||||
XOR(&wires_b[44], &wires_b[33], &wires_b[37]);
|
||||
|
||||
CudaRadixCiphertextFFI *and_outs_6[] = {&wires_b[39]};
|
||||
@@ -479,7 +479,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_b[40], &wires_b[25], &wires_b[39]);
|
||||
XOR(&wires_b[41], &wires_b[40], &wires_b[37]);
|
||||
XOR(&wires_b[43], &wires_b[29], &wires_b[40]);
|
||||
FLUSH(&wires_b[41], &wires_b[40], &wires_b[43], &wires_b[44]);
|
||||
FLUSH(&wires_b[41]);
|
||||
XOR(&wires_b[45], &wires_b[42], &wires_b[41]);
|
||||
FLUSH(&wires_b[45]);
|
||||
|
||||
@@ -514,7 +514,6 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
XOR(&wires_b[57], &wires_b[50], &wires_b[53]);
|
||||
XOR(&wires_b[58], &wires_c[4], &wires_b[46]);
|
||||
XOR(&wires_b[59], &wires_c[3], &wires_b[54]);
|
||||
FLUSH(&wires_b[57], &wires_b[58]);
|
||||
XOR(&wires_b[60], &wires_b[46], &wires_b[57]);
|
||||
XOR(&wires_b[61], &wires_c[14], &wires_b[57]);
|
||||
XOR(&wires_b[62], &wires_b[52], &wires_b[58]);
|
||||
@@ -590,7 +589,6 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
|
||||
#undef FLUSH
|
||||
#undef AND
|
||||
#undef ADD_ONE_FLUSH
|
||||
#undef ADD_ONE
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -83,8 +83,6 @@ void cuda_modulus_switch_inplace_64_async(void *stream, uint32_t gpu_index,
|
||||
void cuda_modulus_switch_64_async(void *stream, uint32_t gpu_index,
|
||||
void *lwe_out, const void *lwe_in,
|
||||
uint32_t size, uint32_t log_modulus) {
|
||||
PANIC_IF_FALSE(lwe_out != lwe_in, "Output and input pointers must be "
|
||||
"different for out-of-place operations");
|
||||
host_modulus_switch<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_out),
|
||||
static_cast<const uint64_t *>(lwe_in), size,
|
||||
@@ -95,8 +93,6 @@ void cuda_centered_modulus_switch_64_async(void *stream, uint32_t gpu_index,
|
||||
void *lwe_out, const void *lwe_in,
|
||||
uint32_t lwe_dimension,
|
||||
uint32_t log_modulus) {
|
||||
PANIC_IF_FALSE(lwe_out != lwe_in, "Output and input pointers must be "
|
||||
"different for out-of-place operations");
|
||||
host_centered_modulus_switch_inplace<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_out), static_cast<const uint64_t *>(lwe_in),
|
||||
@@ -150,31 +146,3 @@ void cuda_glwe_sample_extract_128_async(
|
||||
"N's are powers of two in the interval [256..4096].")
|
||||
}
|
||||
}
|
||||
|
||||
// Type-erased entry point for the multi-bit modulus switch on 64-bit data.
// Converts the opaque stream handle and the untyped input/output buffers to
// their concrete types, then forwards every argument unchanged to
// host_modulus_switch_multi_bit<uint64_t>.
void cuda_modulus_switch_multi_bit_64_async(void *stream, uint32_t gpu_index,
                                            void *lwe_array_out,
                                            void *lwe_array_in, uint32_t size,
                                            uint32_t log_modulus,
                                            uint32_t degree,
                                            uint32_t grouping_factor) {
  auto cuda_stream = static_cast<cudaStream_t>(stream);
  auto *typed_out = static_cast<uint64_t *>(lwe_array_out);
  auto *typed_in = static_cast<uint64_t *>(lwe_array_in);

  host_modulus_switch_multi_bit<uint64_t>(cuda_stream, gpu_index, typed_out,
                                          typed_in, size, log_modulus, degree,
                                          grouping_factor);
}
|
||||
|
||||
// Type-erased entry point for the multi-bit modulus switch on 128-bit data.
// Converts the opaque stream handle and the untyped input/output buffers to
// their concrete types, then forwards every argument unchanged to
// host_modulus_switch_multi_bit<__uint128_t>.
void cuda_modulus_switch_multi_bit_128_async(void *stream, uint32_t gpu_index,
                                             void *lwe_array_out,
                                             void *lwe_array_in, uint32_t size,
                                             uint32_t log_modulus,
                                             uint32_t degree,
                                             uint32_t grouping_factor) {
  auto cuda_stream = static_cast<cudaStream_t>(stream);
  auto *typed_out = static_cast<__uint128_t *>(lwe_array_out);
  auto *typed_in = static_cast<__uint128_t *>(lwe_array_in);

  host_modulus_switch_multi_bit<__uint128_t>(cuda_stream, gpu_index, typed_out,
                                             typed_in, size, log_modulus,
                                             degree, grouping_factor);
}
|
||||
|
||||
@@ -217,8 +217,6 @@ void cuda_closest_representable_64_async(void *stream, uint32_t gpu_index,
|
||||
void const *input, void *output,
|
||||
uint32_t base_log,
|
||||
uint32_t level_count) {
|
||||
PANIC_IF_FALSE(output != input, "Output and input pointers must be different "
|
||||
"for out-of-place operations");
|
||||
host_cuda_closest_representable(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<const uint64_t *>(input),
|
||||
static_cast<uint64_t *>(output), base_log,
|
||||
|
||||
@@ -463,48 +463,5 @@ __global__ void __launch_bounds__(512)
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Used only by the noise tests. It follows the same logic that is embedded
|
||||
// in the keybundle; it exists as a standalone __global__ kernel so that this
|
||||
// logic can be tested individually.
|
||||
//
// Work split: each thread with tid < size handles one group.
//  - input:  pointer to array_in at offset tid * grouping_factor
//  - output: 2^grouping_factor consecutive slots of array_out starting at
//            tid * 2^grouping_factor
// Output slot `ggsw_idx` receives
// calculates_monomial_degree<Torus, params>(input, ggsw_idx, grouping_factor).
// Threads with tid >= size do nothing.
// `log_modulus` is accepted but not read anywhere in this body.
template <typename Torus, class params>
|
||||
__global__ void
|
||||
modulus_switch_multi_bit(Torus *array_out, const Torus *array_in, int size,
|
||||
uint32_t log_modulus, uint32_t grouping_factor) {
|
||||
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (tid < size) {
|
||||
int num_monomials = 1 << grouping_factor;
|
||||
int input_offset = tid * grouping_factor;
|
||||
int output_offset = tid * num_monomials;
|
||||
// All monomials are computed, even though the first one is never used.
|
||||
for (int ggsw_idx = 0; ggsw_idx < num_monomials; ggsw_idx++) {
|
||||
array_out[ggsw_idx + output_offset] =
|
||||
calculates_monomial_degree<Torus, params>(&array_in[input_offset],
|
||||
ggsw_idx, grouping_factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Host-side launcher meant to be called only from the noise tests,
|
||||
// which is why only a specific set of parameters is supported (degree 2048).
|
||||
//
// Steps:
//  1. select the device `gpu_index`;
//  2. compute the number of groups, multibit_size = size / grouping_factor
//     (integer division);
//  3. obtain the launch configuration from getNumBlocksAndThreads, passing
//     1024 as its second argument;
//  4. launch modulus_switch_multi_bit<Torus, Degree<2048>> on `stream`;
//  5. check cudaGetLastError() after the launch.
// Any `degree` other than 2048 triggers PANIC.
// NOTE(review): grouping_factor == 0 is not guarded here and would divide by
// zero in step 2 — confirm callers never pass 0.
template <typename Torus>
|
||||
__host__ void host_modulus_switch_multi_bit(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *array_out, Torus *array_in,
|
||||
int size, uint32_t log_modulus, uint32_t degree, uint32_t grouping_factor) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
int multibit_size = size / grouping_factor;
|
||||
int num_threads = 0, num_blocks = 0;
|
||||
getNumBlocksAndThreads(multibit_size, 1024, num_blocks, num_threads);
|
||||
switch (degree) {
|
||||
case 2048:
|
||||
modulus_switch_multi_bit<Torus, Degree<2048>>
|
||||
<<<num_blocks, num_threads, 0, stream>>>(
|
||||
array_out, array_in, multibit_size, log_modulus, grouping_factor);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error: unsupported polynomial size. Supported "
|
||||
"N's are powers of two in the interval [2048].")
|
||||
};
|
||||
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
#endif // CNCRT_TORUS_H
|
||||
|
||||
@@ -326,10 +326,6 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index) {
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
GPU_ASSERT(src != nullptr, "Cuda error: null device ptr");
|
||||
GPU_ASSERT(dest != nullptr, "Cuda error: null device ptr");
|
||||
|
||||
cudaPointerAttributes attr_dest;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
|
||||
PANIC_IF_FALSE(
|
||||
|
||||
@@ -1,16 +1,6 @@
|
||||
#include "integer/bitwise_ops.cuh"
|
||||
|
||||
// In-place boolean bit operation entry point for 64-bit ciphertexts.
//
// Forwards to host_boolean_bitop<uint64_t>, passing `lwe_array_inout` as both
// the output and the first operand, and `lwe_array_2` as the second operand.
// `mem_ptr` is reinterpreted as boolean_bitop_buffer<uint64_t> * (presumably
// the buffer produced by the matching scratch function — confirm).
// `ksks` is converted with a C-style cast to uint64_t **, which also drops
// the const qualifier of `void *const *`.
void cuda_boolean_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
// In-place variant: lwe_array_inout op= lwe_array_2, no aliasing check needed
|
||||
host_boolean_bitop<uint64_t>(
|
||||
CudaStreams(streams), lwe_array_inout, lwe_array_inout, lwe_array_2,
|
||||
(boolean_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
|
||||
uint64_t scratch_cuda_boolean_bitop_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
@@ -30,8 +20,20 @@ uint64_t scratch_cuda_boolean_bitop_inplace_64_async(
|
||||
lwe_ciphertext_count, params, op_type, is_unchecked, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cleanup_cuda_boolean_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
// Boolean bit operation entry point for 64-bit ciphertexts with a separate
// output.
//
// Forwards to host_boolean_bitop<uint64_t> with `lwe_array_out` as the
// destination and `lwe_array_1` / `lwe_array_2` as the two operands.
// `mem_ptr` is reinterpreted as boolean_bitop_buffer<uint64_t> * and `ksks`
// is C-style cast to uint64_t ** (dropping the const of `void *const *`).
// This wrapper itself does not check whether the output aliases an input.
void cuda_boolean_bitop_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
|
||||
host_boolean_bitop<uint64_t>(
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_1, lwe_array_2,
|
||||
(boolean_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
|
||||
}
|
||||
|
||||
void cleanup_cuda_boolean_bitop_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
boolean_bitop_buffer<uint64_t> *mem_ptr =
|
||||
(boolean_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
@@ -78,6 +80,44 @@ void cleanup_cuda_boolean_bitnot_64(CudaStreamsFFI streams,
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
// Scratch (setup) entry point for the 64-bit integer bit operation.
//
// Packs the cryptographic parameters into an int_radix_params object and
// forwards to scratch_cuda_bitop<uint64_t>, with `mem_ptr` reinterpreted as
// int_bitop_buffer<uint64_t> **.
// Returns whatever scratch_cuda_bitop returns (presumably the amount of
// memory tracked for the buffer — TODO confirm against its definition).
// Note that the int_radix_params constructor argument order differs from this
// function's parameter order (pbs_type first, noise_reduction_type last).
uint64_t scratch_cuda_integer_bitop_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
// Scratch (setup) entry point for the 64-bit integer scalar bit operation.
//
// Builds an int_radix_params from the supplied parameters and forwards to
// scratch_cuda_bitop<uint64_t>, with `mem_ptr` reinterpreted as
// int_bitop_buffer<uint64_t> **; the scalar variant uses the same buffer type
// as seen in this call.
// Returns the value produced by scratch_cuda_bitop (presumably a memory size
// — TODO confirm against its definition).
uint64_t scratch_cuda_integer_scalar_bitop_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_bitnot_ciphertext_64(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *radix_ciphertext,
|
||||
uint32_t ct_message_modulus,
|
||||
@@ -89,37 +129,20 @@ void cuda_bitnot_ciphertext_64(CudaStreamsFFI streams,
|
||||
cuda_synchronize_stream(cuda_streams.stream(0), cuda_streams.gpu_index(0));
|
||||
}
|
||||
|
||||
void cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
// In-place variant: lwe_array_inout op= lwe_array_2, no aliasing check needed
|
||||
host_bitop<uint64_t>(CudaStreams(streams), lwe_array_inout, lwe_array_inout,
|
||||
// Integer bit operation entry point for 64-bit radix ciphertexts with a
// separate output.
//
// Forwards to host_bitop<uint64_t> with `lwe_array_out` as destination and
// `lwe_array_1` / `lwe_array_2` as operands. `mem_ptr` is reinterpreted as
// int_bitop_buffer<uint64_t> * and `ksks` is C-style cast to uint64_t **
// (dropping the const of `void *const *`).
// No aliasing check between output and inputs is done in this wrapper.
void cuda_integer_bitop_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
|
||||
host_bitop<uint64_t>(CudaStreams(streams), lwe_array_out, lwe_array_1,
|
||||
lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
}
|
||||
|
||||
// Scratch (setup) entry point for the in-place 64-bit integer bit operation.
//
// Collects the parameters into an int_radix_params and forwards to
// scratch_cuda_bitop<uint64_t>; `mem_ptr` is reinterpreted as
// int_bitop_buffer<uint64_t> **.
// The return value is passed through from scratch_cuda_bitop (presumably a
// memory size — TODO confirm against its definition).
uint64_t scratch_cuda_integer_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_integer_bitop_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_bitop_buffer<uint64_t> *mem_ptr =
|
||||
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
@@ -128,27 +151,8 @@ void cleanup_cuda_integer_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
*mem_ptr_void = nullptr;
|
||||
}
|
||||
|
||||
// Scratch (setup) entry point for the in-place 64-bit integer scalar bit
// operation.
//
// Collects the parameters into an int_radix_params and forwards to
// scratch_cuda_bitop<uint64_t>; `mem_ptr` is reinterpreted as
// int_bitop_buffer<uint64_t> **.
// The return value is passed through from scratch_cuda_bitop (presumably a
// memory size — TODO confirm against its definition).
uint64_t scratch_cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus, noise_reduction_type);
|
||||
|
||||
return scratch_cuda_bitop<uint64_t>(
|
||||
CudaStreams(streams), (int_bitop_buffer<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_scalar_bitop_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_integer_scalar_bitop_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_bitop_buffer<uint64_t> *mem_ptr =
|
||||
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
void extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
|
||||
CudaStreamsFFI streams) {
|
||||
PANIC_IF_FALSE(output != input, "Output and input pointers must be different "
|
||||
"for out-of-place operations");
|
||||
auto cuda_streams = CudaStreams(streams);
|
||||
host_extend_radix_with_trivial_zero_blocks_msb<uint64_t>(output, input,
|
||||
cuda_streams);
|
||||
@@ -14,8 +12,6 @@ void extend_radix_with_trivial_zero_blocks_msb_64(
|
||||
void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
CudaStreamsFFI streams) {
|
||||
PANIC_IF_FALSE(output != input, "Output and input pointers must be different "
|
||||
"for out-of-place operations");
|
||||
|
||||
auto cuda_streams = CudaStreams(streams);
|
||||
host_trim_radix_blocks_lsb<uint64_t>(output, input, cuda_streams);
|
||||
@@ -25,8 +21,6 @@ void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
|
||||
void trim_radix_blocks_msb_64(CudaRadixCiphertextFFI *output,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
CudaStreamsFFI streams) {
|
||||
PANIC_IF_FALSE(output != input, "Output and input pointers must be different "
|
||||
"for out-of-place operations");
|
||||
|
||||
auto cuda_streams = CudaStreams(streams);
|
||||
host_trim_radix_blocks_msb<uint64_t>(output, input, cuda_streams);
|
||||
@@ -60,8 +54,6 @@ void cuda_cast_to_unsigned_64_async(CudaStreamsFFI streams,
|
||||
int8_t *mem_ptr, uint32_t target_num_blocks,
|
||||
bool input_is_signed, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
PANIC_IF_FALSE(output != input, "Output and input pointers must be different "
|
||||
"for out-of-place operations");
|
||||
|
||||
host_cast_to_unsigned<uint64_t>(
|
||||
CudaStreams(streams), output, input,
|
||||
@@ -105,8 +97,6 @@ void cuda_cast_to_signed_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI const *input,
|
||||
int8_t *mem, bool input_is_signed,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
PANIC_IF_FALSE(output != input, "Output and input pointers must be different "
|
||||
"for out-of-place operations");
|
||||
|
||||
host_cast_to_signed<uint64_t>(CudaStreams(streams), output, input,
|
||||
(int_cast_to_signed_buffer<uint64_t> *)mem,
|
||||
|
||||
@@ -30,18 +30,6 @@ void cuda_cmux_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks) {
|
||||
PANIC_IF_FALSE(
|
||||
lwe_array_out != lwe_condition,
|
||||
"Output and condition pointers must be different for out-of-place "
|
||||
"operations");
|
||||
PANIC_IF_FALSE(
|
||||
lwe_array_out != lwe_array_true,
|
||||
"Output and true-branch pointers must be different for out-of-place "
|
||||
"operations");
|
||||
PANIC_IF_FALSE(
|
||||
lwe_array_out != lwe_array_false,
|
||||
"Output and false-branch pointers must be different for out-of-place "
|
||||
"operations");
|
||||
PUSH_RANGE("cmux")
|
||||
host_cmux<uint64_t>(CudaStreams(streams), lwe_array_out, lwe_condition,
|
||||
lwe_array_true, lwe_array_false,
|
||||
|
||||
@@ -80,12 +80,6 @@ void cuda_integer_comparison_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_1,
|
||||
"Output and first input pointers must be different for "
|
||||
"out-of-place operations");
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_2,
|
||||
"Output and second input pointers must be different for "
|
||||
"out-of-place operations");
|
||||
PUSH_RANGE("comparison")
|
||||
if (lwe_array_1->num_radix_blocks != lwe_array_2->num_radix_blocks)
|
||||
PANIC("Cuda error: input num radix blocks must be the same")
|
||||
@@ -173,9 +167,6 @@ void cuda_integer_are_all_comparisons_block_true_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_in,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
@@ -218,9 +209,6 @@ void cuda_integer_is_at_least_one_comparisons_block_true_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_in,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
@@ -27,21 +27,6 @@ void cuda_integer_div_rem_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI const *divisor,
|
||||
bool is_signed, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
PANIC_IF_FALSE(quotient != numerator,
|
||||
"Quotient and numerator pointers must be different for "
|
||||
"out-of-place operations");
|
||||
PANIC_IF_FALSE(quotient != divisor,
|
||||
"Quotient and divisor pointers must be different for "
|
||||
"out-of-place operations");
|
||||
PANIC_IF_FALSE(remainder != numerator,
|
||||
"Remainder and numerator pointers must be different for "
|
||||
"out-of-place operations");
|
||||
PANIC_IF_FALSE(remainder != divisor,
|
||||
"Remainder and divisor pointers must be different for "
|
||||
"out-of-place operations");
|
||||
PANIC_IF_FALSE(quotient != remainder,
|
||||
"Quotient and remainder pointers must be different for "
|
||||
"out-of-place operations");
|
||||
PUSH_RANGE("div")
|
||||
auto mem = (int_div_rem_memory<uint64_t> *)mem_ptr;
|
||||
|
||||
|
||||
@@ -30,9 +30,6 @@ void cuda_integer_count_of_consecutive_bits_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_ct,
|
||||
CudaRadixCiphertextFFI const *input_ct, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
PANIC_IF_FALSE(output_ct != input_ct,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
host_integer_count_of_consecutive_bits<uint64_t, uint64_t>(
|
||||
CudaStreams(streams), output_ct, input_ct,
|
||||
@@ -84,9 +81,6 @@ void cuda_integer_ilog2_64_async(
|
||||
CudaRadixCiphertextFFI const *trivial_ct_2,
|
||||
CudaRadixCiphertextFFI const *trivial_ct_m_minus_1_block, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
PANIC_IF_FALSE(output_ct != input_ct,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
host_integer_ilog2<uint64_t, uint64_t>(
|
||||
CudaStreams(streams), output_ct, input_ct, trivial_ct_neg_n, trivial_ct_2,
|
||||
|
||||
@@ -212,9 +212,6 @@ void cuda_apply_univariate_lut_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks) {
|
||||
PANIC_IF_FALSE(output_radix_lwe != input_radix_lwe,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
host_apply_univariate_lut<uint64_t>(
|
||||
CudaStreams(streams), output_radix_lwe, input_radix_lwe,
|
||||
@@ -246,9 +243,6 @@ void cuda_apply_many_univariate_lut_64_async(
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
PANIC_IF_FALSE(output_radix_lwe != input_radix_lwe,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
host_apply_many_univariate_lut<uint64_t>(
|
||||
CudaStreams(streams), output_radix_lwe, input_radix_lwe,
|
||||
@@ -318,9 +312,6 @@ void cuda_apply_noise_squashing_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks) {
|
||||
PANIC_IF_FALSE(output_radix_lwe != input_radix_lwe,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
PUSH_RANGE("apply noise squashing")
|
||||
integer_radix_apply_noise_squashing<uint64_t>(
|
||||
|
||||
@@ -61,65 +61,11 @@ void generate_ids_update_degrees(uint64_t *terms_degree, size_t *h_lwe_idx_in,
|
||||
|
||||
total_count = message_count + carry_count;
|
||||
}
|
||||
// In-place 64-bit radix integer multiplication entry point.
//
// Dispatches on `polynomial_size` to
// host_integer_mult_radix<uint64_t, AmortizedDegree<N>> for
// N in {256, 512, 1024, 2048, 4096, 8192, 16384}; any other value PANICs.
// Every branch passes the same arguments and differs only in N:
// `radix_lwe_inout` is given as both the output and the left operand,
// `ksks` is C-style cast to uint64_t ** and `mem_ptr` is reinterpreted as
// int_mul_memory<uint64_t> *.
// The dispatch is bracketed by PUSH_RANGE("mul_inplace") / POP_RANGE().
void cuda_integer_mult_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_inout,
|
||||
bool const is_bool_left, CudaRadixCiphertextFFI const *radix_lwe_right,
|
||||
bool const is_bool_right, void *const *bsks, void *const *ksks,
|
||||
int8_t *mem_ptr, uint32_t polynomial_size, uint32_t num_blocks) {
|
||||
// In-place variant: radix_lwe_inout *= radix_lwe_right, no aliasing check
|
||||
// needed
|
||||
PUSH_RANGE("mul_inplace")
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<256>>(
|
||||
CudaStreams(streams), radix_lwe_inout, radix_lwe_inout, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 512:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<512>>(
|
||||
CudaStreams(streams), radix_lwe_inout, radix_lwe_inout, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<1024>>(
|
||||
CudaStreams(streams), radix_lwe_inout, radix_lwe_inout, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<2048>>(
|
||||
CudaStreams(streams), radix_lwe_inout, radix_lwe_inout, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<4096>>(
|
||||
CudaStreams(streams), radix_lwe_inout, radix_lwe_inout, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<8192>>(
|
||||
CudaStreams(streams), radix_lwe_inout, radix_lwe_inout, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<16384>>(
|
||||
CudaStreams(streams), radix_lwe_inout, radix_lwe_inout, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
|
||||
"Supported N's are powers of two in the interval [256..16384].")
|
||||
}
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
uint64_t scratch_cuda_integer_mult_inplace_64_async(
|
||||
/*
|
||||
* This scratch function allocates the necessary amount of data on the GPU for
|
||||
* the integer radix multiplication in keyswitch->bootstrap order.
|
||||
*/
|
||||
uint64_t scratch_cuda_integer_mult_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, bool const is_boolean_left,
|
||||
bool const is_boolean_right, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
|
||||
@@ -151,8 +97,94 @@ uint64_t scratch_cuda_integer_mult_inplace_64_async(
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_mult_inplace_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
/*
|
||||
* Computes a multiplication between two 64 bit radix lwe ciphertexts
|
||||
* encrypting integer values. keyswitch -> bootstrap pattern is used, function
|
||||
* works for single pair of radix ciphertexts.
|
||||
* - 'streams' CUDA streams descriptor, wrapped in CudaStreams for the call
|
||||
* - 'radix_lwe_out' is 64 bit radix big lwe ciphertext, product of
|
||||
* multiplication
|
||||
* - 'radix_lwe_left' left radix big lwe ciphertext
|
||||
* - 'is_bool_left' forwarded unchanged to host_integer_mult_radix
|
||||
* (presumably marks the left operand as boolean - to be confirmed)
|
||||
* - 'radix_lwe_right' right radix big lwe ciphertext
|
||||
* - 'is_bool_right' forwarded unchanged to host_integer_mult_radix
|
||||
* (presumably marks the right operand as boolean - to be confirmed)
|
||||
* - 'bsks' bootstrapping keys, forwarded as is
|
||||
* - 'ksks' keyswitching keys, C-style cast to uint64_t ** before forwarding
|
||||
* - 'mem_ptr' scratch buffer, reinterpreted as int_mul_memory<uint64_t> *
|
||||
* - 'polynomial_size' polynomial size; selects the AmortizedDegree<N>
|
||||
* instantiation and must be a power of two in [256..16384], otherwise the
|
||||
* function PANICs
|
||||
* - 'num_blocks' is the number of big lwe ciphertext blocks inside radix
|
||||
* ciphertext
|
||||
* The dispatch is bracketed by PUSH_RANGE("mul") / POP_RANGE().
|
||||
* NOTE(review): an earlier revision of this comment also listed 'v_stream',
|
||||
* 'gpu_index', 'message_modulus', 'carry_modulus', 'glwe_dimension',
|
||||
* 'lwe_dimension', 'pbs_base_log', 'pbs_level', 'ks_level' and 'pbs_type';
|
||||
* none of them is a parameter of this function.
|
||||
*/
|
||||
void cuda_integer_mult_64_async(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI const *radix_lwe_left,
|
||||
bool const is_bool_left,
|
||||
CudaRadixCiphertextFFI const *radix_lwe_right,
|
||||
bool const is_bool_right, void *const *bsks,
|
||||
void *const *ksks, int8_t *mem_ptr,
|
||||
uint32_t polynomial_size, uint32_t num_blocks) {
|
||||
PUSH_RANGE("mul")
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<256>>(
|
||||
CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 512:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<512>>(
|
||||
CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<1024>>(
|
||||
CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<2048>>(
|
||||
CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<4096>>(
|
||||
CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<8192>>(
|
||||
CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_mult_radix<uint64_t, AmortizedDegree<16384>>(
|
||||
CudaStreams(streams), radix_lwe_out, radix_lwe_left, is_bool_left,
|
||||
radix_lwe_right, is_bool_right, bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
|
||||
"Supported N's are powers of two in the interval [256..16384].")
|
||||
}
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_mult_64(CudaStreamsFFI streams,
|
||||
int8_t **mem_ptr_void) {
|
||||
PUSH_RANGE("cleanup mul")
|
||||
int_mul_memory<uint64_t> *mem_ptr =
|
||||
(int_mul_memory<uint64_t> *)(*mem_ptr_void);
|
||||
@@ -189,9 +221,6 @@ void cuda_partial_sum_ciphertexts_vec_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
PANIC_IF_FALSE(radix_lwe_out != radix_lwe_vec,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
auto mem = (int_sum_ciphertexts_vec_memory<uint64_t> *)mem_ptr;
|
||||
if (radix_lwe_vec->num_radix_blocks % radix_lwe_out->num_radix_blocks != 0)
|
||||
|
||||
@@ -5,9 +5,6 @@ void cuda_negate_ciphertext_64(CudaStreamsFFI streams,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in,
|
||||
uint32_t message_modulus, uint32_t carry_modulus,
|
||||
uint32_t num_radix_blocks) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_in,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
auto cuda_streams = CudaStreams(streams);
|
||||
host_negation<uint64_t>(cuda_streams, lwe_array_out, lwe_array_in,
|
||||
|
||||
@@ -72,13 +72,13 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
|
||||
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
|
||||
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
|
||||
void *const *compute_bsks, void *const *ksks) {
|
||||
void *const *ksks) {
|
||||
|
||||
host_integer_grouped_oprf_custom_range<uint64_t>(
|
||||
CudaStreams(streams), radix_lwe_out, num_blocks_intermediate,
|
||||
(const uint64_t *)seeded_lwe_input, decomposed_scalar,
|
||||
has_at_least_one_set, num_scalars, shift,
|
||||
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks, compute_bsks,
|
||||
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks,
|
||||
(uint64_t *const *)ksks);
|
||||
}
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ void host_integer_grouped_oprf_custom_range(
|
||||
const Torus *decomposed_scalar, const Torus *has_at_least_one_set,
|
||||
uint32_t num_scalars, uint32_t shift,
|
||||
int_grouped_oprf_custom_range_memory<Torus> *mem_ptr, void *const *bsks,
|
||||
void *const *compute_bsks, Torus *const *ksks) {
|
||||
Torus *const *ksks) {
|
||||
|
||||
CudaRadixCiphertextFFI *computation_buffer = mem_ptr->tmp_oprf_output;
|
||||
set_zero_radix_ciphertext_slice_async<Torus>(
|
||||
@@ -127,12 +127,12 @@ void host_integer_grouped_oprf_custom_range(
|
||||
|
||||
host_integer_scalar_mul_radix<Torus>(
|
||||
streams, computation_buffer, decomposed_scalar, has_at_least_one_set,
|
||||
mem_ptr->scalar_mul_buffer, compute_bsks, ksks,
|
||||
mem_ptr->params.message_modulus, num_scalars);
|
||||
mem_ptr->scalar_mul_buffer, bsks, ksks, mem_ptr->params.message_modulus,
|
||||
num_scalars);
|
||||
|
||||
host_logical_scalar_shift_inplace<Torus>(
|
||||
streams, computation_buffer, shift, mem_ptr->logical_scalar_shift_buffer,
|
||||
compute_bsks, ksks, num_blocks_intermediate);
|
||||
host_logical_scalar_shift_inplace<Torus>(streams, computation_buffer, shift,
|
||||
mem_ptr->logical_scalar_shift_buffer,
|
||||
bsks, ksks, num_blocks_intermediate);
|
||||
|
||||
uint32_t num_blocks_output = radix_lwe_out->num_radix_blocks;
|
||||
uint32_t blocks_to_copy =
|
||||
|
||||
@@ -5,7 +5,7 @@ uint64_t scratch_cuda_rerand_64_async(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory, RERAND_MODE rerand_type) {
|
||||
uint32_t carry_modulus, bool allocate_gpu_memory) {
|
||||
PUSH_RANGE("scratch rerand")
|
||||
int_radix_params params(PBS_TYPE::CLASSICAL, 0, 0, big_lwe_dimension,
|
||||
small_lwe_dimension, ks_level, ks_base_log, 0, 0, 0,
|
||||
@@ -13,9 +13,8 @@ uint64_t scratch_cuda_rerand_64_async(
|
||||
PBS_MS_REDUCTION_T::NO_REDUCTION);
|
||||
|
||||
uint64_t ret = scratch_cuda_rerand<uint64_t>(
|
||||
CudaStreams(streams),
|
||||
reinterpret_cast<int_rerand_mem<uint64_t> **>(mem_ptr),
|
||||
lwe_ciphertext_count, params, allocate_gpu_memory, rerand_type);
|
||||
CudaStreams(streams), (int_rerand_mem<uint64_t> **)mem_ptr,
|
||||
lwe_ciphertext_count, params, allocate_gpu_memory);
|
||||
POP_RANGE()
|
||||
return ret;
|
||||
}
|
||||
@@ -29,7 +28,7 @@ void cuda_rerand_64_async(
|
||||
CudaStreamsFFI streams, void *lwe_array,
|
||||
const void *lwe_flattened_encryptions_of_zero_compact_array_in,
|
||||
int8_t *mem_ptr, void *const *ksk) {
|
||||
PUSH_RANGE("rerand")
|
||||
|
||||
auto rerand_buffer = reinterpret_cast<int_rerand_mem<uint64_t> *>(mem_ptr);
|
||||
|
||||
switch (rerand_buffer->params.big_lwe_dimension) {
|
||||
@@ -38,49 +37,49 @@ void cuda_rerand_64_async(
|
||||
streams, static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(
|
||||
lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
reinterpret_cast<uint64_t *const *>(ksk), rerand_buffer);
|
||||
(uint64_t **)(ksk), rerand_buffer);
|
||||
break;
|
||||
case 512:
|
||||
host_rerand_inplace<uint64_t, AmortizedDegree<512>>(
|
||||
streams, static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(
|
||||
lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
reinterpret_cast<uint64_t *const *>(ksk), rerand_buffer);
|
||||
(uint64_t **)(ksk), rerand_buffer);
|
||||
break;
|
||||
case 1024:
|
||||
host_rerand_inplace<uint64_t, AmortizedDegree<1024>>(
|
||||
streams, static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(
|
||||
lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
reinterpret_cast<uint64_t *const *>(ksk), rerand_buffer);
|
||||
(uint64_t **)(ksk), rerand_buffer);
|
||||
break;
|
||||
case 2048:
|
||||
host_rerand_inplace<uint64_t, AmortizedDegree<2048>>(
|
||||
streams, static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(
|
||||
lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
reinterpret_cast<uint64_t *const *>(ksk), rerand_buffer);
|
||||
(uint64_t **)(ksk), rerand_buffer);
|
||||
break;
|
||||
case 4096:
|
||||
host_rerand_inplace<uint64_t, AmortizedDegree<4096>>(
|
||||
streams, static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(
|
||||
lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
reinterpret_cast<uint64_t *const *>(ksk), rerand_buffer);
|
||||
(uint64_t **)(ksk), rerand_buffer);
|
||||
break;
|
||||
case 8192:
|
||||
host_rerand_inplace<uint64_t, AmortizedDegree<8192>>(
|
||||
streams, static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(
|
||||
lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
reinterpret_cast<uint64_t *const *>(ksk), rerand_buffer);
|
||||
(uint64_t **)(ksk), rerand_buffer);
|
||||
break;
|
||||
case 16384:
|
||||
host_rerand_inplace<uint64_t, AmortizedDegree<16384>>(
|
||||
streams, static_cast<uint64_t *>(lwe_array),
|
||||
static_cast<const uint64_t *>(
|
||||
lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
reinterpret_cast<uint64_t *const *>(ksk), rerand_buffer);
|
||||
(uint64_t **)(ksk), rerand_buffer);
|
||||
break;
|
||||
default:
|
||||
PANIC("CUDA error: lwe_dimension not supported."
|
||||
@@ -88,12 +87,12 @@ void cuda_rerand_64_async(
|
||||
" in the interval [256..16384].");
|
||||
break;
|
||||
}
|
||||
POP_RANGE()
|
||||
}
|
||||
|
||||
void cleanup_cuda_rerand_64(CudaStreamsFFI streams, int8_t **mem_ptr_void) {
|
||||
PUSH_RANGE("cleanup rerand")
|
||||
auto *mem_ptr = reinterpret_cast<int_rerand_mem<uint64_t> *>(*mem_ptr_void);
|
||||
int_rerand_mem<uint64_t> *mem_ptr =
|
||||
(int_rerand_mem<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release(CudaStreams(streams));
|
||||
delete mem_ptr;
|
||||
*mem_ptr_void = nullptr;
|
||||
|
||||
@@ -14,25 +14,26 @@ void host_rerand_inplace(
|
||||
CudaStreams const streams, Torus *lwe_array,
|
||||
const Torus *lwe_flattened_encryptions_of_zero_compact_array_in,
|
||||
Torus *const *ksk, int_rerand_mem<Torus> *mem_ptr) {
|
||||
auto rerand_mode = mem_ptr->rerand_mode;
|
||||
auto expanded_zero_lwes = mem_ptr->tmp_expanded_zero_lwes;
|
||||
auto zero_lwes = mem_ptr->tmp_zero_lwes;
|
||||
auto num_lwes = mem_ptr->num_lwes;
|
||||
auto ksed_zero_lwes = mem_ptr->tmp_ksed_zero_lwes;
|
||||
auto lwe_trivial_indexes = mem_ptr->lwe_trivial_indexes;
|
||||
auto ksk_params = mem_ptr->params;
|
||||
auto output_dimension = ksk_params.small_lwe_dimension;
|
||||
auto input_dimension = ksk_params.big_lwe_dimension;
|
||||
auto ks_level = ksk_params.ks_level;
|
||||
auto ks_base_log = ksk_params.ks_base_log;
|
||||
auto message_modulus = ksk_params.message_modulus;
|
||||
auto carry_modulus = ksk_params.carry_modulus;
|
||||
|
||||
auto rerand_params = mem_ptr->params;
|
||||
auto message_modulus = rerand_params.message_modulus;
|
||||
auto carry_modulus = rerand_params.carry_modulus;
|
||||
auto input_dimension = rerand_params.big_lwe_dimension;
|
||||
// Default to input dimension; overridden to small_lwe_dimension in the KS
|
||||
// path
|
||||
auto output_dimension = input_dimension;
|
||||
|
||||
static_assert(sizeof(Torus) == 8, "expand is only supported on 64 bits");
|
||||
GPU_ASSERT(sizeof(Torus) == 8,
|
||||
"Cuda error: expand is only supported on 64 bits");
|
||||
|
||||
// Expand encryptions of zero
|
||||
// Wraps the input into a flattened_compact_lwe_lists type
|
||||
auto compact_lwe_lists = flattened_compact_lwe_lists<Torus>(
|
||||
const_cast<Torus *>(lwe_flattened_encryptions_of_zero_compact_array_in),
|
||||
&num_lwes, static_cast<uint32_t>(1), input_dimension);
|
||||
&num_lwes, (uint32_t)1, input_dimension);
|
||||
auto h_expand_jobs = mem_ptr->h_expand_jobs;
|
||||
auto d_expand_jobs = mem_ptr->d_expand_jobs;
|
||||
|
||||
@@ -52,30 +53,20 @@ void host_rerand_inplace(
|
||||
streams.stream(0), streams.gpu_index(0), true);
|
||||
|
||||
host_lwe_expand<Torus, params>(streams.stream(0), streams.gpu_index(0),
|
||||
expanded_zero_lwes, d_expand_jobs, num_lwes);
|
||||
zero_lwes, d_expand_jobs, num_lwes);
|
||||
|
||||
auto lwes_to_be_added = expanded_zero_lwes;
|
||||
if (rerand_mode == RERAND_MODE::RERAND_WITH_KS) {
|
||||
lwes_to_be_added = mem_ptr->tmp_ksed_expanded_zero_lwes;
|
||||
output_dimension = rerand_params.small_lwe_dimension;
|
||||
auto ks_level = rerand_params.ks_level;
|
||||
auto ks_base_log = rerand_params.ks_base_log;
|
||||
auto lwe_trivial_indexes = mem_ptr->lwe_trivial_indexes;
|
||||
|
||||
// Keyswitch
|
||||
execute_keyswitch_async<Torus>(streams.get_ith(0), lwes_to_be_added,
|
||||
lwe_trivial_indexes, expanded_zero_lwes,
|
||||
lwe_trivial_indexes, ksk, input_dimension,
|
||||
output_dimension, ks_base_log, ks_level,
|
||||
num_lwes, true, mem_ptr->ks_tmp_buf_vec);
|
||||
}
|
||||
// Keyswitch
|
||||
execute_keyswitch_async<Torus>(
|
||||
streams.get_ith(0), ksed_zero_lwes, lwe_trivial_indexes, zero_lwes,
|
||||
lwe_trivial_indexes, ksk, input_dimension, output_dimension, ks_base_log,
|
||||
ks_level, num_lwes, true, mem_ptr->ks_tmp_buf_vec);
|
||||
|
||||
// Add ks output to ct
|
||||
// Check sizes
|
||||
CudaRadixCiphertextFFI lwes_ffi;
|
||||
into_radix_ciphertext(&lwes_ffi, lwe_array, num_lwes, output_dimension);
|
||||
CudaRadixCiphertextFFI ksed_zero_lwes_ffi;
|
||||
into_radix_ciphertext(&ksed_zero_lwes_ffi, lwes_to_be_added, num_lwes,
|
||||
into_radix_ciphertext(&ksed_zero_lwes_ffi, ksed_zero_lwes, num_lwes,
|
||||
output_dimension);
|
||||
host_addition<Torus>(streams.stream(0), streams.gpu_index(0), &lwes_ffi,
|
||||
&lwes_ffi, &ksed_zero_lwes_ffi, num_lwes,
|
||||
@@ -90,11 +81,10 @@ __host__ uint64_t scratch_cuda_rerand(CudaStreams streams,
|
||||
int_rerand_mem<Torus> **mem_ptr,
|
||||
uint32_t num_lwes,
|
||||
int_radix_params params,
|
||||
bool allocate_gpu_memory,
|
||||
RERAND_MODE rerand_mode) {
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
uint64_t size_tracker = 0;
|
||||
*mem_ptr = new int_rerand_mem<Torus>(streams, params, num_lwes, rerand_mode,
|
||||
*mem_ptr = new int_rerand_mem<Torus>(streams, params, num_lwes,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
return size_tracker;
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "integer/scalar_bitops.cuh"
|
||||
|
||||
void cuda_integer_scalar_bitop_inplace_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_inout,
|
||||
void const *clear_blocks, void const *h_clear_blocks,
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks) {
|
||||
// In-place variant: lwe_array_inout op= scalar, no aliasing check needed
|
||||
void cuda_integer_scalar_bitop_64_async(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
|
||||
void const *h_clear_blocks, uint32_t num_clear_blocks, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
|
||||
host_scalar_bitop<uint64_t>(
|
||||
CudaStreams(streams), lwe_array_inout, lwe_array_inout,
|
||||
CudaStreams(streams), lwe_array_out, lwe_array_input,
|
||||
static_cast<const uint64_t *>(clear_blocks),
|
||||
static_cast<const uint64_t *>(h_clear_blocks), num_clear_blocks,
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
|
||||
|
||||
@@ -36,9 +36,6 @@ void cuda_integer_scalar_comparison_64_async(
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, void const *scalar_blocks,
|
||||
void const *h_scalar_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_scalar_blocks) {
|
||||
PANIC_IF_FALSE(lwe_array_out != lwe_array_in,
|
||||
"Output and input pointers must be different for out-of-place "
|
||||
"operations");
|
||||
|
||||
// The output ciphertext might be a boolean block or a radix ciphertext
|
||||
// depending on the case (eq/gt vs max/min) so the amount of blocks to
|
||||
|
||||
@@ -118,9 +118,6 @@ void cuda_integer_unsigned_scalar_div_rem_radix_64_async(
|
||||
uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
|
||||
void const *clear_blocks, void const *h_clear_blocks,
|
||||
uint32_t num_clear_blocks) {
|
||||
PANIC_IF_FALSE(quotient_ct != remainder_ct,
|
||||
"Quotient and remainder pointers must be different for "
|
||||
"out-of-place operations");
|
||||
|
||||
host_integer_unsigned_scalar_div_rem_radix<uint64_t>(
|
||||
CudaStreams(streams), quotient_ct, remainder_ct,
|
||||
@@ -171,9 +168,6 @@ void cuda_integer_signed_scalar_div_rem_radix_64_async(
|
||||
uint64_t const *divisor_has_at_least_one_set,
|
||||
uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
|
||||
uint32_t numerator_bits) {
|
||||
PANIC_IF_FALSE(quotient_ct != remainder_ct,
|
||||
"Quotient and remainder pointers must be different for "
|
||||
"out-of-place operations");
|
||||
|
||||
host_integer_signed_scalar_div_rem_radix<uint64_t>(
|
||||
CudaStreams(streams), quotient_ct, remainder_ct,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user