Compare commits

..

6 Commits

Author SHA1 Message Date
Guillermo Oyarzun
3091ca796c feat(gpu): create noise and pfail tests for rerand 2026-03-28 07:45:19 +01:00
Guillermo Oyarzun
8b6f36286f chore(gpu): coding improvs 2026-03-26 17:25:40 +01:00
Guillermo Oyarzun
beee5b7777 feat(gpu): add pbs128 pattern to multi-bit noise test 2026-03-26 10:39:08 +01:00
Guillermo Oyarzun
0874a19ecf feat(gpu): add cpk ks ms pattern to multi-bit noise tests 2026-03-26 10:38:41 +01:00
Guillermo Oyarzun
4124796b09 feat(gpu): add packing ks multi-bit noise tests 2026-03-26 10:38:41 +01:00
Guillermo Oyarzun
41e6bb4f64 feat(gpu): add br_dp_ks_ms pattern to multi-bit noise tests 2026-03-26 10:38:41 +01:00
357 changed files with 5670 additions and 15573 deletions

View File

@@ -4,9 +4,6 @@ ignore = [
"RUSTSEC-2024-0436",
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex in the short term.
"RUSTSEC-2025-0141",
# Ignoring unsoundness in 'rand' with custom logger. Rand update is currently blocked by
# arkworks and we do not use custom loggers.
"RUSTSEC-2026-0097",
]
[output]

View File

@@ -54,7 +54,7 @@ jobs:
- name: Retrieve data from cache
id: retrieve-data-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
@@ -89,7 +89,7 @@ jobs:
- name: Store data in cache
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor

View File

@@ -16,6 +16,7 @@ env:
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
@@ -36,7 +37,6 @@ jobs:
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
versionable_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.versionable_any_changed }}
safe_serialize_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.safe_serialize_any_changed }}
core_crypto_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.core_crypto_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
@@ -64,7 +64,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: "false"
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
@@ -79,7 +79,6 @@ jobs:
- tfhe-zk-pok/**
- utils/tfhe-versionable/**
- utils/tfhe-versionable-derive/**
- utils/tfhe-safe-serialize/**
csprng:
- tfhe-csprng/**
zk_pok:
@@ -87,8 +86,6 @@ jobs:
versionable:
- utils/tfhe-versionable/**
- utils/tfhe-versionable-derive/**
safe_serialize:
- utils/tfhe-safe-serialize/**
core_crypto:
- tfhe/src/core_crypto/**
boolean:
@@ -125,7 +122,6 @@ jobs:
steps.changed-files.outputs.csprng_any_changed == 'true' ||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
steps.changed-files.outputs.versionable_any_changed == 'true' ||
steps.changed-files.outputs.safe_serialize_any_changed == 'true' ||
steps.changed-files.outputs.core_crypto_any_changed == 'true' ||
steps.changed-files.outputs.boolean_any_changed == 'true' ||
steps.changed-files.outputs.shortint_any_changed == 'true' ||
@@ -149,7 +145,7 @@ jobs:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
persist-credentials: "false"
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
@@ -174,11 +170,6 @@ jobs:
run: |
make test_versionable
- name: Run tfhe-safe-serialize tests
if: needs.should-run.outputs.safe_serialize_test == 'true'
run: |
make test_safe_serialize
- name: Run core tests
if: needs.should-run.outputs.core_crypto_test == 'true'
run: |
@@ -200,7 +191,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
~/.nvm
@@ -213,7 +204,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -34,7 +34,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -14,11 +14,12 @@ env:
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [labeled]
types: [ labeled ]
permissions:
contents: read
@@ -31,16 +32,16 @@ jobs:
if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved')
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read # Needed to check for file change
outputs:
wasm_test: ${{ github.event_name == 'workflow_dispatch' ||
steps.changed-files.outputs.wasm_any_changed }}
steps.changed-files.outputs.wasm_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: "false"
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
@@ -62,7 +63,6 @@ jobs:
- tfhe/js_on_wasm_tests/**
- tfhe/web_wasm_parallel_tests/**
- utils/tfhe-versionable/**
- utils/tfhe-safe-serialize/**
- .github/workflows/aws_tfhe_wasm_tests.yml
wasm-tests:
@@ -78,7 +78,7 @@ jobs:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
persist-credentials: "false"
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
@@ -92,7 +92,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
~/.nvm
@@ -105,7 +105,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -128,21 +128,15 @@ jobs:
run: |
make test_nodejs_wasm_api_ci
- name: Run parallel wasm tests
run: |
make test_web_js_api_parallel_chrome_ci
- name: Run wasm_par_mq tests
run: |
make test_wasm_par_mq_chrome_ci
make test_wasm_par_mq_firefox_ci
- name: Run parallel wasm tests
run: |
make test_web_js_api_parallel_chrome_ci
make test_web_js_api_parallel_firefox_ci
- name: Run cross origin wasm tests
run: |
make test_web_js_api_cross_origin_chrome_ci
make test_web_js_api_cross_origin_firefox_ci
- name: Run x86_64/wasm zk compatibility tests
run: |
make test_zk_wasm_x86_compat_ci

View File

@@ -6,9 +6,6 @@ name: backward_compat_pr_change_report
on:
pull_request:
env:
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
permissions:
contents: read
@@ -17,35 +14,9 @@ concurrency:
cancel-in-progress: true
jobs:
should-run:
name: backward_compat_pr_change_report/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
backward_report: ${{ steps.changed-files.outputs.backward_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
backward:
- utils/tfhe-lints/snapshots/*.json
change-report:
name: backward_compat_pr_change_report/change-report (bpr)
runs-on: ubuntu-latest
needs: should-run
if:
needs.should-run.outputs.backward_report == 'true'
permissions:
pull-requests: write # To send and modify message in the PR
steps:
@@ -79,11 +50,19 @@ jobs:
exit 1
fi
- name: Post/refresh backward-compat report
- name: Find existing comment
if: steps.report.outputs.has_report == 'true'
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0
id: find-comment
uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4.0.0
with:
header: backward-compat-snapshot
hide_and_recreate: true
hide_classify: OUTDATED
path: report.md
issue-number: ${{ github.event.pull_request.number }}
body-includes: '**Backward-compat snapshot:'
- name: Comment on PR
if: steps.report.outputs.has_report == 'true'
uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0
with:
comment-id: ${{ steps.find-comment.outputs.comment-id }}
issue-number: ${{ github.event.pull_request.number }}
body-path: report.md
edit-mode: replace

View File

@@ -14,12 +14,11 @@ on:
- signed_integer
- integer_compression
- integer_zk
- msm_zk
- shortint
- shortint_oprf
- hlapi_unsigned
- hlapi_signed
- hlapi_erc7984
- hlapi_erc20
- hlapi_dex
- hlapi_noise_squash
- hlapi_kvstore
@@ -93,8 +92,8 @@ jobs:
if inputs_command == "integer_zk":
files_to_parse.append("pke_zk_crs_sizes.csv")
elif inputs_command == "hlapi_erc7984":
files_to_parse.append("erc7984_pbs_count.csv")
elif inputs_command == "hlapi_erc20":
files_to_parse.append("erc20_pbs_count.csv")
elif inputs_command == "hlapi_dex":
files_to_parse.extend(
[

View File

@@ -107,7 +107,7 @@ jobs:
]:
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
- name: Set matrix arguments outputs
- name: Set matrix arguments outputs
id: set_matrix_args
run: | # zizmor: ignore[template-injection] these env variables are safe
{
@@ -126,7 +126,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -223,7 +223,7 @@ jobs:
results_type: ${{ inputs.additional_results_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -261,7 +261,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -108,14 +108,14 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-hlapi-erc7984:
name: benchmark_cpu_weekly/run-benchmarks-hlapi-erc7984
run-benchmarks-hlapi-erc20:
name: benchmark_cpu_weekly/run-benchmarks-hlapi-erc20
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: hlapi_erc7984
additional_file_to_parse: erc7984_pbs_count.csv
command: hlapi_erc20
additional_file_to_parse: erc20_pbs_count.csv
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -33,7 +33,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_ct_key_sizes
path: ${{ env.RESULTS_FILENAME }}
@@ -137,7 +137,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -17,10 +17,6 @@ on:
description: "Run GPU core-crypto benchmarks"
type: boolean
default: true
run-gpu-zk-benchmarks:
description: "Run GPU ZK benchmarks"
type: boolean
default: true
run-hpu-benchmarks:
description: "Run HPU benchmarks"
type: boolean
@@ -40,7 +36,7 @@ jobs:
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: integer,hlapi_erc7984
command: integer,hlapi_erc20
op_flavor: fast_default
bench_type: both
precisions_set: documentation
@@ -95,7 +91,7 @@ jobs:
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: integer_multi_bit,hlapi_erc7984
command: integer_multi_bit,hlapi_erc20
op_flavor: fast_default
bench_type: both
precisions_set: documentation
@@ -114,7 +110,7 @@ jobs:
uses: ./.github/workflows/benchmark_hpu_common.yml
if: inputs.run-hpu-benchmarks
with:
command: integer,hlapi_erc7984
command: integer,hlapi_erc20
op_flavor: default
bench_type: both
precisions_set: documentation
@@ -169,42 +165,21 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu-zk-server:
name: benchmark_documentation/run-benchmarks-gpu-zk-server
uses: ./.github/workflows/benchmark_gpu_common.yml
if: inputs.run-gpu-zk-benchmarks
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: integer_zk
op_flavor: default
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
generate-svgs-with-benchmarks-run:
name: benchmark-documentation/generate-svgs-with-benchmarks-run
if: ${{ always() &&
(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks || inputs.run-hpu-benchmarks) &&
(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks ||inputs.run-hpu-benchmarks) &&
inputs.generate-svgs }}
needs: [
run-benchmarks-cpu-integer, run-benchmarks-gpu-integer, run-benchmarks-hpu-integer,
run-benchmarks-cpu-zk-server, run-benchmarks-cpu-zk-client,
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto,
run-benchmarks-gpu-zk-server
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto
]
uses: ./.github/workflows/generate_svgs.yml
with:
time_span_days: 5
generate-cpu-svgs: ${{ inputs.run-cpu-benchmarks }}
generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks }}
generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks }}
generate-hpu-svgs: ${{ inputs.run-hpu-benchmarks }}
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
@@ -213,7 +188,7 @@ jobs:
generate-svgs-without-benchmarks-run:
name: benchmark-documentation/generate-svgs-without-benchmarks-run
if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks || inputs.run-hpu-benchmarks) &&
if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-hpu-benchmarks) &&
inputs.generate-svgs }}
uses: ./.github/workflows/generate_svgs.yml
with:

View File

@@ -31,13 +31,10 @@ on:
- pbs128
- ks
- ks_pbs
- tfhe_zk_pok
- msm_zk
- integer_zk
- integer_zk_experimental
- integer_aes
- integer_aes256
- hlapi_erc7984
- hlapi_erc20
- hlapi_dex
- hlapi_noise_squash
op_flavor:
@@ -123,8 +120,8 @@ jobs:
if inputs_command == "integer_zk":
files_to_parse.append("pke_zk_crs_sizes.csv")
elif inputs_command == "hlapi_erc7984":
files_to_parse.append("erc7984_pbs_count.csv")
elif inputs_command == "hlapi_erc20":
files_to_parse.append("erc20_pbs_count.csv")
elif inputs_command == "hlapi_dex":
files_to_parse.extend(
[

View File

@@ -89,7 +89,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_integer_multi_bit_gpu_default
path: ${{ env.RESULTS_FILENAME }}
@@ -173,7 +173,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -111,7 +111,7 @@ jobs:
]:
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
- name: Set matrix arguments outputs
- name: Set matrix arguments outputs
id: set_matrix_args
run: | # zizmor: ignore[template-injection] these env variables are safe
{
@@ -126,11 +126,17 @@ jobs:
needs: prepare-matrix
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
# Prefer the permanent instance label: the on-demand instance label output is set before the start-remote-instance step finishes.
# If that step then fails because the GitHub Actions runner setup failed, we have to fall back on the permanent instance.
# Since the on-demand label is already set when the failure happens, the logical OR must be evaluated in this order,
# otherwise the next job would try to run on a non-existent on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
- name: Start remote instance
id: start-remote-instance
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -139,6 +145,25 @@ jobs:
backend: ${{ inputs.backend }}
profile: ${{ inputs.profile }}
- name: Acknowledge remote instance failure
if: steps.start-remote-instance.outcome == 'failure' &&
inputs.profile != 'single-h100'
run: |
echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')"
exit 1
env:
INPUTS_PROFILE: ${{ inputs.profile }}
# This allows falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' &&
steps.start-remote-instance.outcome == 'failure' &&
inputs.profile == 'single-h100'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
install-dependencies:
name: benchmark_gpu_common/install-dependencies
@@ -159,6 +184,7 @@ jobs:
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -270,7 +296,7 @@ jobs:
filenames: ${{ inputs.additional_file_to_parse }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -307,13 +333,13 @@ jobs:
teardown-instance:
name: benchmark_gpu_common/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -42,7 +42,7 @@ env:
OPTIMIZATION_TARGET: "throughput"
BATCH_SIZE: "5000"
SCHEDULING_POLICY: "MAX_PARALLELISM"
BENCHMARKS: "erc7984"
BENCHMARKS: "erc20"
BRANCH_NAME: ${{ github.ref_name }}
COMMIT_SHA: ${{ github.sha }}
SLAB_SECRET: ${{ secrets.JOB_SECRET }}
@@ -94,7 +94,7 @@ jobs:
steps:
- name: Start remote instance
id: start-remote-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -204,7 +204,7 @@ jobs:
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
- name: Cache cargo
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
with:
path: |
~/.cargo/registry
@@ -214,14 +214,14 @@ jobs:
restore-keys: ${{ runner.os }}-cargo-
- name: Login to GitHub Container Registry
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Chainguard Registry
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: cgr.dev
username: ${{ secrets.CGR_USERNAME }}
@@ -248,13 +248,13 @@ jobs:
npm install && npm run deploy:emptyProxies && npx hardhat compile
working-directory: fhevm/
- name: Profile erc7984 no-cmux benchmark on GPU
- name: Profile erc20 no-cmux benchmark on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
BENCHMARK_TYPE="THROUGHPUT_200" \
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
make -e "profile_erc7984_gpu"
make -e "profile_erc20_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Get nsys profile name
@@ -271,7 +271,7 @@ jobs:
- name: Upload profile artifact
env:
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ env.REPORT_NAME }}
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
@@ -302,7 +302,7 @@ jobs:
working-directory: fhevm/
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
path: fhevm/$${{ env.RESULTS_FILENAME }}
@@ -333,7 +333,7 @@ jobs:
steps:
- name: Stop remote instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -14,7 +14,7 @@ on:
- integer
- hlapi_unsigned
- hlapi_signed
- hlapi_erc7984
- hlapi_erc20
op_flavor:
description: "Operations set to run"
type: choice

View File

@@ -95,7 +95,7 @@ jobs:
]:
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
- name: Set matrix arguments outputs
- name: Set matrix arguments outputs
id: set_matrix_args
run: | # zizmor: ignore[template-injection] these env variables are safe
{
@@ -185,7 +185,7 @@ jobs:
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_${{ matrix.bench_type }}_${{ matrix.command }}_benchmarks
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -143,7 +143,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -280,7 +280,7 @@ jobs:
BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_regression_${{ env.RESULTS_FILE_SHA }} # RESULT_FILE_SHA is needed to avoid collision between matrix.command runs
path: ${{ env.RESULTS_FILENAME }}
@@ -387,7 +387,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -40,7 +40,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_fft
path: ${{ env.RESULTS_FILENAME }}
@@ -137,7 +137,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -40,7 +40,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_ntt
path: ${{ env.RESULTS_FILENAME }}
@@ -137,7 +137,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -63,7 +63,7 @@ jobs:
with open(env_file, "a") as f:
f.write(f"""BROWSER=["{'", "'.join(split_browser)}"]\n""")
- name: Set matrix arguments output
- name: Set matrix arguments output
id: set_matrix_arg
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "browser=${{ toJSON(env.BROWSER) }}" >> "${GITHUB_OUTPUT}"
@@ -77,7 +77,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -124,7 +124,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
~/.nvm
@@ -137,7 +137,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -158,9 +158,9 @@ jobs:
env:
BROWSER: ${{ matrix.browser }}
- name: Run benchmarks (cross origin)
- name: Run benchmarks (unsafe coop)
run: |
make bench_web_js_api_cross_origin_"${BROWSER}"_ci
make bench_web_js_api_unsafe_coop_"${BROWSER}"_ci
env:
BROWSER: ${{ matrix.browser }}
@@ -180,7 +180,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}
@@ -218,7 +218,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -94,7 +94,7 @@ jobs:
with open(env_file, "a") as f:
f.write(f"""RUNNERS=["{'", "'.join(runners)}"]\n""")
- name: Set matrix runners outputs
- name: Set matrix runners outputs
id: set_matrix_runners
run: | # zizmor: ignore[template-injection] these env variables are safe
echo "runners=${{ toJSON(env.RUNNERS) }}" >> "${GITHUB_OUTPUT}"
@@ -138,7 +138,7 @@ jobs:
- name: Node cache restoration
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: |
~/.nvm
@@ -151,7 +151,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2' && steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -63,7 +63,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -146,7 +146,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -50,7 +50,7 @@ jobs:
version: ${{ steps.get_zizmor.outputs.version }}
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@ca46236c6ce584ae24bc6283ba8dcf4b3ec8a066 # v5.0.4
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@70c4af2ed5282c51ba40566d026d6647852ffa3e # v5.0.1
with:
allowlist: |
slsa-framework/slsa-github-generator

View File

@@ -74,7 +74,7 @@ jobs:
make test_shortint_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -88,7 +88,7 @@ jobs:
make test_integer_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}

View File

@@ -87,7 +87,7 @@ jobs:
- name: Upload tables
if: inputs.backend_comparison == false
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_subset_${{inputs.bench_subset}}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
# This will upload all the generated files
@@ -111,7 +111,7 @@ jobs:
- name: Upload comparison tables
if: inputs.backend_comparison == true
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
with:
name: ${{ github.sha }}_backends_comparison_tables
# This will upload all the generated files

View File

@@ -209,98 +209,60 @@ jobs:
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-zk-server-latency-table:
name: generate_documentation_svgs/gpu-zk-server-latency-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-H100-SXM5x8
layer: integer
bench_subset: zk
pbs_kind: multi_bit
grouping_factor: 4
bench_type: latency
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-zk-benchmark-latency
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-zk-server-throughput-table:
name: generate_documentation_svgs/gpu-zk-server-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-H100-SXM5x8
layer: integer
bench_subset: zk
pbs_kind: multi_bit
grouping_factor: 4
bench_type: throughput
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-zk-benchmark-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# -----------------------------------------------------------
# ERC7984 benchmarks tables
# ERC20 benchmarks tables
# -----------------------------------------------------------
cpu-erc7984-latency-throughput-table:
name: generate_documentation_svgs/cpu-erc7984-latency-throughput-table
cpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/cpu-erc20-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: hpc7a.96xlarge
layer: hlapi
bench_subset: erc7984
bench_subset: erc20
pbs_kind: classical
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-hlapi-erc7984-benchmark-latency-throughput
output_filename: cpu-hlapi-erc20-benchmark-latency-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-erc7984-latency-throughput-table:
name: generate_documentation_svgs/gpu-erc7984-latency-throughput-table
gpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/gpu-erc20-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-H100-SXM5x8
layer: hlapi
bench_subset: erc7984
bench_subset: erc20
pbs_kind: multi_bit
grouping_factor: 4
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-hlapi-erc7984-benchmark-h100x8-sxm5-latency-throughput
output_filename: gpu-hlapi-erc20-benchmark-h100x8-sxm5-latency-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
hpu-erc7984-latency-throughput-table:
name: generate_documentation_svgs/hpu-erc7984-latency-throughput-table
hpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/hpu-erc20-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-hpu-svgs
with:
backend: hpu
hardware_name: hpu_x1
layer: hlapi
bench_subset: erc7984
bench_subset: erc20
pbs_kind: classical
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: hpu-hlapi-erc7984-benchmark-hpux1-latency-throughput.svg
output_filename: hpu-hlapi-erc20-benchmark-hpux1-latency-throughput.svg
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}

View File

@@ -43,7 +43,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -149,7 +149,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled, opened, synchronize ]
types: [ labeled ]
permissions:
contents: read
@@ -38,7 +38,6 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -63,24 +62,29 @@ jobs:
- tfhe/src/integer/server_key/radix_parallel/tests_cases_unsigned.rs
- tfhe/src/shortint/parameters/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_core_h100_tests.yml'
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_core_h100_tests/setup-instance
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
# Use the permanent remote instance label first, because the on-demand remote instance label output is set before the end of the start-remote-instance step.
# If that step fails because the GitHub Actions runner setup failed, we have to fall back on the permanent instance.
# Since the on-demand remote label is set before the failure, the logical OR has to be done in this order,
# otherwise we would try to run the next job on a non-existent on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -89,6 +93,13 @@ jobs:
backend: hyperstack
profile: single-h100
# This allows falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance will be spawned specifically for pull requests from forked repositories
- name: Start GitHub instance
id: start-github-instance
@@ -121,6 +132,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -164,14 +176,14 @@ jobs:
teardown-instance:
name: gpu_core_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -77,7 +77,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -182,7 +182,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -25,11 +25,17 @@ jobs:
name: gpu_full_h100_tests/setup-instance
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
# Use the permanent remote instance label first, because the on-demand remote instance label output is set before the end of the start-remote-instance step.
# If that step fails because the GitHub Actions runner setup failed, we have to fall back on the permanent instance.
# Since the on-demand remote label is set before the failure, the logical OR has to be done in this order,
# otherwise we would try to run the next job on a non-existent on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
- name: Start remote instance
id: start-remote-instance
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -38,6 +44,13 @@ jobs:
backend: hyperstack
profile: single-h100
# This allows falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
cuda-tests-linux:
name: gpu_full_h100_tests/cuda-tests-linux
needs: [ setup-instance ]
@@ -61,6 +74,7 @@ jobs:
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -104,13 +118,13 @@ jobs:
teardown-instance:
name: gpu_full_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -80,7 +80,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -186,7 +186,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled, opened, synchronize ]
types: [ labeled ]
permissions:
contents: read
@@ -38,7 +38,6 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -66,23 +65,27 @@ jobs:
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_hlapi_h100_tests.yml'
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_hlapi_h100_tests/setup-instance
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
# Use the permanent remote instance label first, because the on-demand remote instance label output is set before the end of the start-remote-instance step.
# If that step fails because the GitHub Actions runner setup failed, we have to fall back on the permanent instance.
# Since the on-demand remote label is set before the failure, the logical OR has to be done in this order,
# otherwise we would try to run the next job on a non-existent on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -91,6 +94,13 @@ jobs:
backend: hyperstack
profile: single-h100
# This allows falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance will be spawned specifically for pull requests from forked repositories
- name: Start GitHub instance
id: start-github-instance
@@ -123,6 +133,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -173,14 +184,14 @@ jobs:
teardown-instance:
name: gpu_hlapi_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -17,8 +17,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered every Monday at 8p.m.
- cron: "0 20 * * 1"
# Nightly tests will be triggered each evening at 8 p.m.
- cron: "0 20 * * *"
pull_request:
@@ -28,48 +28,17 @@ permissions:
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_integer_long_run_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
is_needed_in_gpu_ci: ${{ env.IS_PR == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
with:
files_yaml: |
gpu:
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- tfhe/src/core_crypto/gpu/**
- tfhe/src/integer/gpu/**
- tfhe/src/shortint/parameters/**
- '.github/workflows/gpu_integer_long_run_tests.yml'
setup-instance:
name: gpu_integer_long_run_tests/setup-instance
needs: [should-run]
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
needs.should-run.outputs.is_needed_in_gpu_ci == 'true'
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -143,7 +112,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -74,7 +74,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -166,7 +166,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -74,7 +74,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -166,7 +166,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -38,7 +38,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -131,10 +131,6 @@ jobs:
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Run semgrep and lint checks on CUDA code
run: |
make semgrep_and_lint_gpu_code
- name: Run fmt checks
run: |
make check_fmt_gpu
@@ -143,6 +139,10 @@ jobs:
run: |
make pcc_gpu
- name: Run semgrep and lint checks on CUDA code
run: |
make semgrep_and_lint_gpu_code
- name: Run semver checks on tfhe-cuda-backend
run: |
make semver_check_cuda_backend
@@ -176,7 +176,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -63,6 +63,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_classic_tests.yml'
- scripts/integer-tests.sh
@@ -79,7 +80,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -168,7 +169,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled, opened, synchronize ]
types: [ labeled ]
permissions:
contents: read
@@ -38,7 +38,6 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -64,25 +63,30 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_h100_tests.yml'
- scripts/integer-tests.sh
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_signed_integer_h100_tests/setup-instance
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
# Use the permanent remote instance label first, because the on-demand remote instance label output is set before the end of the start-remote-instance step.
# If that step fails because the GitHub Actions runner setup failed, we have to fall back on the permanent instance.
# Since the on-demand remote label is set before the failure, the logical OR has to be done in this order,
# otherwise we would try to run the next job on a non-existent on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -91,6 +95,13 @@ jobs:
backend: hyperstack
profile: single-h100
# This allows falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance will be spawned specifically for pull requests from forked repositories
- name: Start GitHub instance
id: start-github-instance
@@ -123,6 +134,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -164,14 +176,14 @@ jobs:
teardown-instance:
name: gpu_signed_integer_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -64,6 +64,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_tests.yml'
- scripts/integer-tests.sh
@@ -80,7 +81,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -177,7 +178,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -63,6 +63,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
- scripts/integer-tests.sh
@@ -79,7 +80,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -168,7 +169,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled, opened, synchronize ]
types: [ labeled ]
permissions:
contents: read
@@ -38,7 +38,6 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -64,25 +63,30 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
- scripts/integer-tests.sh
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_unsigned_integer_h100_tests/setup-instance
needs: should-run
if: github.event_name == 'workflow_dispatch' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
# Use the permanent remote instance label first, because the on-demand remote instance label output is set before the end of the start-remote-instance step.
# If that step fails because the GitHub Actions runner setup failed, we have to fall back on the permanent instance.
# Since the on-demand remote label is set before the failure, the logical OR has to be done in this order,
# otherwise we would try to run the next job on a non-existent on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
continue-on-error: true
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -91,6 +95,13 @@ jobs:
backend: hyperstack
profile: single-h100
# This allows falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance is spawned specifically for pull requests from forked repositories.
- name: Start GitHub instance
id: start-github-instance
@@ -123,6 +134,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -164,14 +176,14 @@ jobs:
teardown-instance:
name: gpu_unsigned_integer_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -64,6 +64,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_tests.yml'
- scripts/integer-tests.sh
@@ -80,7 +81,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -177,7 +178,7 @@ jobs:
- name: Stop instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -51,13 +51,7 @@ jobs:
with:
files_yaml: |
gpu:
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- backends/zk-cuda-backend/**
- tfhe/src/shortint/parameters/**
- tfhe/src/zk/**
- tfhe-zk-pok/**
- '.github/workflows/gpu_zk_tests.yml'
- ci/slab.toml
@@ -73,7 +67,7 @@ jobs:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -132,9 +126,6 @@ jobs:
- name: Run zk-cuda-backend integration tests
run: |
make test_zk_cuda_backend
make test_zk_pok_experimental_gpu
make test_integer_zk_gpu
make test_integer_zk_experimental_gpu
slack-notify:
name: gpu_zk_tests/slack-notify
@@ -167,7 +158,7 @@ jobs:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -62,7 +62,7 @@ jobs:
PACKAGE: ${{ inputs.package-name }}
run: |
cargo package -p "${PACKAGE}"
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: crate-${{ inputs.package-name }}
path: target/package/*.crate
@@ -107,7 +107,7 @@ jobs:
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
id: auth
- name: Publish crate.io package

View File

@@ -1,36 +1,12 @@
# Common workflow to make crate release for CUDA backend
name: make_release_common_cuda
name: make_release_cuda
on:
workflow_call:
workflow_dispatch:
inputs:
package-name:
type: string
required: true
dry-run:
dry_run:
description: "Dry-run"
type: boolean
default: true
secrets:
REPO_CHECKOUT_TOKEN:
required: true
SLAB_ACTION_TOKEN:
required: true
SLAB_BASE_URL:
required: true
SLAB_URL:
required: true
JOB_SECRET:
required: true
SLACK_CHANNEL:
required: true
BOT_USERNAME:
required: true
SLACK_WEBHOOK:
required: true
ALLOWED_TEAM:
required: true
READ_ORG_TOKEN:
required: true
env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
@@ -45,15 +21,15 @@ permissions: {}
jobs:
verify-triggering-actor:
name: make_release_common_cuda/verify-triggering-actor
name: make_release_cuda/verify-triggering-actor
if: startsWith(github.ref, 'refs/tags/')
uses: ./.github/workflows/verify_triggering_actor.yml
secrets:
ALLOWED_TEAM: ${{ secrets.ALLOWED_TEAM }}
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
setup-instance:
name: make_release_common_cuda/setup-instance
name: make_release_cuda/setup-instance
needs: verify-triggering-actor
runs-on: ubuntu-latest
outputs:
@@ -61,7 +37,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -71,7 +47,7 @@ jobs:
profile: gpu-build
package:
name: make_release_common_cuda/package
name: make_release_cuda/package
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
outputs:
@@ -100,6 +76,7 @@ jobs:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
{
@@ -112,6 +89,7 @@ jobs:
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
@@ -123,14 +101,12 @@ jobs:
GCC_VERSION: ${{ matrix.gcc }}
- name: Prepare package
env:
PACKAGE: ${{ inputs.package-name }}
run: |
cargo package -p "${PACKAGE}"
cargo package -p tfhe-cuda-backend
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: crate-${{ inputs.package-name }}
name: crate-tfhe-cuda-backend
path: target/package/*.crate
- name: generate hash
@@ -138,8 +114,8 @@ jobs:
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance:
name: make_release_common_cuda/provenance
if: ${{ !inputs.dry-run }}
name: make_release_cuda/provenance
if: ${{ !inputs.dry_run }}
needs: [package]
# This action cannot be pinned to a specific commit (see https://github.com/slsa-framework/slsa-github-generator/blob/main/README.md#referencing-slsa-builders-and-generators)
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0 # zizmor: ignore[unpinned-uses] as said above SLSA cannot be pinned by tag today
@@ -152,7 +128,7 @@ jobs:
base64-subjects: ${{ needs.package.outputs.hash }}
publish-cuda-release:
name: make_release_common_cuda/publish-cuda-release
name: make_release_cuda/publish-cuda-release
needs: [setup-instance, package] # for comparing hashes
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
permissions:
@@ -174,6 +150,7 @@ jobs:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
{
@@ -186,6 +163,7 @@ jobs:
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
@@ -196,33 +174,25 @@ jobs:
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
persist-credentials: "false"
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
name: crate-${{ inputs.package-name }}
name: crate-tfhe-cuda-backend
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
id: auth
- name: Publish crate.io package
env:
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
PACKAGE: ${{ inputs.package-name }}
DRY_RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p "${PACKAGE}" ${DRY_RUN}
cargo publish -p tfhe-cuda-backend ${DRY_RUN}
- name: Generate hash
id: published_hash
@@ -234,7 +204,7 @@ jobs:
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
env:
SLACK_COLOR: failure
SLACK_MESSAGE: "SLSA ${{ inputs.package-name }} crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "SLSA tfhe-cuda-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
@@ -242,17 +212,17 @@ jobs:
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "${{ inputs.package-name }} release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: make_release_common_cuda/teardown-instance
name: make_release_cuda/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [setup-instance, publish-cuda-release]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -262,7 +232,7 @@ jobs:
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (${{ inputs.package-name }} release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (publish-cuda-release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -16,10 +16,6 @@ on:
description: "Push web js package"
type: boolean
default: true
push_web_compat_package:
description: "Push web compat (cross-origin) js package"
type: boolean
default: true
push_node_package:
description: "Push node js package"
type: boolean
@@ -103,23 +99,6 @@ jobs:
tag: ${{ env.NPM_TAG }}
provenance: true
- name: Build web compat (cross-origin) package
if: ${{ inputs.push_web_compat_package }}
run: |
rm -rf tfhe/pkg
make build_web_js_api
sed -i 's/"tfhe"/"tfhe-compat"/g' tfhe/pkg/package.json
- name: Publish web compat (cross-origin) package
if: ${{ inputs.push_web_compat_package }}
uses: JS-DevTools/npm-publish@0fd2f4369c5d6bcfcde6091a7c527d810b9b5c3f
with:
package: tfhe/pkg/package.json
dry-run: ${{ inputs.dry_run }}
tag: ${{ env.NPM_TAG }}
provenance: true
- name: Build Node package
if: ${{ inputs.push_node_package }}
run: |

View File

@@ -1,44 +0,0 @@
# Publish new release of tfhe-rs CUDA backend on crates.io.
name: make_release_tfhe_cuda
on:
workflow_dispatch:
inputs:
dry_run:
description: "Dry-run"
type: boolean
default: true
env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_tfhe_cuda/make-release
uses: ./.github/workflows/make_release_common_cuda.yml
with:
package-name: "tfhe-cuda-backend"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
SLAB_URL: ${{ secrets.SLAB_URL }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}

View File

@@ -1,32 +0,0 @@
name: make_release_tfhe_safe_serialize
on:
workflow_dispatch:
inputs:
dry_run:
description: "Dry-run"
type: boolean
default: true
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_tfhe_safe_serialize/make-release
uses: ./.github/workflows/make_release_common.yml
with:
package-name: "tfhe-safe-serialize"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

View File

@@ -1,44 +0,0 @@
# Publish new release of CUDA Zero-Knowledge primitives on crates.io.
name: make_release_zk_cuda
on:
workflow_dispatch:
inputs:
dry_run:
description: "Dry-run"
type: boolean
default: true
env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_zk_cuda/make-release
uses: ./.github/workflows/make_release_common_cuda.yml
with:
package-name: "zk-cuda-backend"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ALLOWED_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
SLAB_URL: ${{ secrets.SLAB_URL }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}

View File

@@ -53,7 +53,7 @@ jobs:
- name: Restore Sagemath image from cache
id: docker-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
@@ -76,7 +76,7 @@ jobs:
- name: Store Sagemath image in cache
if: steps.docker-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 #v5.0.3
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}

1
.gitignore vendored
View File

@@ -25,7 +25,6 @@ dieharder_run.log
# Cuda local build
backends/tfhe-cuda-backend/cuda/cmake-build-debug/
backends/tfhe-cuda-backend/cuda/build/
# WASM tests
tfhe/web_wasm_parallel_tests/server.PID

View File

@@ -19,7 +19,6 @@ members = [
"utils/tfhe-backward-compat-checker",
"utils/tfhe-backward-compat-data",
"utils/tfhe-backward-compat-data/crates/add_new_version",
"utils/tfhe-safe-serialize",
"utils/tfhe-versionable",
"utils/tfhe-versionable-derive",
"utils/wasm-par-mq",
@@ -45,7 +44,6 @@ rand = "0.8"
rayon = "1.11"
serde = { version = "1.0", default-features = false }
wasm-bindgen = { version = "0.2.114" }
wasm-bindgen-futures = { version = "0.4.56" }
# js-sys (at this point in time) automatically enables the unsafe-eval feature which we do not want
# this does not prevent other deps from enabling it, but it at least conveys our need to not have it
# we still enable std, which was part of default before

View File

@@ -1,6 +1,6 @@
BSD 3-Clause Clear License
Copyright © 2026 ZAMA.
Copyright © 2025 ZAMA.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,

254
Makefile
View File

@@ -122,12 +122,6 @@ install_build_wasm32_target:
( echo "Unable to install wasm32-unknown-unknown target toolchain, check your rustup installation. \
Rustup can be downloaded at https://rustup.rs/" && exit 1 )
.PHONY: install_check_wasm32_target # Install the wasm32 toolchain used for checks
install_check_wasm32_target:
rustup target add wasm32-unknown-unknown --toolchain "$(RS_CHECK_TOOLCHAIN)" || \
( echo "Unable to install wasm32-unknown-unknown target toolchain, check your rustup installation. \
Rustup can be downloaded at https://rustup.rs/" && exit 1 )
.PHONY: install_cargo_nextest # Install cargo nextest used for shortint tests
install_cargo_nextest:
@cargo nextest --version > /dev/null 2>&1 || \
@@ -312,7 +306,7 @@ semgrep_and_lint_gpu_code: semgrep_lint_setup_venv
find "$(TFHECUDA_SRC)" -name '*.h' -o -name '*.cuh' -o -name '*.cu' \
| grep -v '/cmake-build-debug/' \
| grep -v '/build/' \
| xargs venv/bin/semgrep --error --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
| xargs venv/bin/semgrep --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
venv/bin/python3 "scripts/check_scratch_cleanup.py"
.PHONY: semver_check_cuda_backend # Run semver checks on tfhe-cuda-backend
@@ -356,23 +350,23 @@ check_fmt_js: check_nvm_installed
.PHONY: check_fmt_toml # Check TOML files format
check_fmt_toml: install_taplo
@RUST_LOG=warn taplo fmt --check || \
{ echo "TOML files format check failed. Please run 'make fmt_toml'"; exit 1; }
echo "TOML files format check failed. Please run 'make fmt_toml'"
.PHONY: check_typos # Check for typos in codebase
check_typos: install_typos_checker
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" | typos --file-list - && echo "No typos found"
@typos && echo "No typos found"
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
clippy_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats,extended-types,zk-pok \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types,zk-pok \
--all-targets \
-p tfhe -- --no-deps -D warnings
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
check_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
--all-targets \
-p tfhe
@@ -386,7 +380,7 @@ clippy_hpu: install_rs_check_toolchain
.PHONY: clippy_gpu_hpu # Run clippy lints on tfhe with "gpu" and "hpu" enabled
clippy_gpu_hpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu,gpu-experimental-zk,hpu,pbs-stats,extended-types,zk-pok \
--features=boolean,shortint,integer,internal-keycache,gpu,hpu,pbs-stats,extended-types,zk-pok \
--all-targets \
-p tfhe -- --no-deps -D warnings
@@ -479,7 +473,7 @@ clippy_rustdoc_gpu: install_rs_check_toolchain
fi && \
CARGO_TERM_QUIET=true CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu,gpu-experimental-zk \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
-p tfhe -- --nocapture
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
@@ -490,17 +484,11 @@ clippy_c_api: install_rs_check_toolchain
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
clippy_js_wasm_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,extended-types \
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types \
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types,parallel-wasm-api \
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types,cross-origin-wasm-api \
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,extended-types \
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
@@ -541,15 +529,6 @@ clippy_zk_pok: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-zk-pok --features=experimental -- --no-deps -D warnings
.PHONY: clippy_zk_pok_wasm # Run clippy lints on tfhe-zk-pok for wasm32 target
clippy_zk_pok_wasm: install_rs_check_toolchain install_check_wasm32_target
RUSTFLAGS="$(WASM_RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--target wasm32-unknown-unknown \
-p tfhe-zk-pok -- --no-deps -D warnings
RUSTFLAGS="$(WASM_RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--target wasm32-unknown-unknown \
-p tfhe-zk-pok --features cross-origin-wasm -- --no-deps -D warnings
.PHONY: clippy_versionable # Run clippy lints on tfhe-versionable
clippy_versionable: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
@@ -557,11 +536,6 @@ clippy_versionable: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-versionable -- --no-deps -D warnings
.PHONY: clippy_safe_serialize # Run clippy lints on tfhe-safe-serialize
clippy_safe_serialize: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-safe-serialize -- --no-deps -D warnings
.PHONY: clippy_param_dedup # Run clippy lints on param_dedup tool
clippy_param_dedup: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
@@ -587,28 +561,15 @@ clippy_backward_compat_data: install_rs_check_toolchain # the toolchain is selec
echo "Cannot run clippy for backward compat crate on non x86 platform for now."; \
fi
.PHONY: check_backward_compat_locks_did_not_change # Check backward compat Cargo.lock files are up to date
check_backward_compat_locks_did_not_change: install_rs_check_toolchain
@for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
echo "checking Cargo.lock for $$crate"; \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
-C $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate metadata --locked --format-version 1 > /dev/null || \
( echo "Cargo.lock for $$crate is out of date. Update it with:" && \
echo " cd $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate && cargo metadata --format-version 1 > /dev/null" && \
echo "then commit the updated Cargo.lock." && exit 1 ); \
done
.PHONY: clippy_test_vectors # Run clippy lints on the test vectors app
clippy_test_vectors: install_rs_check_toolchain
cd apps/test-vectors; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-test-vectors -- --no-deps -D warnings
# WARNING: This target is not directly run in CI. When adding a subtarget here,
# MAKE SURE TO ALSO ADD IT TO A PCC BATCH BELOW
.PHONY: clippy_all # Run all clippy targets
clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_zk_pok_wasm clippy_trivium \
clippy_versionable clippy_safe_serialize clippy_tfhe_lints clippy_ws_tests clippy_bench clippy_param_dedup \
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
clippy_versionable clippy_tfhe_lints clippy_ws_tests clippy_bench clippy_param_dedup \
clippy_test_vectors clippy_backward_compat_data clippy_wasm_par_mq
.PHONY: clippy_fast # Run main clippy targets
@@ -705,7 +666,7 @@ build_c_api: install_rs_check_toolchain
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
build_c_api_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu,gpu-experimental-zk \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu \
-p tfhe
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
@@ -714,14 +675,11 @@ build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
-p tfhe
.PHONY: build_web_js_api # Build the js API targeting the web browser, in sequential or cross origin parallelism modes.
.PHONY: build_web_js_api # Build the js API targeting the web browser
build_web_js_api: install_wasm_pack
cd tfhe && \
RUSTFLAGS="$(WASM_RUSTFLAGS)" wasm-pack build --release --target=web \
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok,extended-types,cross-origin-wasm-api && \
find pkg/snippets -type f -iname worker_helpers.js -exec sed -i 's|import("../../..")|import("../../../tfhe.js")|g' {} \;
cp utils/wasm-par-mq/js/coordinator.js tfhe/pkg/
jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok,extended-types
.PHONY: build_web_js_api_parallel # Build the js API targeting the web browser with parallelism support
# parallel wasm requires specific build options, see https://github.com/rust-lang/rust/pull/147225
@@ -807,7 +765,7 @@ test_zk_cuda_backend:
.PHONY: test_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend test_zk_cuda_backend
test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
test_core_crypto_gpu:
@@ -1243,31 +1201,12 @@ test_tfhe_csprng_big_endian: install_cargo_cross
RUSTFLAGS="" cross test --profile $(CARGO_PROFILE) \
-p tfhe-csprng --target=powerpc64-unknown-linux-gnu
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
test_zk_pok:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
-p tfhe-zk-pok --features experimental
.PHONY: test_zk_pok_experimental_gpu # Run tfhe-zk-pok GPU-accelerated tests
test_zk_pok_experimental_gpu:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
-p tfhe-zk-pok --features experimental,gpu-experimental -- gpu
.PHONY: test_integer_zk_gpu # Run tfhe-zk-pok tests
test_integer_zk_gpu:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
--features=integer,zk-pok,gpu -p tfhe -- \
integer::gpu::zk::
.PHONY: test_integer_zk_experimental_gpu # Run tfhe-zk-pok tests
test_integer_zk_experimental_gpu:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
--features=integer,zk-pok,gpu,gpu-experimental-zk -p tfhe -- \
integer::gpu::zk::
.PHONY: test_zk_cuda # Run all GPU MSM integration tests (CPU vs GPU comparison + integration test)
test_zk_cuda: test_zk_cuda_backend test_zk_pok_experimental_gpu test_integer_zk_gpu test_integer_zk_experimental_gpu
.PHONY: test_zk_wasm_x86_compat_ci
test_zk_wasm_x86_compat_ci: check_nvm_installed
source ~/.nvm/nvm.sh && \
@@ -1286,11 +1225,6 @@ test_versionable:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
--all-targets -p tfhe-versionable
.PHONY: test_safe_serialize # Run tests for tfhe-safe-serialize subcrate
test_safe_serialize:
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
--all-targets -p tfhe-safe-serialize
# The backward compat data folder holds historical binary data but also rust code to generate and load them.
.PHONY: gen_backward_compat_data # Re-generate backward compatibility data
gen_backward_compat_data:
@@ -1425,19 +1359,6 @@ test_nodejs_wasm_api_ci: build_node_js_api
# This is an internal target, not meant to be called on its own.
run_web_js_api_parallel: build_web_js_api_parallel setup_venv
cd $(WEB_SERVER_DIR) && npm install && npm run build
source venv/bin/activate && \
python ci/webdriver.py \
--browser-path $(browser_path) \
--driver-path $(driver_path) \
--browser-kind $(browser_kind) \
--server-cmd $(server_cmd) \
--server-workdir "$(WEB_SERVER_DIR)" \
--id-pattern $(filter) \
--id-exclude-pattern asyncMainThread
# This is an internal target, not meant to be called on its own.
run_web_js_api_cross_origin: build_web_js_api setup_venv
cd $(WEB_SERVER_DIR) && npm install && npm run build
source venv/bin/activate && \
python ci/webdriver.py \
@@ -1480,38 +1401,6 @@ test_web_js_api_parallel_firefox_ci: setup_venv
nvm use $(NODE_VERSION) && \
$(MAKE) test_web_js_api_parallel_firefox
# Target-specific variables selecting the Chrome binary and driver, the
# cross-origin server command and the test filter. They apply while building
# this target and its prerequisite run_web_js_api_cross_origin.
# NOTE(review): presumably consumed by run_web_js_api_cross_origin the same way
# run_web_js_api_parallel passes them to ci/webdriver.py; confirm in that recipe.
test_web_js_api_cross_origin_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
test_web_js_api_cross_origin_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
test_web_js_api_cross_origin_chrome: browser_kind = chrome
test_web_js_api_cross_origin_chrome: server_cmd = "npm run server:cross-origin"
test_web_js_api_cross_origin_chrome: filter = ZeroKnowledgeTest # Only run zk proof tests in cross-origin mode
.PHONY: test_web_js_api_cross_origin_chrome # Run tests for the web wasm api in cross-origin mode on Chrome
test_web_js_api_cross_origin_chrome: run_web_js_api_cross_origin
# CI wrapper: loads nvm, installs and selects $(NODE_VERSION), then re-invokes
# make on test_web_js_api_cross_origin_chrome inside that same shell.
.PHONY: test_web_js_api_cross_origin_chrome_ci # Run tests for the web wasm api in cross-origin mode on Chrome
test_web_js_api_cross_origin_chrome_ci: setup_venv
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) test_web_js_api_cross_origin_chrome
# Target-specific variables selecting the Firefox binary and geckodriver, the
# cross-origin server command and the test filter. They apply while building
# this target and its prerequisite run_web_js_api_cross_origin.
# NOTE(review): presumably consumed by run_web_js_api_cross_origin the same way
# run_web_js_api_parallel passes them to ci/webdriver.py; confirm in that recipe.
test_web_js_api_cross_origin_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
test_web_js_api_cross_origin_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
test_web_js_api_cross_origin_firefox: browser_kind = firefox
test_web_js_api_cross_origin_firefox: server_cmd = "npm run server:cross-origin"
test_web_js_api_cross_origin_firefox: filter = ZeroKnowledgeTest # Only run zk proof tests in cross-origin mode
.PHONY: test_web_js_api_cross_origin_firefox # Run tests for the web wasm api in cross-origin mode on Firefox
test_web_js_api_cross_origin_firefox: run_web_js_api_cross_origin
# CI wrapper: loads nvm, installs and selects $(NODE_VERSION), then re-invokes
# make on test_web_js_api_cross_origin_firefox inside that same shell.
.PHONY: test_web_js_api_cross_origin_firefox_ci # Run tests for the web wasm api in cross-origin mode on Firefox
test_web_js_api_cross_origin_firefox_ci: setup_venv
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) test_web_js_api_cross_origin_firefox
WASM_PAR_MQ_TEST_DIR=utils/wasm-par-mq/web_tests
.PHONY: build_wasm_par_mq_tests # Build the wasm-par-mq test WASM package
@@ -1675,50 +1564,27 @@ bench_integer_rerand_gpu: install_rs_check_toolchain
--bench integer-rerand \
--features=integer,internal-keycache,gpu,pbs-stats -p tfhe-benchmark --profile release_lto_off --
# Runs the `zk-msm` bench of the `tfhe-benchmark` package with the `zk-pok`
# feature under the `release` profile. $(BENCH_TYPE) is forwarded to the bench
# through the __TFHE_RS_BENCH_TYPE environment variable.
.PHONY: bench_msm_zk # Run the zk-msm benchmarks
bench_msm_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-msm \
--features=zk-pok -p tfhe-benchmark --profile release --
# GPU benchmarks need --profile release for correct measurements
# Runs the `zk-msm` bench of `tfhe-benchmark` with the
# `gpu,gpu-experimental-zk,zk-pok` features; the trailing `zk::cuda::msm`
# argument is passed to the bench binary after `--`.
.PHONY: bench_msm_zk_gpu # Run the zk-msm benchmarks on GPU
bench_msm_zk_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-msm \
--features=gpu,gpu-experimental-zk,zk-pok -p tfhe-benchmark --profile release -- zk::cuda::msm
# GPU benchmarks need --profile release for correct measurements
.PHONY: bench_integer_zk_gpu
bench_integer_zk_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --profile release --
# GPU benchmarks need --profile release for correct measurements
.PHONY: bench_integer_zk_experimental_gpu
bench_integer_zk_experimental_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-zk-pke \
--features=integer,internal-keycache,gpu,gpu-experimental-zk,pbs-stats,zk-pok -p tfhe-benchmark --profile release --
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_integer_aes_gpu # Run benchmarks for AES on GPU backend
bench_integer_aes_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-aes \
--features=integer,internal-keycache,gpu -p tfhe-benchmark --profile release_lto_off --
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_integer_aes256_gpu # Run benchmarks for AES256 on GPU backend
bench_integer_aes256_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-aes256 \
--features=integer,internal-keycache,gpu -p tfhe-benchmark --profile release_lto_off --
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_integer_trivium_gpu # Run benchmarks for trivium on GPU backend
bench_integer_trivium_gpu: install_rs_check_toolchain
@@ -1882,37 +1748,37 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
nvm use $(NODE_VERSION) && \
$(MAKE) bench_web_js_api_parallel_firefox
bench_web_js_api_cross_origin_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
bench_web_js_api_cross_origin_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
bench_web_js_api_cross_origin_chrome: browser_kind = chrome
bench_web_js_api_cross_origin_chrome: server_cmd = "npm run server:cross-origin"
bench_web_js_api_cross_origin_chrome: filter = ZeroKnowledgeBench # Only bench zk with cross-origin workers
bench_web_js_api_unsafe_coop_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
bench_web_js_api_unsafe_coop_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
bench_web_js_api_unsafe_coop_chrome: browser_kind = chrome
bench_web_js_api_unsafe_coop_chrome: server_cmd = "npm run server:unsafe-coop"
bench_web_js_api_unsafe_coop_chrome: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
.PHONY: bench_web_js_api_cross_origin_chrome # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_cross_origin_chrome: run_web_js_api_cross_origin
.PHONY: bench_web_js_api_unsafe_coop_chrome # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_chrome: run_web_js_api_parallel
.PHONY: bench_web_js_api_cross_origin_chrome_ci # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_cross_origin_chrome_ci: setup_venv
.PHONY: bench_web_js_api_unsafe_coop_chrome_ci # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_chrome_ci: setup_venv
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) bench_web_js_api_cross_origin_chrome
$(MAKE) bench_web_js_api_unsafe_coop_chrome
bench_web_js_api_cross_origin_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
bench_web_js_api_cross_origin_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
bench_web_js_api_cross_origin_firefox: browser_kind = firefox
bench_web_js_api_cross_origin_firefox: server_cmd = "npm run server:cross-origin"
bench_web_js_api_cross_origin_firefox: filter = ZeroKnowledgeBench # Only bench zk with cross-origin workers
bench_web_js_api_unsafe_coop_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
bench_web_js_api_unsafe_coop_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
bench_web_js_api_unsafe_coop_firefox: browser_kind = firefox
bench_web_js_api_unsafe_coop_firefox: server_cmd = "npm run server:unsafe-coop"
bench_web_js_api_unsafe_coop_firefox: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
.PHONY: bench_web_js_api_cross_origin_firefox # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_cross_origin_firefox: run_web_js_api_cross_origin
.PHONY: bench_web_js_api_unsafe_coop_firefox # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_firefox: run_web_js_api_parallel
.PHONY: bench_web_js_api_cross_origin_firefox_ci # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_cross_origin_firefox_ci: setup_venv
.PHONY: bench_web_js_api_unsafe_coop_firefox_ci # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_firefox_ci: setup_venv
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) bench_web_js_api_cross_origin_firefox
$(MAKE) bench_web_js_api_unsafe_coop_firefox
.PHONY: bench_hlapi_unsigned # Run benchmarks for integer operations
bench_hlapi_unsigned: install_rs_check_toolchain
@@ -1945,25 +1811,25 @@ bench_hlapi_hpu: install_rs_check_toolchain
--bench hlapi \
--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --
.PHONY: bench_hlapi_erc7984 # Run benchmarks for ERC7984 operations
bench_hlapi_erc7984: install_rs_check_toolchain
.PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations
bench_hlapi_erc20: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc7984 \
--bench hlapi-erc20 \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
.PHONY: bench_hlapi_erc7984_gpu # Run benchmarks for ERC7984 operations on GPU
bench_hlapi_erc7984_gpu: install_rs_check_toolchain
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
bench_hlapi_erc20_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc7984 \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_erc7984_gpu_classical # Run benchmarks for ERC7984 operations on GPU with classical parameters
bench_hlapi_erc7984_gpu_classical: install_rs_check_toolchain
.PHONY: bench_hlapi_erc20_gpu_classical # Run benchmarks for ERC20 operations on GPU with classical parameters
bench_hlapi_erc20_gpu_classical: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=classical \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc7984 \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_dex # Run benchmarks for DEX operations
@@ -1987,13 +1853,13 @@ bench_hlapi_dex_gpu_classical: install_rs_check_toolchain
--bench hlapi-dex \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_erc7984_hpu # Run benchmarks for ERC7984 operations on HPU
bench_hlapi_erc7984_hpu: install_rs_check_toolchain
.PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ERC20 operations on HPU
bench_hlapi_erc20_hpu: install_rs_check_toolchain
source ./setup_hpu.sh --config $(HPU_CONFIG); \
export V80_PCIE_DEV=${V80_PCIE_DEV}; \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc7984 \
--bench hlapi-erc20 \
--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
@@ -2001,13 +1867,6 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench -p tfhe-zk-pok --
# Benchmarks the `tfhe-zk-pok` package with its `gpu-experimental` feature under
# the `release` profile. $(BENCH_TYPE) is forwarded through the
# __TFHE_RS_BENCH_TYPE environment variable.
.PHONY: bench_tfhe_zk_pok_gpu # Run benchmarks for the tfhe_zk_pok crate using GPU acceleration
bench_tfhe_zk_pok_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--package tfhe-zk-pok \
--features=gpu-experimental --profile release
.PHONY: bench_hlapi_noise_squash # Run benchmarks for noise squash operation
bench_hlapi_noise_squash: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
@@ -2049,10 +1908,10 @@ bench_summary: install_rs_check_toolchain
--bench hlapi-noise-squash \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::decomp_noise_squash_comp::'
# ERC7984
# ERC20
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc7984 \
--bench hlapi-erc20 \
--features=integer,internal-keycache -p tfhe-benchmark -- '::transfer::overflow'
# DEX
@@ -2094,10 +1953,10 @@ bench_summary_gpu: install_rs_check_toolchain
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::decomp_noise_squash_comp::'
# ERC7984
# ERC20
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc7984 \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off -- '::transfer::overflow'
# DEX
@@ -2276,7 +2135,6 @@ pcc_batch_5:
$(call run_recipe_with_details,clippy_tfhe_lints)
$(call run_recipe_with_details,check_compile_tests)
$(call run_recipe_with_details,clippy_backward_compat_data)
$(call run_recipe_with_details,check_backward_compat_locks_did_not_change)
.PHONY: pcc_batch_6 # duration: 6'32''
pcc_batch_6:
@@ -2285,10 +2143,8 @@ pcc_batch_6:
$(call run_recipe_with_details,clippy_tasks)
$(call run_recipe_with_details,clippy_tfhe_csprng)
$(call run_recipe_with_details,clippy_zk_pok)
$(call run_recipe_with_details,clippy_zk_pok_wasm)
$(call run_recipe_with_details,clippy_trivium)
$(call run_recipe_with_details,clippy_versionable)
$(call run_recipe_with_details,clippy_safe_serialize)
$(call run_recipe_with_details,clippy_param_dedup)
$(call run_recipe_with_details,docs)

View File

@@ -15,3 +15,12 @@ extend-ignore-identifiers-re = [
"0x[0-9a-fA-F]+",
"xrt_coreutil",
]
[files]
extend-exclude = [
"backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu",
"backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu",
"backends/tfhe-hpu-backend/config_store/**/*.link_summary",
"*.cbor",
"*.bcode",
]

View File

@@ -1,6 +1,6 @@
BSD 3-Clause Clear License
Copyright © 2026 ZAMA.
Copyright © 2025 ZAMA.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,

View File

@@ -1,14 +1,5 @@
use std::path::PathBuf;
/// Returns the distribution name declared in `/etc/os-release`.
///
/// The value of the first line starting with `NAME=` is returned with any
/// surrounding double quotes removed. `None` is returned when the file cannot
/// be read or contains no such line.
fn get_linux_distribution_name() -> Option<String> {
    std::fs::read_to_string("/etc/os-release")
        .ok()?
        .lines()
        .find_map(|entry| entry.strip_prefix("NAME="))
        .map(|raw_name| raw_name.trim_matches('"').to_string())
}
use std::process::Command;
fn main() {
if let Ok(val) = std::env::var("DOCS_RS") {
@@ -37,7 +28,9 @@ fn main() {
println!("cargo::rerun-if-changed=src");
if std::env::consts::OS == "linux" {
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
let output = Command::new("./get_os_name.sh").output().unwrap();
let distribution = String::from_utf8(output.stdout).unwrap();
if distribution != "Ubuntu\n" {
println!(
"cargo:warning=This Linux distribution is not officially supported. \
Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"

View File

@@ -62,29 +62,3 @@ rules:
cuda_synchronize_stream(...);
...
}
# Flags calls to CUDA runtime API functions (names matching `cuda` followed by
# an uppercase letter) whose result is not checked in one of the accepted ways
# listed under `patterns` below.
- id: tfhe-cuda-unwrapped-cuda-runtime-call
message: "CUDA runtime API call is not wrapped in `check_cuda_error(...)`."
severity: WARNING
languages: [c, cpp]
options:
generic_ellipsis_max_span: 500
paths:
include:
- "*.cu"
- "*.cuh"
- "*.cpp"
- "*.h"
exclude:
- backends/tfhe-cuda-backend/cuda/check_cuda.cu # contains cuda checking functions
- backends/tfhe-cuda-backend/cuda/include/device.h # contains the check_cuda_error macro (and others)
patterns:
# Candidate: any call whose callee name is captured in $FUNC
- pattern: $FUNC(...)
- metavariable-regex:
metavariable: $FUNC
regex: "^cuda[A-Z][A-Za-z0-9]*$" # matches cudaMalloc/cudaMemcpy/... (not project helpers like cuda_set_device)
# Accepted: the call is an argument of check_cuda_error(...)
- pattern-not-inside: check_cuda_error(...)
# Accepted: the call is followed by a check of cudaGetLastError()
- pattern-not-inside: |
$FUNC(...);
check_cuda_error(cudaGetLastError());
# Accepted: the return value is compared explicitly against a value
- pattern-not-inside: $FUNC(...) == $VAL

View File

@@ -382,17 +382,14 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
->use_sequential_algorithm_to_resolve_group_carries;
cuda_set_device(0);
check_cuda_error(
cudaEventCreateWithFlags(&create_indexes_done, cudaEventDisableTiming));
cudaEventCreateWithFlags(&create_indexes_done, cudaEventDisableTiming);
create_indexes_for_overflow_sub(streams.get_ith(0), num_blocks, group_size,
use_seq, allocate_gpu_memory, size_tracker);
check_cuda_error(cudaEventRecord(create_indexes_done, streams.stream(0)));
cudaEventRecord(create_indexes_done, streams.stream(0));
cuda_set_device(1);
check_cuda_error(
cudaStreamWaitEvent(streams.stream(1), create_indexes_done, 0));
cudaStreamWaitEvent(streams.stream(1), create_indexes_done, 0);
cuda_set_device(2);
check_cuda_error(
cudaStreamWaitEvent(streams.stream(2), create_indexes_done, 0));
cudaStreamWaitEvent(streams.stream(2), create_indexes_done, 0);
scatter_indexes_for_overflowing_sub(
streams.stream(1), streams.gpu_index(1),
@@ -845,7 +842,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
free(second_indexes_for_overflow_sub_gpu_2);
free(scalars_for_overflow_sub_gpu_2);
check_cuda_error(cudaEventDestroy(create_indexes_done));
cudaEventDestroy(create_indexes_done);
// release sub streams
sub_streams_1.release();

View File

@@ -721,7 +721,7 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
void *const *compute_bsks, void *const *ksks);
void *const *ksks);
void cleanup_cuda_integer_grouped_oprf_custom_range_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);

View File

@@ -390,7 +390,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_a[6], &wires_a[15], &input_bits[7]);
XOR(&wires_a[10], &wires_a[15], &wires_b[0]);
XOR(&wires_a[11], &wires_a[20], &wires_a[9]);
FLUSH(&wires_a[6], &wires_a[10], &wires_a[11]);
FLUSH(&wires_a[6], &wires_a[10]);
XOR(&wires_a[7], &input_bits[7], &wires_a[11]);
FLUSH(&wires_a[7]);
XOR(&wires_a[17], &wires_a[10], &wires_a[11]);
@@ -426,7 +426,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[22], &wires_b[18], &wires_a[19]);
XOR(&wires_b[23], &wires_b[19], &wires_a[21]);
XOR(&wires_b[24], &wires_b[20], &wires_a[18]);
FLUSH(&wires_b[21], &wires_b[22], &wires_b[23], &wires_b[24]);
FLUSH(&wires_b[21], &wires_b[23], &wires_b[24]);
XOR(&wires_b[25], &wires_b[21], &wires_b[22]);
FLUSH(&wires_b[25]);
@@ -468,7 +468,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[37], &wires_b[36], &wires_b[34]);
XOR(&wires_b[38], &wires_b[27], &wires_b[36]);
FLUSH(&wires_b[38], &wires_b[37]);
FLUSH(&wires_b[38]);
XOR(&wires_b[44], &wires_b[33], &wires_b[37]);
CudaRadixCiphertextFFI *and_outs_6[] = {&wires_b[39]};
@@ -479,7 +479,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[40], &wires_b[25], &wires_b[39]);
XOR(&wires_b[41], &wires_b[40], &wires_b[37]);
XOR(&wires_b[43], &wires_b[29], &wires_b[40]);
FLUSH(&wires_b[41], &wires_b[40], &wires_b[43], &wires_b[44]);
FLUSH(&wires_b[41]);
XOR(&wires_b[45], &wires_b[42], &wires_b[41]);
FLUSH(&wires_b[45]);
@@ -514,7 +514,6 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[57], &wires_b[50], &wires_b[53]);
XOR(&wires_b[58], &wires_c[4], &wires_b[46]);
XOR(&wires_b[59], &wires_c[3], &wires_b[54]);
FLUSH(&wires_b[57], &wires_b[58]);
XOR(&wires_b[60], &wires_b[46], &wires_b[57]);
XOR(&wires_b[61], &wires_c[14], &wires_b[57]);
XOR(&wires_b[62], &wires_b[52], &wires_b[58]);
@@ -590,7 +589,6 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
#undef FLUSH
#undef AND
#undef ADD_ONE_FLUSH
#undef ADD_ONE
}
/**

View File

@@ -489,7 +489,7 @@ template <typename Torus>
__host__ void host_modulus_switch_multi_bit(
cudaStream_t stream, uint32_t gpu_index, Torus *array_out, Torus *array_in,
int size, uint32_t log_modulus, uint32_t degree, uint32_t grouping_factor) {
check_cuda_error(cudaSetDevice(gpu_index));
cudaSetDevice(gpu_index);
int multibit_size = size / grouping_factor;
int num_threads = 0, num_blocks = 0;
getNumBlocksAndThreads(multibit_size, 1024, num_blocks, num_threads);

View File

@@ -326,10 +326,6 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
uint32_t gpu_index) {
if (size == 0)
return;
GPU_ASSERT(src != nullptr, "Cuda error: null device ptr");
GPU_ASSERT(dest != nullptr, "Cuda error: null device ptr");
cudaPointerAttributes attr_dest;
check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
PANIC_IF_FALSE(

View File

@@ -72,13 +72,13 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
void *const *compute_bsks, void *const *ksks) {
void *const *ksks) {
host_integer_grouped_oprf_custom_range<uint64_t>(
CudaStreams(streams), radix_lwe_out, num_blocks_intermediate,
(const uint64_t *)seeded_lwe_input, decomposed_scalar,
has_at_least_one_set, num_scalars, shift,
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks, compute_bsks,
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks,
(uint64_t *const *)ksks);
}

View File

@@ -114,7 +114,7 @@ void host_integer_grouped_oprf_custom_range(
const Torus *decomposed_scalar, const Torus *has_at_least_one_set,
uint32_t num_scalars, uint32_t shift,
int_grouped_oprf_custom_range_memory<Torus> *mem_ptr, void *const *bsks,
void *const *compute_bsks, Torus *const *ksks) {
Torus *const *ksks) {
CudaRadixCiphertextFFI *computation_buffer = mem_ptr->tmp_oprf_output;
set_zero_radix_ciphertext_slice_async<Torus>(
@@ -127,12 +127,12 @@ void host_integer_grouped_oprf_custom_range(
host_integer_scalar_mul_radix<Torus>(
streams, computation_buffer, decomposed_scalar, has_at_least_one_set,
mem_ptr->scalar_mul_buffer, compute_bsks, ksks,
mem_ptr->params.message_modulus, num_scalars);
mem_ptr->scalar_mul_buffer, bsks, ksks, mem_ptr->params.message_modulus,
num_scalars);
host_logical_scalar_shift_inplace<Torus>(
streams, computation_buffer, shift, mem_ptr->logical_scalar_shift_buffer,
compute_bsks, ksks, num_blocks_intermediate);
host_logical_scalar_shift_inplace<Torus>(streams, computation_buffer, shift,
mem_ptr->logical_scalar_shift_buffer,
bsks, ksks, num_blocks_intermediate);
uint32_t num_blocks_output = radix_lwe_out->num_radix_blocks;
uint32_t blocks_to_copy =

View File

@@ -373,8 +373,7 @@ __host__ bool verify_cuda_programmable_bootstrap_cg_grid_size(
// Get the number of streaming multiprocessors
int number_of_sm = 0;
check_cuda_error(
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0));
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0);
return number_of_blocks <= max_active_blocks_per_sm * number_of_sm;
}

View File

@@ -517,8 +517,7 @@ __host__ bool verify_cuda_programmable_bootstrap_cg_multi_bit_grid_size(
// Get the number of streaming multiprocessors
int number_of_sm = 0;
check_cuda_error(
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0));
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0);
return number_of_blocks <= max_active_blocks_per_sm * number_of_sm;
}

View File

@@ -784,9 +784,9 @@ __host__ uint64_t scratch_programmable_bootstrap_tbc_128(
device_programmable_bootstrap_tbc_128<InputTorus, params, FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize,
full_sm)); // full_sm + minimum_sm_tbc));
check_cuda_error(cudaFuncSetCacheConfig(
cudaFuncSetCacheConfig(
device_programmable_bootstrap_tbc_128<InputTorus, params, FULLSM>,
cudaFuncCachePreferShared));
cudaFuncCachePreferShared);
check_cuda_error(cudaFuncSetAttribute(
device_programmable_bootstrap_tbc_128<InputTorus, params, FULLSM>,
cudaFuncAttributeNonPortableClusterSizeAllowed, true));
@@ -1271,8 +1271,7 @@ __host__ bool verify_cuda_programmable_bootstrap_128_cg_grid_size(
// Get the number of streaming multiprocessors
int number_of_sm = 0;
check_cuda_error(
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0));
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0);
return number_of_blocks <= max_active_blocks_per_sm * number_of_sm;
}

View File

@@ -308,7 +308,6 @@ void cleanup_cuda_multi_bit_programmable_bootstrap_noise_tests_128(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer) {
cleanup_cuda_multi_bit_programmable_bootstrap_128(stream, gpu_index,
pbs_buffer);
cuda_synchronize_stream(static_cast<cudaStream_t>(stream), gpu_index);
}
// Noise tests variant of the 128-bit multi-bit PBS, restricted to

View File

@@ -1212,47 +1212,46 @@ __host__ bool verify_cuda_programmable_bootstrap_cg_multi_bit_grid_size_128(
int max_active_blocks_per_sm;
if (max_shared_memory < partial_sm_cg_accumulate) {
check_cuda_error(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&max_active_blocks_per_sm,
(void *)device_multi_bit_programmable_bootstrap_cg_accumulate_128<
Torus, params, NOSM>,
thds, 0));
thds, 0);
} else if (max_shared_memory < full_sm_cg_accumulate) {
check_cuda_error(cudaFuncSetAttribute(
device_multi_bit_programmable_bootstrap_cg_accumulate_128<Torus, params,
PARTIALSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, partial_sm_cg_accumulate));
check_cuda_error(cudaFuncSetCacheConfig(
cudaFuncSetCacheConfig(
device_multi_bit_programmable_bootstrap_cg_accumulate_128<Torus, params,
PARTIALSM>,
cudaFuncCachePreferShared));
check_cuda_error(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
cudaFuncCachePreferShared);
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&max_active_blocks_per_sm,
(void *)device_multi_bit_programmable_bootstrap_cg_accumulate_128<
Torus, params, PARTIALSM>,
thds, partial_sm_cg_accumulate));
thds, partial_sm_cg_accumulate);
check_cuda_error(cudaGetLastError());
} else {
check_cuda_error(cudaFuncSetAttribute(
device_multi_bit_programmable_bootstrap_cg_accumulate_128<Torus, params,
FULLSM>,
cudaFuncAttributeMaxDynamicSharedMemorySize, full_sm_cg_accumulate));
check_cuda_error(cudaFuncSetCacheConfig(
cudaFuncSetCacheConfig(
device_multi_bit_programmable_bootstrap_cg_accumulate_128<Torus, params,
FULLSM>,
cudaFuncCachePreferShared));
check_cuda_error(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
cudaFuncCachePreferShared);
cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&max_active_blocks_per_sm,
(void *)device_multi_bit_programmable_bootstrap_cg_accumulate_128<
Torus, params, FULLSM>,
thds, full_sm_cg_accumulate));
thds, full_sm_cg_accumulate);
check_cuda_error(cudaGetLastError());
}
// Get the number of streaming multiprocessors
int number_of_sm = 0;
check_cuda_error(
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0));
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0);
return number_of_blocks <= max_active_blocks_per_sm * number_of_sm;
}

View File

@@ -739,8 +739,7 @@ __host__ bool verify_cuda_programmable_bootstrap_tbc_grid_size(
// Get the number of streaming multiprocessors
int number_of_sm = 0;
check_cuda_error(
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0));
cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 0);
return number_of_blocks <= max_active_blocks_per_sm * number_of_sm;
}

View File

@@ -0,0 +1,3 @@
#!/usr/bin/env bash

# Print the Linux distribution name (the NAME field of /etc/os-release)
# followed by a newline, e.g. "Ubuntu".
#
# /etc/os-release is a list of shell-compatible variable assignments, so it is
# sourced instead of being parsed with grep/sed. This handles double-quoted,
# single-quoted and unquoted values alike; the previous sed pipeline only
# stripped `NAME="` and therefore printed `NAME=Ubuntu` for an unquoted entry.
#
# Nothing is printed when the file is unreadable or does not define NAME.

os_release_file=/etc/os-release

if [[ -r "$os_release_file" ]]; then
    # Reset NAME so that a value inherited from the environment is never echoed.
    NAME=""
    # shellcheck source=/dev/null
    . "$os_release_file"
    if [[ -n "$NAME" ]]; then
        printf '%s\n' "$NAME"
    fi
fi

View File

@@ -136,6 +136,9 @@ pub type Direction = ffi::c_uint;
pub const BitValue_Zero: BitValue = 0;
pub const BitValue_One: BitValue = 1;
pub type BitValue = ffi::c_uint;
pub const RERAND_MODE_RERAND_WITH_KS: RERAND_MODE = 0;
pub const RERAND_MODE_RERAND_WITHOUT_KS: RERAND_MODE = 1;
pub type RERAND_MODE = ffi::c_uint;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CudaStreamsFFI {
@@ -1647,7 +1650,6 @@ unsafe extern "C" {
shift: u32,
mem: *mut i8,
bsks: *const *mut ffi::c_void,
compute_bsks: *const *mut ffi::c_void,
ksks: *const *mut ffi::c_void,
);
}
@@ -2477,9 +2479,6 @@ unsafe extern "C" {
glwe_index: u32,
);
}
pub const RERAND_MODE_RERAND_WITH_KS: RERAND_MODE = 0;
pub const RERAND_MODE_RERAND_WITHOUT_KS: RERAND_MODE = 1;
pub type RERAND_MODE = ffi::c_uint;
unsafe extern "C" {
pub fn scratch_cuda_rerand_64_async(
streams: CudaStreamsFFI,
@@ -2492,7 +2491,7 @@ unsafe extern "C" {
message_modulus: u32,
carry_modulus: u32,
allocate_gpu_memory: bool,
rerand_type: RERAND_MODE,
rerand_type: u32,
) -> u64;
}
unsafe extern "C" {

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-hpu-backend"
version = "0.5.0"
version = "0.4.0"
edition = "2021"
license = "BSD-3-Clause-Clear"
description = "HPU implementation on FPGA of TFHE-rs primitives."
@@ -36,7 +36,7 @@ thiserror = "1.0.61"
bytemuck = { workspace = true }
anyhow = "1.0.82"
lazy_static = "1.4.0"
rand = "0.10.1"
rand = "0.8.5"
regex = "1.10.4"
bitflags = { version = "2.5.0", features = ["serde"] }
itertools = "0.11.0"

View File

@@ -1,6 +1,6 @@
BSD 3-Clause Clear License
Copyright © 2026 ZAMA.
Copyright © 2025 ZAMA.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,

View File

@@ -297,8 +297,8 @@ source setup_hpu.sh --config v80 -p
# Run hlapi benches
make test_high_level_api_hpu
# Run hlapi erc7984 benches
make bench_hlapi_erc7984_hpu
# Run hlapi erc20 benches
make bench_hlapi_erc20_hpu
# Run integer level benches
make bench_integer_hpu

View File

@@ -109,7 +109,7 @@
flush_behaviour = "Patient"
flush = true
[firmware.op_cfg.by_op.ERC_7984]
[firmware.op_cfg.by_op.ERC_20]
fill_batch_fifo = true
min_batch_size = false
use_tiers = true

View File

@@ -121,7 +121,7 @@
flush_behaviour = "Patient"
flush = true
[firmware.op_cfg.by_op.ERC_7984]
[firmware.op_cfg.by_op.ERC_20]
fill_batch_fifo = true
min_batch_size = false
use_tiers = true

View File

@@ -230,7 +230,7 @@ iop!(
[IOP_CMP -> "CMP_NEQ", opcode::CMP_NEQ],
[IOP_CT_F_CT_BOOL -> "IF_THEN_ZERO", opcode::IF_THEN_ZERO],
[IOP_CT_F_2CT_BOOL -> "IF_THEN_ELSE", opcode::IF_THEN_ELSE],
[IOP_2CT_F_3CT -> "ERC_7984", opcode::ERC_7984],
[IOP_2CT_F_3CT -> "ERC_20", opcode::ERC_20],
[IOP_CT_F_CT -> "MEMCPY", opcode::MEMCPY],
[IOP_CT_F_CT -> "ILOG2", opcode::ILOG2],
[IOP_CT_F_CT -> "COUNT0", opcode::COUNT0],
@@ -240,5 +240,5 @@ iop!(
[IOP_CT_F_CT -> "TRAIL0", opcode::TRAIL0],
[IOP_CT_F_CT -> "TRAIL1", opcode::TRAIL1],
[IOP_NCT_F_2NCT -> "ADD_SIMD", opcode::ADD_SIMD],
[IOP_2NCT_F_3NCT -> "ERC_7984_SIMD", opcode::ERC_7984_SIMD],
[IOP_2NCT_F_3NCT -> "ERC_20_SIMD", opcode::ERC_20_SIMD],
);

View File

@@ -74,9 +74,9 @@ pub const IF_THEN_ZERO: u8 = 0xCA;
pub const IF_THEN_ELSE: u8 = 0xCB;
// Custom algorithm
// ERC7984 -> fund xfer algorithm
// ERC20 -> fund xfer algorithm
// 2Ct <- func(3Ct)
pub const ERC_7984: u8 = 0x80;
pub const ERC_20: u8 = 0x80;
// Count bits
pub const COUNT0: u8 = 0x81;
@@ -89,7 +89,7 @@ pub const TRAIL1: u8 = 0x87;
// SIMD for maximum throughput
pub const ADD_SIMD: u8 = 0xF0;
pub const ERC_7984_SIMD: u8 = 0xF1;
pub const ERC_20_SIMD: u8 = 0xF1;
//
// Utility operations
// Used to handle real clone of ciphertext already uploaded in the Hpu memory

View File

@@ -24,7 +24,7 @@ use mem_alloc::{MemAlloc, MemChunk};
mod qdma;
use qdma::QdmaDriver;
use rand::RngExt;
use rand::Rng;
const DMA_XFER_ALIGN: usize = 4096_usize;
@@ -148,8 +148,8 @@ impl HpuHw {
tracing::debug!("Load stage1 through JTAG");
let pdi_stg1_tmp = format!(
"hpu_stg1_{}.pdi",
rand::rng()
.sample_iter(rand::distr::Alphanumeric)
rand::thread_rng()
.sample_iter(rand::distributions::Alphanumeric)
.take(5)
.map(char::from)
.collect::<String>()

View File

@@ -31,7 +31,7 @@ crate::impl_fw!("Demo" [
IF_THEN_ZERO => fw_impl::ilp::iop_if_then_zero;
IF_THEN_ELSE => fw_impl::ilp::iop_if_then_else;
ERC_7984 => fw_impl::ilp::iop_erc_7984;
ERC_20 => fw_impl::ilp::iop_erc_20;
CMP_GT => cmp_gt;
CMP_GTE => cmp_gte;

View File

@@ -61,7 +61,7 @@ crate::impl_fw!("Ilp" [
IF_THEN_ZERO => fw_impl::ilp::iop_if_then_zero;
IF_THEN_ELSE => fw_impl::ilp::iop_if_then_else;
ERC_7984 => fw_impl::ilp::iop_erc_7984;
ERC_20 => fw_impl::ilp::iop_erc_20;
MEMCPY => fw_impl::ilp::iop_memcpy;
@@ -74,7 +74,7 @@ crate::impl_fw!("Ilp" [
TRAIL1 => fw_impl::ilp_log::iop_trail1;
// SIMD Implementations
ADD_SIMD => fw_impl::llt::iop_add_simd;
ERC_7984_SIMD => fw_impl::llt::iop_erc_7984_simd;
ERC_20_SIMD => fw_impl::llt::iop_erc_20_simd;
]);
#[instrument(level = "trace", skip(prog))]
@@ -1296,13 +1296,13 @@ pub fn iop_if_then_else(prog: &mut Program) {
});
}
/// Implement erc_7984 fund xfer
/// Implement erc_20 fund xfer
/// Targeted algorithm is as follows:
/// 1. Check that from has enough funds
/// 2. Compute real_amount to xfer (i.e. amount or 0)
/// 3. Compute new amount (from - new_amount, to + new_amount)
#[instrument(level = "info", skip(prog))]
pub fn iop_erc_7984(prog: &mut Program) {
pub fn iop_erc_20(prog: &mut Program) {
// Allocate metavariables:
// Dest -> Operand
let mut dst_from = prog.iop_template_var(OperandKind::Dst, 0);
@@ -1314,7 +1314,7 @@ pub fn iop_erc_7984(prog: &mut Program) {
let src_amount = prog.iop_template_var(OperandKind::Src, 2);
// Add Comment header
prog.push_comment("ERC_7984 (new_from, new_to) <- (from, to, amount)".to_string());
prog.push_comment("ERC_20 (new_from, new_to) <- (from, to, amount)".to_string());
let props = prog.params();
let tfhe_params: asm::DigitParameters = props.clone().into();

View File

@@ -70,7 +70,7 @@ crate::impl_fw!("Llt" [
IF_THEN_ZERO => fw_impl::ilp::iop_if_then_zero;
IF_THEN_ELSE => fw_impl::ilp::iop_if_then_else;
ERC_7984 => fw_impl::llt::iop_erc_7984;
ERC_20 => fw_impl::llt::iop_erc_20;
MEMCPY => fw_impl::ilp::iop_memcpy;
COUNT0 => fw_impl::ilp_log::iop_count0;
@@ -83,7 +83,7 @@ crate::impl_fw!("Llt" [
// SIMD Implementations
ADD_SIMD => fw_impl::llt::iop_add_simd;
ERC_7984_SIMD => fw_impl::llt::iop_erc_7984_simd;
ERC_20_SIMD => fw_impl::llt::iop_erc_20_simd;
]);
// ----------------------------------------------------------------------------
@@ -225,24 +225,24 @@ pub fn iop_muls(prog: &mut Program) {
}
#[instrument(level = "trace", skip(prog))]
pub fn iop_erc_7984(prog: &mut Program) {
pub fn iop_erc_20(prog: &mut Program) {
// Add Comment header
prog.push_comment("ERC_7984 (new_from, new_to) <- (from, to, amount)".to_string());
prog.push_comment("ERC_20 (new_from, new_to) <- (from, to, amount)".to_string());
// TODO: Make sweep of kogge_blk_w
// All these little parameters would be very handy to write an
// exploration/compilation program which would try to minimize latency by
// playing with these.
iop_erc_7984_rtl(prog, 0, Some(10)).add_to_prog(prog);
iop_erc_20_rtl(prog, 0, Some(10)).add_to_prog(prog);
}
#[instrument(level = "trace", skip(prog))]
pub fn iop_erc_7984_simd(prog: &mut Program) {
pub fn iop_erc_20_simd(prog: &mut Program) {
// Add Comment header
prog.push_comment("ERC_7984_SIMD (new_from, new_to) <- (from, to, amount)".to_string());
prog.push_comment("ERC_20_SIMD (new_from, new_to) <- (from, to, amount)".to_string());
simd(
prog,
crate::asm::iop::SIMD_N,
fw_impl::llt::iop_erc_7984_rtl,
fw_impl::llt::iop_erc_20_rtl,
None,
);
}
@@ -379,7 +379,7 @@ pub fn iop_rotate_scalar_left(prog: &mut Program) {
// Helper Functions
// ----------------------------------------------------------------------------
/// Implement erc_7984 fund xfer
/// Implement erc_20 fund xfer
/// Targeted algorithm is as follows:
/// 1. Check that from has enough funds
/// 2. Compute real_amount to xfer (i.e. amount or 0)
@@ -391,7 +391,7 @@ pub fn iop_rotate_scalar_left(prog: &mut Program) {
/// (dst_from[0], dst_to[0], ..., dst_from[N-1], dst_to[N-1])
/// Where N is the batch size
#[instrument(level = "trace", skip(prog))]
pub fn iop_erc_7984_rtl(prog: &mut Program, batch_index: u8, kogge_blk_w: Option<usize>) -> Rtl {
pub fn iop_erc_20_rtl(prog: &mut Program, batch_index: u8, kogge_blk_w: Option<usize>) -> Rtl {
// Allocate metavariables:
// Dest -> Operand
let dst_from = prog.iop_template_var(OperandKind::Dst, 2 * batch_index);

View File

@@ -24,7 +24,7 @@ bindgen.workspace = true
[dependencies]
ark-ec.workspace = true
ark-ff.workspace = true
tfhe-cuda-backend = { version = "0.14.0", path = "../tfhe-cuda-backend" }
tfhe-cuda-backend = { version = "=0.14.0", path = "../tfhe-cuda-backend" }
[features]
default = []

View File

@@ -1,14 +1,5 @@
use std::path::PathBuf;
/// Returns the value of the first `NAME=` entry in `/etc/os-release`, with any
/// leading/trailing double quotes removed.
///
/// Yields `None` when the file cannot be read or contains no line starting
/// with `NAME=`.
fn get_linux_distribution_name() -> Option<String> {
    let os_release = std::fs::read_to_string("/etc/os-release").ok()?;
    os_release
        .lines()
        .find_map(|entry| entry.strip_prefix("NAME="))
        .map(|raw_name| raw_name.trim_matches('"').to_string())
}
use std::process::Command;
fn main() {
// Handle docs.rs builds (no CUDA available)
@@ -38,10 +29,16 @@ fn main() {
println!("cargo:rustc-link-arg=-Wl,--allow-multiple-definition");
println!("cargo:rustc-link-arg=-Wl,--no-as-needed");
// Check Linux distribution (reuse script from tfhe-cuda-backend)
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
.expect("CARGO_MANIFEST_DIR must be set by cargo during build");
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
let script_path = PathBuf::from(&manifest_dir).join("../tfhe-cuda-backend/get_os_name.sh");
let output = Command::new(&script_path)
.output()
.expect("Failed to run get_os_name.sh — is tfhe-cuda-backend present?");
let distribution =
String::from_utf8(output.stdout).expect("get_os_name.sh output must be valid UTF-8");
if distribution != "Ubuntu\n" {
println!(
"cargo:warning=This Linux distribution is not officially supported. \
Only Ubuntu is supported by zk-cuda-backend at this time. Build may fail\n"

View File

@@ -71,6 +71,11 @@ set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0 -G")
# Additional CUDA flags (aligned with tfhe-cuda-backend)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra --use_fast_math --expt-relaxed-constexpr")
# =============================================================================
# Path to tfhe-cuda-backend for device utilities
# =============================================================================
set(TFHE_CUDA_BACKEND_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-backend/cuda)
# Core source files (without device utilities). Device utilities come from tfhe-cuda-backend.
set(FP_CORE_SOURCES src/primitives/fp.cu src/primitives/fp2.cu src/curve.cu src/msm/pippenger/msm_pippenger.cu
src/msm/msm.cu)
@@ -107,7 +112,7 @@ endif()
target_link_libraries(zk_cuda_backend PUBLIC cudart)
# Include both local headers and tfhe-cuda-backend headers (for device.h)
target_include_directories(zk_cuda_backend PUBLIC include ../src/include)
target_include_directories(zk_cuda_backend PUBLIC include ../src/include ${TFHE_CUDA_BACKEND_DIR}/include)
# =============================================================================
# Tests and Benchmarks (optional, controlled by ZK_CUDA_BACKEND_BUILD_TESTS/BENCHMARKS)
@@ -130,3 +135,4 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
message(STATUS "C++ standard: ${CMAKE_CXX_STANDARD}")
message(STATUS "CUDA standard: ${CMAKE_CUDA_STANDARD}")
message(STATUS "tfhe-cuda-backend path: ${TFHE_CUDA_BACKEND_DIR}")

View File

@@ -1,35 +0,0 @@
#pragma once
#include <cstddef>
#include <cstdio>
#include "device.h"
// Variadic checked multiplication of size_t values.
//
// Multiplies `first` and every value in `rest` (each converted to size_t)
// left-to-right using __builtin_mul_overflow.
//
// Returns true as soon as a partial product overflows; in that case *out is
// not written and its value must not be relied upon. Returns false on success
// and stores the full product in *out.
//
// Works with a single factor as well (empty `rest`): the factors are gathered
// in an array that always contains at least `first`, so no empty braced list
// is ever iterated.
template <typename... Args>
inline bool checked_mul(size_t *out, size_t first, Args... rest) {
  const size_t factors[] = {first, static_cast<size_t>(rest)...};
  // Start from the multiplicative identity: 1 * first can never overflow, so
  // the overflow behaviour is the same as starting the fold from `first`.
  size_t result = 1;
  for (size_t value : factors) {
    if (__builtin_mul_overflow(result, value, &result))
      return true;
  }
  *out = result;
  return false;
}
// Variadic safe multiplication: computes the product and panics on overflow.
//
// Multiplies `first` by every value in `rest` through checked_mul and returns
// the product. If checked_mul reports an overflow, PANIC_IF_FALSE fires with
// the message below instead of returning a wrapped value.
// NOTE(review): PANIC_IF_FALSE comes from device.h and appears to embed the
// text of its condition in the panic output, so renaming `overflow` would
// change that output -- confirm before refactoring.
template <typename... Args> inline size_t safe_mul(size_t first, Args... rest) {
size_t result;
bool overflow = checked_mul(&result, first, rest...);
PANIC_IF_FALSE(!overflow, "multiplication overflow wraps size_t");
return result;
}
// Overflow-checked byte-size helper: returns (args... * sizeof(T)).
// The element size of T is appended as the last factor and the whole product
// is evaluated by safe_mul, which handles overflow.
template <typename T, typename... Args>
inline size_t safe_mul_sizeof(Args... args) {
  const size_t element_size = sizeof(T);
  return safe_mul(args..., element_size);
}

View File

@@ -1,145 +0,0 @@
#ifndef DEVICE_H
#define DEVICE_H
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>
extern "C" {
#define check_cuda_error(ans) \
{ cuda_error((ans), __FILE__, __LINE__); }
// Checks the status of a CUDA runtime call.
//
// Does nothing when `code` is cudaSuccess. Otherwise prints the CUDA error
// string together with the reporting `file` and `line` to stderr and aborts
// the process.
inline void cuda_error(cudaError_t code, const char *file, int line) {
  if (code == cudaSuccess)
    return;

  const char *description = cudaGetErrorString(code);
  std::fprintf(stderr, "Cuda error: %s %s %d\n", description, file, line);
  std::abort();
}
// The PANIC macro should be used to validate user-inputs to GPU functions
// it will execute in all targets, including production settings
// e.g., cudaMemCopy to the device should check that the destination pointer is
// a device pointer
#define PANIC(format, ...) \
{ \
std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__, \
__LINE__, __func__, ##__VA_ARGS__); \
std::abort(); \
}
// This is a generic assertion checking macro with user defined printf-style
// message
#define PANIC_IF_FALSE(cond, format, ...) \
do { \
if (!(cond)) { \
PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond); \
} \
} while (0)
#ifndef GPU_ASSERTS_DISABLE
// The GPU assert should be used to validate assumptions in algorithms,
// for example, checking that two user-provided quantities have a certain
// relationship or that the size of the buffer provided to a function is
// sufficient when it is filled with some algorithm that depends on
// user-provided inputs e.g., OPRF corrections buffer should not have a size
// higher than the number of blocks in the datatype that is generated
#define GPU_ASSERT(cond, format, ...)                                          \
  PANIC_IF_FALSE(cond, format, ##__VA_ARGS__)
#else
// Asserts disabled: expand to an empty statement. The macro is variadic so
// that call sites written for the enabled form, GPU_ASSERT(cond, format, ...),
// still compile when GPU_ASSERTS_DISABLE is defined. None of the arguments
// are evaluated.
#define GPU_ASSERT(...)                                                        \
  do {                                                                         \
  } while (0)
#endif
uint32_t cuda_get_device();
void cuda_set_device(uint32_t gpu_index);
cudaEvent_t cuda_create_event(uint32_t gpu_index);
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
uint32_t gpu_index);
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
uint32_t gpu_index);
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index);
cudaStream_t cuda_create_stream(uint32_t gpu_index);
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
uint32_t cuda_is_available();
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
uint64_t &size_tracker,
bool allocate_gpu_memory);
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
uint64_t cuda_device_total_memory(uint32_t gpu_index);
void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
uint64_t size,
cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
void *dest, void const *src, uint64_t size, cudaStream_t stream,
uint32_t gpu_index, bool gpu_memory_allocated);
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
uint32_t gpu_index);
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
int cuda_get_number_of_gpus();
int cuda_get_number_of_sms();
void cuda_synchronize_device(uint32_t gpu_index);
void cuda_drop(void *ptr, uint32_t gpu_index);
void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
}
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
uint32_t cuda_get_max_shared_memory_per_block(uint32_t gpu_index);
bool cuda_check_support_cooperative_groups();
bool cuda_check_support_thread_block_clusters();
template <typename Torus>
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
Torus *d_array, Torus value, Torus n);
#endif

View File

@@ -1,16 +0,0 @@
#ifndef HELPER_PROFILE
#define HELPER_PROFILE
#ifdef USE_NVTOOLS
#include <nvtx3/nvToolsExt.h>
#endif
void cuda_nvtx_label_with_color(const char *name);
void cuda_nvtx_pop();
#define PUSH_RANGE(name) \
{ cuda_nvtx_label_with_color(name); }
#define POP_RANGE() \
{ cuda_nvtx_pop(); }
#endif

View File

@@ -97,23 +97,28 @@ size_t pippenger_scratch_size_g2(uint32_t n, uint32_t gpu_index);
// d_scalars: Device pointer to input BigInt scalars (array of n scalars)
// n: Number of points/scalars
// d_scratch: Caller-provided device scratch buffer for intermediate results
// size_tracker: Reference for tracking GPU memory allocation sizes
void point_msm_g1_async(cudaStream_t stream, uint32_t gpu_index,
G1Projective *h_result, const G1Affine *d_points,
const Scalar *d_scalars, uint32_t n,
G1Projective *d_scratch);
G1Projective *d_scratch, uint64_t &size_tracker,
bool gpu_memory_allocated);
void point_msm_g1(cudaStream_t stream, uint32_t gpu_index,
G1Projective *h_result, const G1Affine *d_points,
const Scalar *d_scalars, uint32_t n, G1Projective *d_scratch);
const Scalar *d_scalars, uint32_t n, G1Projective *d_scratch,
uint64_t &size_tracker, bool gpu_memory_allocated);
// MSM for G2 points with BigInt scalars (projective result)
// Result is written directly to a host pointer.
void point_msm_g2_async(cudaStream_t stream, uint32_t gpu_index,
G2ProjectivePoint *h_result, const G2Point *d_points,
const Scalar *d_scalars, uint32_t n,
G2ProjectivePoint *d_scratch);
G2ProjectivePoint *d_scratch, uint64_t &size_tracker,
bool gpu_memory_allocated);
void point_msm_g2(cudaStream_t stream, uint32_t gpu_index,
G2ProjectivePoint *h_result, const G2Point *d_points,
const Scalar *d_scalars, uint32_t n,
G2ProjectivePoint *d_scratch);
G2ProjectivePoint *d_scratch, uint64_t &size_tracker,
bool gpu_memory_allocated);

View File

@@ -1,43 +0,0 @@
#include "helper_profile.cuh"
#include <stdint.h>
// Adler-32-style checksum over a NUL-terminated byte string.
//
// Differs from the textbook formula in that every byte is doubled before it
// is added to the low sum. Both running sums are reduced modulo 65521; the
// result packs the high sum in the upper 16 bits and the low sum in the
// lower 16 bits. An empty string yields 1.
uint32_t adler32(const unsigned char *data) {
  const uint32_t kModulus = 65521;
  uint32_t low_sum = 1;
  uint32_t high_sum = 0;

  for (const unsigned char *cursor = data; *cursor != 0; ++cursor) {
    low_sum = (low_sum + *cursor * 2) % kModulus;
    high_sum = (high_sum + low_sum) % kModulus;
  }

  return (high_sum << 16) | low_sum;
}
// Pushes an NVTX range labelled `name`, colored from a hash of the label.
// The whole body is compiled out unless USE_NVTOOLS is defined.
void cuda_nvtx_label_with_color(const char *name) {
#ifdef USE_NVTOOLS
  // Pick the three color channels from different bit fields of the hash.
  int label_hash = adler32((const unsigned char *)name);
  int red = label_hash & 0x000000ff;
  int green = (label_hash & 0x000ff000) >> 12;
  int blue = (label_hash & 0x0ff00000) >> 20;

  // When every channel is below 64, scale them up.
  if (red < 64 && green < 64 && blue < 64) {
    red = red * 3;
    green = green * 3 + 64;
    blue = blue * 4;
  }

  // Opaque ARGB value: alpha forced to 0xff.
  int argb = 0xff000000 | (red << 16) | (green << 8) | (blue);

  nvtxEventAttributes_t eventAttrib = {0};
  eventAttrib.version = NVTX_VERSION;
  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  eventAttrib.colorType = NVTX_COLOR_ARGB;
  eventAttrib.color = argb;
  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
  eventAttrib.message.ascii = name;
  nvtxRangePushEx(&eventAttrib);
#endif
}
// Pops the current NVTX range via nvtxRangePop().
// Compiles to an empty function unless USE_NVTOOLS is defined.
void cuda_nvtx_pop() {
#ifdef USE_NVTOOLS
nvtxRangePop();
#endif
}

View File

@@ -8,16 +8,17 @@
// Multi-Scalar Multiplication (MSM) using Pippenger algorithm for BLS12-446
// Forward declarations for Pippenger implementations
void point_msm_g1_pippenger_async(cudaStream_t stream, uint32_t gpu_index,
G1Projective *h_result,
const G1Affine *d_points,
const Scalar *d_scalars, uint32_t n,
G1Projective *d_scratch);
void point_msm_g1_pippenger_async(
cudaStream_t stream, uint32_t gpu_index, G1Projective *h_result,
const G1Affine *d_points, const Scalar *d_scalars, uint32_t n,
G1Projective *d_scratch, uint64_t &size_tracker, bool gpu_memory_allocated);
void point_msm_g2_pippenger_async(cudaStream_t stream, uint32_t gpu_index,
G2ProjectivePoint *h_result,
const G2Point *d_points,
const Scalar *d_scalars, uint32_t n,
G2ProjectivePoint *d_scratch);
G2ProjectivePoint *d_scratch,
uint64_t &size_tracker,
bool gpu_memory_allocated);
// ============================================================================
// Public MSM API for BigInt scalars
@@ -28,9 +29,11 @@ void point_msm_g2_pippenger_async(cudaStream_t stream, uint32_t gpu_index,
void point_msm_g1_async(cudaStream_t stream, uint32_t gpu_index,
G1Projective *h_result, const G1Affine *d_points,
const Scalar *d_scalars, uint32_t n,
G1Projective *d_scratch) {
G1Projective *d_scratch, uint64_t &size_tracker,
bool gpu_memory_allocated) {
point_msm_g1_pippenger_async(stream, gpu_index, h_result, d_points, d_scalars,
n, d_scratch);
n, d_scratch, size_tracker,
gpu_memory_allocated);
}
// MSM with BigInt scalars for G2 (projective coordinates internally)
@@ -38,17 +41,19 @@ void point_msm_g1_async(cudaStream_t stream, uint32_t gpu_index,
void point_msm_g2_async(cudaStream_t stream, uint32_t gpu_index,
G2ProjectivePoint *h_result, const G2Point *d_points,
const Scalar *d_scalars, uint32_t n,
G2ProjectivePoint *d_scratch) {
G2ProjectivePoint *d_scratch, uint64_t &size_tracker,
bool gpu_memory_allocated) {
point_msm_g2_pippenger_async(stream, gpu_index, h_result, d_points, d_scalars,
n, d_scratch);
n, d_scratch, size_tracker,
gpu_memory_allocated);
}
void point_msm_g1(cudaStream_t stream, uint32_t gpu_index,
G1Projective *h_result, const G1Affine *d_points,
const Scalar *d_scalars, uint32_t n,
G1Projective *d_scratch) {
const Scalar *d_scalars, uint32_t n, G1Projective *d_scratch,
uint64_t &size_tracker, bool gpu_memory_allocated) {
point_msm_g1_async(stream, gpu_index, h_result, d_points, d_scalars, n,
d_scratch);
d_scratch, size_tracker, gpu_memory_allocated);
// The async impl already syncs internally before the CPU-side Horner phase,
// so the stream is idle here. This sync is kept for defensive correctness.
cuda_synchronize_stream(stream, gpu_index);
@@ -57,9 +62,10 @@ void point_msm_g1(cudaStream_t stream, uint32_t gpu_index,
void point_msm_g2(cudaStream_t stream, uint32_t gpu_index,
G2ProjectivePoint *h_result, const G2Point *d_points,
const Scalar *d_scalars, uint32_t n,
G2ProjectivePoint *d_scratch) {
G2ProjectivePoint *d_scratch, uint64_t &size_tracker,
bool gpu_memory_allocated) {
point_msm_g2_async(stream, gpu_index, h_result, d_points, d_scalars, n,
d_scratch);
d_scratch, size_tracker, gpu_memory_allocated);
// See comment in point_msm_g1 above.
cuda_synchronize_stream(stream, gpu_index);
}

View File

@@ -493,13 +493,12 @@ void horner_combine_cpu(ProjectiveType &result,
// window sums. The caller is responsible for allocating and freeing this
// buffer.
template <typename AffineType, typename ProjectiveType>
void point_msm_pippenger_impl_async(cudaStream_t stream, uint32_t gpu_index,
ProjectiveType *h_result,
const AffineType *d_points,
const Scalar *d_scalars, uint32_t n,
uint32_t threads_per_block,
uint32_t window_size, uint32_t bucket_count,
ProjectiveType *d_scratch) {
void point_msm_pippenger_impl_async(
cudaStream_t stream, uint32_t gpu_index, ProjectiveType *h_result,
const AffineType *d_points, const Scalar *d_scalars, uint32_t n,
uint32_t threads_per_block, uint32_t window_size, uint32_t bucket_count,
ProjectiveType *d_scratch, uint64_t &size_tracker,
bool gpu_memory_allocated) {
using ProjectivePoint = Projective<ProjectiveType>;
if (n == 0) {
@@ -706,13 +705,16 @@ void point_msm_g1_pippenger_async(cudaStream_t stream, uint32_t gpu_index,
G1Projective *h_result,
const G1Affine *d_points,
const Scalar *d_scalars, uint32_t n,
G1Projective *d_scratch) {
G1Projective *d_scratch,
uint64_t &size_tracker,
bool gpu_memory_allocated) {
uint32_t window_size, bucket_count;
get_g1_window_params(n, window_size, bucket_count);
point_msm_pippenger_impl_async<G1Affine, G1Projective>(
stream, gpu_index, h_result, d_points, d_scalars, n,
msm_threads_per_block<G1Affine>(n), window_size, bucket_count, d_scratch);
msm_threads_per_block<G1Affine>(n), window_size, bucket_count, d_scratch,
size_tracker, gpu_memory_allocated);
}
// MSM with BigInt scalars for G2 (projective coordinates internally)
@@ -722,11 +724,14 @@ void point_msm_g2_pippenger_async(cudaStream_t stream, uint32_t gpu_index,
G2ProjectivePoint *h_result,
const G2Point *d_points,
const Scalar *d_scalars, uint32_t n,
G2ProjectivePoint *d_scratch) {
G2ProjectivePoint *d_scratch,
uint64_t &size_tracker,
bool gpu_memory_allocated) {
uint32_t window_size, bucket_count;
get_g2_window_params(n, window_size, bucket_count);
point_msm_pippenger_impl_async<G2Point, G2ProjectivePoint>(
stream, gpu_index, h_result, d_points, d_scalars, n,
msm_threads_per_block<G2Point>(n), window_size, bucket_count, d_scratch);
msm_threads_per_block<G2Point>(n), window_size, bucket_count, d_scratch,
size_tracker, gpu_memory_allocated);
}

View File

@@ -187,82 +187,37 @@ __host__ __device__ void fp_copy(Fp &dst, const Fp &src) {
// "Raw" means without modular reduction - performs a + b and returns carry.
// This is an internal helper used by fp_add() which handles reduction.
__host__ __device__ UNSIGNED_LIMB fp_add_raw(Fp &c, const Fp &a, const Fp &b) {
#if defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 64
// PTX carry-chain: add.cc sets the hardware carry flag, addc.cc propagates
// it. This replaces 2 software carry-detect comparisons per limb (~14 extra
// instructions across 7 limbs) with zero-cost hardware flag propagation.
uint64_t carry_out;
asm("add.cc.u64 %0, %8, %15;\n\t" // c[0] = a[0] + b[0], set CF
"addc.cc.u64 %1, %9, %16;\n\t" // c[1] = a[1] + b[1] + CF
"addc.cc.u64 %2, %10, %17;\n\t" // c[2] = a[2] + b[2] + CF
"addc.cc.u64 %3, %11, %18;\n\t" // c[3] = a[3] + b[3] + CF
"addc.cc.u64 %4, %12, %19;\n\t" // c[4] = a[4] + b[4] + CF
"addc.cc.u64 %5, %13, %20;\n\t" // c[5] = a[5] + b[5] + CF
"addc.cc.u64 %6, %14, %21;\n\t" // c[6] = a[6] + b[6] + CF
"addc.u64 %7, 0, 0;\n\t" // carry_out = 0 + 0 + CF
: "=l"(c.limb[0]), "=l"(c.limb[1]), "=l"(c.limb[2]), "=l"(c.limb[3]),
"=l"(c.limb[4]), "=l"(c.limb[5]), "=l"(c.limb[6]), "=l"(carry_out)
: "l"(a.limb[0]), "l"(a.limb[1]), "l"(a.limb[2]), "l"(a.limb[3]),
"l"(a.limb[4]), "l"(a.limb[5]), "l"(a.limb[6]), "l"(b.limb[0]),
"l"(b.limb[1]), "l"(b.limb[2]), "l"(b.limb[3]), "l"(b.limb[4]),
"l"(b.limb[5]), "l"(b.limb[6]));
return carry_out;
#else
// Host path: portable software carry detection
UNSIGNED_LIMB carry = 0;
for (int i = 0; i < FP_LIMBS; i++) {
// Add with carry: c = a + b + carry
UNSIGNED_LIMB sum = a.limb[i] + carry;
carry = (sum < a.limb[i]) ? 1 : 0;
carry = (sum < a.limb[i]) ? 1 : 0; // Check for overflow
sum += b.limb[i];
carry += (sum < b.limb[i]) ? 1 : 0;
carry += (sum < b.limb[i]) ? 1 : 0; // Check for overflow
c.limb[i] = sum;
}
return carry;
#endif
}
// Subtraction with borrow propagation
// "Raw" means without modular reduction - performs a - b and returns borrow.
// This is an internal helper used by fp_sub() which handles reduction.
__host__ __device__ UNSIGNED_LIMB fp_sub_raw(Fp &c, const Fp &a, const Fp &b) {
#if defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 64
// PTX borrow-chain: sub.cc sets the hardware borrow flag, subc.cc propagates
// it. Same benefit as fp_add_raw -- eliminates 2 comparisons per limb.
uint64_t borrow_out;
asm("sub.cc.u64 %0, %8, %15;\n\t" // c[0] = a[0] - b[0], set CF
"subc.cc.u64 %1, %9, %16;\n\t" // c[1] = a[1] - b[1] - CF
"subc.cc.u64 %2, %10, %17;\n\t" // c[2] = a[2] - b[2] - CF
"subc.cc.u64 %3, %11, %18;\n\t" // c[3] = a[3] - b[3] - CF
"subc.cc.u64 %4, %12, %19;\n\t" // c[4] = a[4] - b[4] - CF
"subc.cc.u64 %5, %13, %20;\n\t" // c[5] = a[5] - b[5] - CF
"subc.cc.u64 %6, %14, %21;\n\t" // c[6] = a[6] - b[6] - CF
"subc.u64 %7, 0, 0;\n\t" // borrow_out = 0 - 0 - CF
: "=l"(c.limb[0]), "=l"(c.limb[1]), "=l"(c.limb[2]), "=l"(c.limb[3]),
"=l"(c.limb[4]), "=l"(c.limb[5]), "=l"(c.limb[6]), "=l"(borrow_out)
: "l"(a.limb[0]), "l"(a.limb[1]), "l"(a.limb[2]), "l"(a.limb[3]),
"l"(a.limb[4]), "l"(a.limb[5]), "l"(a.limb[6]), "l"(b.limb[0]),
"l"(b.limb[1]), "l"(b.limb[2]), "l"(b.limb[3]), "l"(b.limb[4]),
"l"(b.limb[5]), "l"(b.limb[6]));
// subc.u64 with 0-0-CF produces 0 if no borrow, or 0xFFFFFFFFFFFFFFFF if
// borrow. Normalize to 0/1 for callers that check (borrow != 0) or add it.
return borrow_out & 1;
#else
// Host path: portable software borrow detection
UNSIGNED_LIMB borrow = 0;
for (int i = 0; i < FP_LIMBS; i++) {
// Subtract with borrow: c = a - b - borrow
UNSIGNED_LIMB diff = a.limb[i] - borrow;
borrow = (diff > a.limb[i]) ? 1 : 0;
borrow = (diff > a.limb[i]) ? 1 : 0; // Check for underflow
UNSIGNED_LIMB old_diff = diff;
diff -= b.limb[i];
borrow += (diff > old_diff) ? 1 : 0;
borrow += (diff > old_diff) ? 1 : 0; // Check for underflow
c.limb[i] = diff;
}
return borrow;
#endif
}
// Addition with modular reduction: c = (a + b) mod p
@@ -271,27 +226,7 @@ __host__ __device__ void fp_add(Fp &c, const Fp &a, const Fp &b) {
Fp sum;
UNSIGNED_LIMB carry = fp_add_raw(sum, a, b);
#if defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 64
// Branchless reduction: always compute sum - p, then select based on
// whether reduction was needed. This avoids divergent branches that stall
// warps when some threads need reduction and others don't.
//
// Decision logic:
// carry=1 -> sum overflowed 448 bits, definitely >= p -> use reduced
// carry=0, borrow=0 -> sum >= p in 448 bits -> use reduced
// carry=0, borrow=1 -> sum < p -> use original sum
// So: use_original = (!carry) & borrow
Fp reduced;
UNSIGNED_LIMB borrow = fp_sub_raw(reduced, sum, fp_modulus());
UNSIGNED_LIMB use_original = ((carry ^ 1) & borrow);
UNSIGNED_LIMB mask =
-use_original; // all-ones if keep sum, all-zeros if keep reduced
for (int i = 0; i < FP_LIMBS; i++) {
c.limb[i] = (sum.limb[i] & mask) | (reduced.limb[i] & ~mask);
}
#else
// Host path: branching is fine on CPU (branch predictor handles it well)
// If there's a carry or sum >= MODULUS, we need to reduce
const Fp &p = fp_modulus();
if (carry || fp_cmp(sum, p) != ComparisonType::Less) {
Fp reduced;
@@ -300,7 +235,6 @@ __host__ __device__ void fp_add(Fp &c, const Fp &a, const Fp &b) {
} else {
fp_copy(c, sum);
}
#endif
}
// Subtraction with modular reduction: c = (a - b) mod p
@@ -309,28 +243,13 @@ __host__ __device__ void fp_sub(Fp &c, const Fp &a, const Fp &b) {
Fp diff;
UNSIGNED_LIMB borrow = fp_sub_raw(diff, a, b);
#if defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 64
// Branchless correction: always compute diff + p, select based on borrow.
// Same rationale as fp_add -- avoids warp divergence.
// borrow=1 -> a < b, need to add p -> use corrected
// borrow=0 -> a >= b, result is valid -> use diff
Fp corrected;
fp_add_raw(corrected, diff, fp_modulus());
UNSIGNED_LIMB mask =
-borrow; // all-ones if borrow (use corrected), all-zeros if not
for (int i = 0; i < FP_LIMBS; i++) {
c.limb[i] = (corrected.limb[i] & mask) | (diff.limb[i] & ~mask);
}
#else
// Host path: branching is fine on CPU
// If there was a borrow, we need to add MODULUS
const Fp &p = fp_modulus();
if (borrow) {
fp_add_raw(c, diff, p);
} else {
fp_copy(c, diff);
}
#endif
}
// Small-constant multiplication via addition chains.
@@ -534,223 +453,23 @@ __host__ __device__ void fp_mont_reduce(Fp &c, const UNSIGNED_LIMB *a) {
}
}
// ============================================================================
// PTX-accelerated CIOS Montgomery multiplication (device path)
// ============================================================================
// The CIOS algorithm for 7 x 64-bit limbs executes 98 multiply-accumulate
// steps across 7 outer iterations. Each step computes:
// (carry, t[j]) = t[j] + a[j] * b_i + carry
// which is a 64x64->128 multiply plus a three-operand addition with carry.
//
// The C++ path uses software carry detection: carry = (sum < old) ? 1 : 0.
// The PTX path below uses hardware carry flags via the .cc suffix:
// - mul.lo.u64 / mul.hi.u64 : 64x64->128 wide multiply
// - add.cc.u64 / addc.u64 : addition chain with hardware carry flag
//
// Each multiply-accumulate step uses 6 PTX instructions instead of ~10+ in
// the software-carry version. The 7 outer iterations are fully unrolled, and
// the limb-shift loop (t[j] = t[j+1]) is eliminated by register renaming.
//
// REGISTER ALIASING NOTE: All PTX temporaries (_lo, _hi) are declared as
// .reg inside the asm block. This prevents nvcc's register allocator from
// aliasing them with C operands (t_j, carry), which was the root cause of
// previous correctness bugs where "+l" outputs could share registers with
// "l" inputs in the same asm statement.
// ============================================================================
#ifdef __CUDA_ARCH__
#if LIMB_BITS_CONFIG == 64
// Multiply-accumulate one limb: (carry_out, t_j) = t_j + a_j * b_i + carry_in
//
// All intermediates (_lo, _hi) are PTX .reg temporaries inside a { } scope
// block to avoid: (1) nvcc register aliasing between C operands, and (2)
// duplicate .reg definitions when the macro is expanded multiple times.
// The 6-instruction sequence:
// mul.lo.u64 _lo, a_j, b_i -- low 64 bits of product
// mul.hi.u64 _hi, a_j, b_i -- high 64 bits of product
// add.cc.u64 t_j, t_j, _lo -- t_j += _lo, set CF
// addc.u64 _hi, _hi, 0 -- _hi += CF
// add.cc.u64 t_j, t_j, carry -- t_j += carry_in, set CF
// addc.u64 carry, _hi, 0 -- carry_out = _hi + CF
#define LIMB_MACC(t_j, carry, a_j, b_i) \
asm volatile("{\n\t" \
".reg .u64 _lo, _hi;\n\t" \
"mul.lo.u64 _lo, %2, %3;\n\t" \
"mul.hi.u64 _hi, %2, %3;\n\t" \
"add.cc.u64 %0, %0, _lo;\n\t" \
"addc.u64 _hi, _hi, 0;\n\t" \
"add.cc.u64 %0, %0, %1;\n\t" \
"addc.u64 %1, _hi, 0;\n\t" \
"}\n\t" \
: "+l"(t_j), "+l"(carry) \
: "l"(a_j), "l"(b_i))
// Single CIOS iteration: multiply-accumulate, reduce, and shift.
//
// Computes:
// 1. t += a * b_i (7 limb multiply-accumulate with carry chain)
// 2. m = t[0] * p_prime (Montgomery reduction factor)
// 3. t += m * p (reduction, zeros out t[0])
// 4. Shift t right by one limb (via register renaming into r0..r7)
//
// The macro lets the compiler allocate registers across all 7 unrolled
// iterations, avoiding spills to local memory.
#define CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, \
a5, a6, b_i, p0, p1, p2, p3, p4, p5, p6, p_prime, \
r0, r1, r2, r3, r4, r5, r6, r7) \
do { \
uint64_t _carry = 0; \
/* Step 1: t += a * b_i */ \
LIMB_MACC(t0, _carry, a0, b_i); \
LIMB_MACC(t1, _carry, a1, b_i); \
LIMB_MACC(t2, _carry, a2, b_i); \
LIMB_MACC(t3, _carry, a3, b_i); \
LIMB_MACC(t4, _carry, a4, b_i); \
LIMB_MACC(t5, _carry, a5, b_i); \
LIMB_MACC(t6, _carry, a6, b_i); \
/* Accumulate final carry into overflow limb t7 */ \
uint64_t _overflow; \
asm("add.cc.u64 %0, %0, %2;\n\t" \
"addc.u64 %1, 0, 0;\n\t" \
: "+l"(t7), "=l"(_overflow) \
: "l"(_carry)); \
\
/* Step 2: m = t0 * p_prime mod 2^64 */ \
uint64_t _m = t0 * p_prime; \
\
/* Step 3: t += m * p (zeros out t0) */ \
_carry = 0; \
LIMB_MACC(t0, _carry, _m, p0); \
LIMB_MACC(t1, _carry, _m, p1); \
LIMB_MACC(t2, _carry, _m, p2); \
LIMB_MACC(t3, _carry, _m, p3); \
LIMB_MACC(t4, _carry, _m, p4); \
LIMB_MACC(t5, _carry, _m, p5); \
LIMB_MACC(t6, _carry, _m, p6); \
/* Finalize overflow: t7 = t7 + _carry + _overflow */ \
/* Plain adds (no carry chain) -- the CIOS invariant guarantees this */ \
/* sum fits in 64 bits so intermediate overflow does not matter. */ \
t7 += _carry; \
t7 += _overflow; \
\
/* Step 4: Shift right by one limb via register renaming */ \
/* t0 is now zero (by construction of m), discard it */ \
r0 = t1; \
r1 = t2; \
r2 = t3; \
r3 = t4; \
r4 = t5; \
r5 = t6; \
r6 = t7; \
r7 = 0; \
} while (0)
// Device-only CIOS Montgomery multiplication for 7 x 64-bit limbs, built from
// seven expansions of CIOS_ITERATION_PTX followed by one branchless
// conditional subtraction of the modulus.
//
// Parameters:
//   c  output; receives the 7 result limbs.
//   a  first operand; its limbs are copied into locals before any work.
//   b  second operand; limb i is consumed by iteration i.
//
// The modulus limbs come from DEVICE_MODULUS and the reduction constant from
// DEVICE_P_PRIME. c is only written after the last read of a and b, so c may
// refer to the same object as either input. The function is marked
// __noinline__, so each call site emits a call rather than a copy of the
// unrolled body.
__device__ __noinline__ void fp_mont_mul_cios_ptx(Fp &c, const Fp &a,
const Fp &b) {
// Load the modulus limbs and the reduction constant into locals once so the
// unrolled iterations below operate on plain scalars.
const uint64_t p0 = DEVICE_MODULUS.limb[0];
const uint64_t p1 = DEVICE_MODULUS.limb[1];
const uint64_t p2 = DEVICE_MODULUS.limb[2];
const uint64_t p3 = DEVICE_MODULUS.limb[3];
const uint64_t p4 = DEVICE_MODULUS.limb[4];
const uint64_t p5 = DEVICE_MODULUS.limb[5];
const uint64_t p6 = DEVICE_MODULUS.limb[6];
const uint64_t pp = DEVICE_P_PRIME;
// Local copies of the limbs of a; reused unchanged by every iteration.
const uint64_t a0 = a.limb[0], a1 = a.limb[1], a2 = a.limb[2];
const uint64_t a3 = a.limb[3], a4 = a.limb[4], a5 = a.limb[5];
const uint64_t a6 = a.limb[6];
// Accumulator: 7 limbs + 1 overflow, initialized to zero
uint64_t t0 = 0, t1 = 0, t2 = 0, t3 = 0;
uint64_t t4 = 0, t5 = 0, t6 = 0, t7 = 0;
// 7 fully-unrolled CIOS iterations with register renaming for the shift.
// Each iteration processes one limb of b, accumulates a*b[i], reduces,
// and shifts. The output registers become the input for the next iteration.
// The same variables t0..t7 are passed as both the accumulator and the
// shifted-output arguments of the macro; the macro ends by setting its last
// output to 0, so t7 is 0 after every iteration.
CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, a5, a6,
b.limb[0], p0, p1, p2, p3, p4, p5, p6, pp, t0, t1, t2, t3,
t4, t5, t6, t7);
CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, a5, a6,
b.limb[1], p0, p1, p2, p3, p4, p5, p6, pp, t0, t1, t2, t3,
t4, t5, t6, t7);
CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, a5, a6,
b.limb[2], p0, p1, p2, p3, p4, p5, p6, pp, t0, t1, t2, t3,
t4, t5, t6, t7);
CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, a5, a6,
b.limb[3], p0, p1, p2, p3, p4, p5, p6, pp, t0, t1, t2, t3,
t4, t5, t6, t7);
CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, a5, a6,
b.limb[4], p0, p1, p2, p3, p4, p5, p6, pp, t0, t1, t2, t3,
t4, t5, t6, t7);
CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, a5, a6,
b.limb[5], p0, p1, p2, p3, p4, p5, p6, pp, t0, t1, t2, t3,
t4, t5, t6, t7);
CIOS_ITERATION_PTX(t0, t1, t2, t3, t4, t5, t6, t7, a0, a1, a2, a3, a4, a5, a6,
b.limb[6], p0, p1, p2, p3, p4, p5, p6, pp, t0, t1, t2, t3,
t4, t5, t6, t7);
// Final reduction: if t[0..7] >= p (extended to 8 limbs), subtract p.
// Compute (t[0..6] - p[0..6]) with borrow, then subtract borrow from t7.
// If t7 after subtraction is non-negative, the reduced result is valid;
// otherwise the original t[0..6] is already in [0, p).
//
// Operand numbering used in the asm string below:
//   %0..%6   -> r0..r6 (outputs, t - p limb by limb)
//   %7       -> mask   (output)
//   %8..%14  -> t0..t6 (inputs)
//   %15..%21 -> p0..p6 (inputs)
//   %22      -> t7     (input)
//
// NOTE(review): every output is declared with a plain "=l" constraint (no
// early-clobber marker), while later instructions of the same asm string
// still read inputs after earlier outputs have been written. Confirm that
// the toolchain never places an output in the register of an input that has
// not been read yet.
// NOTE(review): the mask is only 0 or -1 as described if t7 is 0 or the
// borrow is 0 on entry; t7 is 0 here because the last macro expansion
// resets it -- confirm nothing between the macro and this asm changes that.
uint64_t r0, r1, r2, r3, r4, r5, r6, mask;
asm("sub.cc.u64 %0, %8, %15;\n\t" // r0 = t0 - p0
"subc.cc.u64 %1, %9, %16;\n\t" // r1 = t1 - p1 - borrow
"subc.cc.u64 %2, %10, %17;\n\t" // r2 = t2 - p2 - borrow
"subc.cc.u64 %3, %11, %18;\n\t" // r3 = t3 - p3 - borrow
"subc.cc.u64 %4, %12, %19;\n\t" // r4 = t4 - p4 - borrow
"subc.cc.u64 %5, %13, %20;\n\t" // r5 = t5 - p5 - borrow
"subc.cc.u64 %6, %14, %21;\n\t" // r6 = t6 - p6 - borrow
"subc.u64 %7, %22, 0;\n\t" // mask_src = t7 - 0 - borrow
"shr.s64 %7, %7, 63;\n\t" // mask = sign-extend: -1 if negative, 0 if
// >= 0
: "=l"(r0), "=l"(r1), "=l"(r2), "=l"(r3), "=l"(r4), "=l"(r5), "=l"(r6),
"=l"(mask)
: "l"(t0), "l"(t1), "l"(t2), "l"(t3), "l"(t4), "l"(t5), "l"(t6), "l"(p0),
"l"(p1), "l"(p2), "l"(p3), "l"(p4), "l"(p5), "l"(p6), "l"(t7));
// Branchless selection:
// mask = 0 -> t >= p (use reduced r[0..6])
// mask = -1 -> t < p (keep original t[0..6])
// Each limb is picked with bitwise AND/OR against the all-zeros or all-ones
// mask, so no branch depends on the comparison result.
c.limb[0] = (t0 & mask) | (r0 & ~mask);
c.limb[1] = (t1 & mask) | (r1 & ~mask);
c.limb[2] = (t2 & mask) | (r2 & ~mask);
c.limb[3] = (t3 & mask) | (r3 & ~mask);
c.limb[4] = (t4 & mask) | (r4 & ~mask);
c.limb[5] = (t5 & mask) | (r5 & ~mask);
c.limb[6] = (t6 & mask) | (r6 & ~mask);
}
#undef LIMB_MACC
#undef CIOS_ITERATION_PTX
#endif // LIMB_BITS_CONFIG == 64
#endif // __CUDA_ARCH__
// CIOS (Coarsely Integrated Operand Scanning) Montgomery multiplication
// Fuses multiplication and reduction in a single pass for better efficiency.
// Uses only FP_LIMBS+1 limbs of working space instead of 2*FP_LIMBS.
// Both a and b are in Montgomery form, result is in Montgomery form.
__host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
#if defined(__CUDA_ARCH__) && LIMB_BITS_CONFIG == 64
// Device path: fully unrolled PTX with hardware carry flags
fp_mont_mul_cios_ptx(c, a, b);
#else
// Host path: portable C++ implementation
const Fp &p = fp_modulus();
UNSIGNED_LIMB p_prime = fp_p_prime();
// Working array: only n+1 limbs needed (vs 2n for separate mul+reduce)
UNSIGNED_LIMB t[FP_LIMBS + 1];
#ifdef __CUDA_ARCH__
for (int i = 0; i < FP_LIMBS + 1; i++) {
t[i] = 0;
}
#else
memset(t, 0, (FP_LIMBS + 1) * sizeof(UNSIGNED_LIMB));
#endif
// Main CIOS loop: for each limb of b
for (int i = 0; i < FP_LIMBS; i++) {
@@ -810,7 +529,14 @@ __host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
}
// Copy result to output
#ifdef __CUDA_ARCH__
#pragma unroll
for (int i = 0; i < FP_LIMBS; i++) {
c.limb[i] = t[i];
}
#else
memcpy(&c.limb[0], t, FP_LIMBS * sizeof(UNSIGNED_LIMB));
#endif
// Final reduction: if result >= p or there's overflow, subtract p
if (t[FP_LIMBS] != 0 || fp_cmp(c, p) != ComparisonType::Less) {
@@ -819,7 +545,6 @@ __host__ __device__ void fp_mont_mul_cios(Fp &c, const Fp &a, const Fp &b) {
fp_copy(c, reduced);
}
// Result is in Montgomery form
#endif
}
// Montgomery multiplication: c = (a * b * R_INV) mod p

View File

@@ -23,8 +23,7 @@ set(ZK_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
set(ZK_PRIMITIVES_DIR ${ZK_SRC_DIR}/primitives)
# Build device library from tfhe-cuda-backend
add_library(tfhe_device_bench STATIC ${TFHE_CUDA_BACKEND_DIR}/src/device.cu
${TFHE_CUDA_BACKEND_DIR}/src/utils/helper_profile.cu)
add_library(tfhe_device_bench STATIC ${TFHE_CUDA_BACKEND_DIR}/src/device.cu)
set_target_properties(
tfhe_device_bench
PROPERTIES CUDA_SEPARABLE_COMPILATION ON

View File

@@ -140,14 +140,14 @@ static void BM_G1_MSM(benchmark::State &state) {
// Warm-up iterations
for (int i = 0; i < WARMUP_ITERATIONS; i++) {
point_msm_g1_async(g_benchmark_stream, g_gpu_index, &h_result, d_points,
d_scalars, n, d_scratch);
d_scalars, n, d_scratch, size_tracker, true);
}
cuda_synchronize_stream(g_benchmark_stream, g_gpu_index);
// Benchmark loop: only measure the MSM computation, no memory operations
for (auto _ : state) {
point_msm_g1_async(g_benchmark_stream, g_gpu_index, &h_result, d_points,
d_scalars, n, d_scratch);
d_scalars, n, d_scratch, size_tracker, true);
benchmark::ClobberMemory();
}
@@ -221,14 +221,14 @@ static void BM_G2_MSM(benchmark::State &state) {
// Warm-up iterations
for (int i = 0; i < WARMUP_ITERATIONS; i++) {
point_msm_g2_async(g_benchmark_stream, g_gpu_index, &h_result, d_points,
d_scalars, n, d_scratch);
d_scalars, n, d_scratch, size_tracker, true);
}
cuda_synchronize_stream(g_benchmark_stream, g_gpu_index);
// Benchmark loop: only measure the MSM computation, no memory operations
for (auto _ : state) {
point_msm_g2_async(g_benchmark_stream, g_gpu_index, &h_result, d_points,
d_scalars, n, d_scratch);
d_scalars, n, d_scratch, size_tracker, true);
benchmark::ClobberMemory();
}

View File

@@ -20,8 +20,7 @@ set(ZK_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../src)
set(ZK_PRIMITIVES_DIR ${ZK_SRC_DIR}/primitives)
# Build device library from tfhe-cuda-backend
add_library(tfhe_device STATIC ${TFHE_CUDA_BACKEND_DIR}/src/device.cu
${TFHE_CUDA_BACKEND_DIR}/src/utils/helper_profile.cu)
add_library(tfhe_device STATIC ${TFHE_CUDA_BACKEND_DIR}/src/device.cu)
set_target_properties(
tfhe_device
PROPERTIES CUDA_SEPARABLE_COMPILATION ON

View File

@@ -13,8 +13,8 @@
// ./build/tests_and_benchmarks/tests/basic/basic_curve_ops
#include "curve.h"
#include "device.h"
#include "fp.h"
#include <cassert>
#include <cstdio>
#include <cstring>
@@ -24,7 +24,7 @@ int main() {
// (non-Montgomery) form. Convert to Montgomery, then lift to projective for
// host-side arithmetic.
const G1Affine &gen_normal = g1_generator();
PANIC_IF_FALSE(!g1_is_infinity(gen_normal), "generator must not be infinity");
assert(!g1_is_infinity(gen_normal));
G1Affine gen_affine = gen_normal;
point_to_montgomery_inplace(gen_affine);
@@ -37,21 +37,21 @@ int main() {
// G + (-G) = identity (Z = 0 in the projective convention)
G1Projective identity = G + neg_G;
PANIC_IF_FALSE(fp_is_zero(identity.Z), "G + (-G) must be identity (Z = 0)");
assert(fp_is_zero(identity.Z));
printf("Negation (-G) and G + (-G) = identity: OK\n");
// ---- Addition: 2*G = G + G, 3*G = 2*G + G ----
G1Projective two_G = G + G;
PANIC_IF_FALSE(!(two_G == G1Projective{}), "2*G must not be identity");
assert(!(two_G == G1Projective())); // not the identity
G1Projective three_G = two_G + G;
PANIC_IF_FALSE(!(three_G == G1Projective{}), "3*G must not be identity");
assert(!(three_G == G1Projective()));
printf("Addition (2*G, 3*G): OK\n");
// ---- Compound assignment: G += G ----
G1Projective acc = G;
acc += G; // acc = 2*G
PANIC_IF_FALSE(acc == two_G, "G += G must equal 2*G");
assert(acc == two_G);
printf("Compound assignment (+=): OK\n");
// ---- Scalar multiplication: 3*G using Scalar type ----
@@ -61,22 +61,19 @@ int main() {
scalar_3.limb[0] = 3;
G1Projective three_G_via_scalar = G * scalar_3;
PANIC_IF_FALSE(!(three_G_via_scalar == G1Projective{}),
"3*G via scalar must not be identity");
assert(!(three_G_via_scalar == G1Projective()));
// Normalise both to Z = 1 (Montgomery) before comparing coordinates.
normalize_projective_g1(three_G);
normalize_projective_g1(three_G_via_scalar);
PANIC_IF_FALSE(three_G == three_G_via_scalar,
"3*G via addition must equal 3*G via scalar multiply");
assert(three_G == three_G_via_scalar);
printf("Scalar multiplication (3*G == G + G + G): OK\n");
// ---- Projective -> affine conversion ----
// projective_to_affine_g1 keeps coordinates in Montgomery form.
G1Affine three_G_affine;
projective_to_affine_g1(three_G_affine, three_G);
PANIC_IF_FALSE(!g1_is_infinity(three_G_affine),
"3*G in affine must not be infinity");
assert(!g1_is_infinity(three_G_affine));
printf("Projective -> affine conversion: OK\n");
// ---- Convert to normal-form coordinates ----
@@ -85,8 +82,7 @@ int main() {
G1Projective result = three_G_via_scalar;
normalize_from_montgomery_g1(
result); // coordinates now in normal (non-Montgomery) form
PANIC_IF_FALSE(!fp_is_zero(result.Z),
"normalized result must have non-zero Z");
assert(!fp_is_zero(result.Z)); // Z = 1 (non-zero)
printf("Conversion to normal-form projective: OK\n");
printf("All G1 curve operations passed.\n");

Some files were not shown because too many files have changed in this diff Show More