Compare commits

...

54 Commits

Author SHA1 Message Date
Arthur Meyre
bb400ed85a chore: fix cleatext -> cleartext typo 2026-04-27 16:14:50 +02:00
Arthur Meyre
1c05b863f4 chore: custom fft base_n variable rename
- base_n has a different meaning in the tfhe-fft code; to make it less
confusing when working on such code, base_n was renamed
2026-04-27 16:14:49 +02:00
dependabot[bot]
5ceb25bfc2 chore(deps): bump tj-actions/changed-files from 47.0.5 to 47.0.6
Bumps [tj-actions/changed-files](https://github.com/tj-actions/changed-files) from 47.0.5 to 47.0.6.
- [Release notes](https://github.com/tj-actions/changed-files/releases)
- [Changelog](https://github.com/tj-actions/changed-files/blob/main/HISTORY.md)
- [Commits](22103cc46b...9426d40962)

---
updated-dependencies:
- dependency-name: tj-actions/changed-files
  dependency-version: 47.0.6
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-27 14:30:11 +02:00
dependabot[bot]
680c2e3eb6 chore(deps): bump actions/cache from 5.0.4 to 5.0.5
Bumps [actions/cache](https://github.com/actions/cache) from 5.0.4 to 5.0.5.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](668228422a...27d5ce7f10)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-version: 5.0.5
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-27 11:03:30 +02:00
dependabot[bot]
8bc080355d chore(deps): bump zizmorcore/zizmor-action from 0.5.2 to 0.5.3
Bumps [zizmorcore/zizmor-action](https://github.com/zizmorcore/zizmor-action) from 0.5.2 to 0.5.3.
- [Release notes](https://github.com/zizmorcore/zizmor-action/releases)
- [Commits](71321a20a9...b1d7e1fb5d)

---
updated-dependencies:
- dependency-name: zizmorcore/zizmor-action
  dependency-version: 0.5.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-27 10:29:38 +02:00
dependabot[bot]
0cc8d625e4 chore(deps): bump actions/setup-node from 6.3.0 to 6.4.0
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 6.3.0 to 6.4.0.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](53b83947a5...48b55a011b)

---
updated-dependencies:
- dependency-name: actions/setup-node
  dependency-version: 6.4.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-27 10:29:25 +02:00
Arthur Meyre
ec5d0da883 chore: bump ntt requirement which should have been 0.7.1 already 2026-04-27 09:49:03 +02:00
Arthur Meyre
8ed5633300 chore(hl): export two missing (Compressed)ReRandomizationKey types 2026-04-23 15:32:17 +02:00
David Testé
cf07dcf6a3 chore(docs): update leading-trailing zeros results 2026-04-23 15:16:54 +02:00
Arthur Meyre
20dad23256 chore: bump rand to 0.8.6 in data generation crate
- the 1.6 bump is done in a separate PR, which will use the officially published tag
as the source for the code and also updates the lock file
2026-04-23 14:35:02 +02:00
Nicolas Sarlin
d7380e4264 chore(backward): use released tfhe for generate_1_6 dep 2026-04-23 14:34:41 +02:00
Nicolas Sarlin
093ffb7699 chore(ci): update toolchain to nightly 2026-04-22 2026-04-23 10:08:57 +02:00
Arthur Meyre
c804b838cb chore: update typos file filter
- with the HPU data files checked out, the typos CLI finds typos in what is
essentially binary data
- exclude .hpu files from the checks
2026-04-22 17:22:15 +02:00
Arthur Meyre
7b174b1865 chore: make the plaintext PRF available as a test util
- KMS is testing things around the PRF and needs a way to verify the
PRF application, so a cleartext PRF function is made available as a test
util
2026-04-22 10:18:32 +02:00
Arthur Meyre
79cb6b6066 chore: dirty fix for zk-cuda-backend rust build 2026-04-22 10:18:21 +02:00
Nicolas Sarlin
6ff87e94bb chore(gpu): remove os detection script (done in rust) 2026-04-22 10:04:52 +02:00
Thomas Montaigu
4c27f48968 chore(oprf): add missing into/from raw parts 2026-04-22 00:25:44 +02:00
Arthur Meyre
8bf2a12e9b chore: dirty fix for zk-cuda-backend build problem
- when compiling for real, the build cannot find the file, which is not available
2026-04-21 17:23:30 +02:00
Arthur Meyre
64b5a0fdcd chore: fix cuda release workflow 2026-04-21 16:30:30 +02:00
Thomas Montaigu
49c390edef refactor(oprf): change hashed data 2026-04-21 14:43:17 +02:00
Thomas Montaigu
82860a0b01 refactor(oprf)!: use a dedicated key for oprf
The OPRF is a simple bootstrap; however, as it
uses a custom modulus switch, I decided to define a
new type and not reuse the ShortintBootstrapKey,
except on GPU, where it was easier to reuse it.

This means that the shortint/integer APIs must now create
an OprfPrivateKey + OprfServerKey to do OPRF (or use .as_oprf_key_view).
In the HLAPI there is no breaking change, as we can use either the dedicated
key or fall back on the compute bsk.

This refactor makes the shortint OPRF able to generate
multiple blocks at once starting from the same seed.
This is to follow some guidelines.

This means that shortint's OPRF now has a function doing most
of the work to generate a Ciphertext that encrypts random bits
split evenly amongst multiple blocks
2026-04-21 14:43:17 +02:00
Theo Souchon
39ca504ce4 chore(lint): change report backward to have the right behavior for message generation 2026-04-21 14:34:13 +02:00
dependabot[bot]
61c7ffea2e chore(deps): bump actions/upload-artifact from 7.0.0 to 7.0.1
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 7.0.0 to 7.0.1.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](bbbca2ddaa...043fb46d1a)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: 7.0.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-21 12:08:45 +02:00
Nicolas Sarlin
48bb3833e7 fix(shortint): proven ct list expand with a ksk but no fn fails 2026-04-20 14:15:54 +02:00
Theo Souchon
2ad2f522db chore(lint): remove upgrade false positive warning if new variant added in an enum versioned 2026-04-20 08:24:29 +02:00
Nicolas Sarlin
2333a5591e chore(ci): check that Cargo.lock of generate_ crates is up to date 2026-04-17 17:33:59 +02:00
David Testé
9e3e283741 doc(bench): update benchmark results tables 2026-04-17 12:52:37 +02:00
Arthur Meyre
e3b9fd56df feat: add mul div entry points
- this operation has optimization opportunities (at least for the scalar
mul and scalar div cases), but those won't be implemented here; this is a
first commit to make the API available
2026-04-17 11:04:32 +02:00
Thomas Montaigu
05b1c9a651 feat(hlapi): bind CudaServerKey::contains 2026-04-16 16:20:11 +02:00
Thomas Montaigu
8d2caa108a chore(hlapi): add gpu stuff to FheInteger trait 2026-04-16 16:20:11 +02:00
Thomas Montaigu
dea1b81b06 feat(hlapi): add contains for cpu 2026-04-16 16:20:11 +02:00
Arthur Meyre
a1dc91af4f chore: update rand version in tfhe-hpu-backend
- to silence a soundness warning (which does not concern us since we don't
use the faulty mechanism)
2026-04-16 16:11:34 +02:00
Arthur Meyre
b34b7d39f1 chore: remove unused deps from mockup
- those are not referenced at all in the code
- clap-num is less clear since clap is used, so it was left in for now
2026-04-16 16:11:34 +02:00
Arthur Meyre
dc14834559 chore: bump tfhe-hpu-backend after erc7984 update 2026-04-16 11:51:58 +02:00
Nicolas Sarlin
10ab4f4409 feat: add re_randomization for ProvenCompactCiphertList 2026-04-16 11:37:21 +02:00
Nicolas Sarlin
d5439a9f48 fix(core): check that ct modulus is power of two in glwe algebra 2026-04-16 11:37:21 +02:00
Mayeul@Zama
e299dc2af7 feat(integer): add improved leading_zeroes 2026-04-15 17:29:05 +02:00
Enzo Di Maria
bdb75ec806 fix(gpu): AES noise fix 2026-04-15 17:08:04 +02:00
Andrei Stoian
32cf1969bf fix(gpu): semgrep step in pcc now fails on error 2026-04-15 14:20:34 +02:00
Andrei Stoian
600a30131e chore(gpu): optimize CI 2026-04-15 12:48:31 +02:00
David Palm
96d230cf6f chore: make CompressedXofKeySet::decompress take a reference 2026-04-14 16:24:33 +02:00
Nicolas Sarlin
4790f8ba1c fix(bench): wrong size in wasm benchmarks 2026-04-14 11:17:11 +02:00
dependabot[bot]
79a54df25b chore(deps): bump docker/login-action from 4.0.0 to 4.1.0
Bumps [docker/login-action](https://github.com/docker/login-action) from 4.0.0 to 4.1.0.
- [Release notes](https://github.com/docker/login-action/releases)
- [Commits](b45d80f862...4907a6ddec)

---
updated-dependencies:
- dependency-name: docker/login-action
  dependency-version: 4.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-14 10:32:05 +02:00
Theo Souchon
50d6be121a chore(test): refacto around noise check test and json output 2026-04-14 09:58:20 +02:00
Nicolas Sarlin
7cd966d8a7 chore: allow rand audit advisory 2026-04-14 08:54:05 +02:00
David Testé
6ca929051d chore(ci): remove permanent instance fallback for gpu
These fallbacks were set to mitigate Hyperstack resource shortages. The
instances are not used anymore, and the workflows are modified to avoid a
workflow run getting stuck waiting for a permanent runner that doesn't
exist.
2026-04-10 14:30:32 +02:00
Pedro Alves
871cc8f772 chore(docs): rewrite GPU ZK-PoK documentation for zk-cuda-backend integration 2026-04-10 08:40:08 -03:00
Theo Souchon
b938473788 chore: renamed erc20 to erc7984 2026-04-10 09:18:51 +02:00
Nicolas Sarlin
74869f5e2f chore(integer): refactor expansion helper
- Split the pure expand and the post processing (cast, unpack, sanitize)
- Add a new internal intermediate type: ExpandedCiphertextList
- verify_and_expand just calls verify+expand
2026-04-09 11:07:03 +02:00
dependabot[bot]
326dd6a5c7 chore(deps): bump zgosalvez/github-actions-ensure-sha-pinned-actions
Bumps [zgosalvez/github-actions-ensure-sha-pinned-actions](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions) from 5.0.1 to 5.0.4.
- [Release notes](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions/releases)
- [Commits](70c4af2ed5...ca46236c6c)

---
updated-dependencies:
- dependency-name: zgosalvez/github-actions-ensure-sha-pinned-actions
  dependency-version: 5.0.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-08 09:42:31 +02:00
Guillermo Oyarzun
1abc69751a feat(gpu): create noise and pfail tests for rerand 2026-04-07 20:33:31 +02:00
Pedro Alves
3c2cb273d5 chore(docs): add GPU ZK benchmark SVG generation to CI pipeline
Add GPU ZK benchmark and SVG generation jobs to the documentation
workflows, and fix the data extractor to handle the cuda::zk:: prefix
chain in GPU ZK benchmark names.
2026-04-07 05:02:04 -03:00
dependabot[bot]
b18060e5c8 chore(deps): bump codecov/codecov-action from 5.5.2 to 6.0.0
Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 5.5.2 to 6.0.0.
- [Release notes](https://github.com/codecov/codecov-action/releases)
- [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md)
- [Commits](671740ac38...57e3a136b7)

---
updated-dependencies:
- dependency-name: codecov/codecov-action
  dependency-version: 6.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-07 09:51:30 +02:00
dependabot[bot]
c8827a21a7 chore(deps): bump rust-lang/crates-io-auth-action from 1.0.3 to 1.0.4
Bumps [rust-lang/crates-io-auth-action](https://github.com/rust-lang/crates-io-auth-action) from 1.0.3 to 1.0.4.
- [Release notes](https://github.com/rust-lang/crates-io-auth-action/releases)
- [Commits](b7e9a28ede...bbd81622f2)

---
updated-dependencies:
- dependency-name: rust-lang/crates-io-auth-action
  dependency-version: 1.0.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-07 09:51:08 +02:00
274 changed files with 9587 additions and 2993 deletions

View File

@@ -54,7 +54,7 @@ jobs:
- name: Retrieve data from cache
id: retrieve-data-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
@@ -89,7 +89,7 @@ jobs:
- name: Store data in cache
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
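The hunk above updates both halves of the usual split-cache pattern: an explicit `actions/cache/restore` step early in the job, and a matching `actions/cache/save` guarded on the restore step's `cache-hit` output. A minimal sketch of the pattern (step id, path, and key are illustrative, not from this repository):

```yaml
- name: Restore data from cache
  id: data-cache
  uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
  with:
    path: data/**/*.cbor
    key: data-${{ hashFiles('data.lock') }}

# ... steps that regenerate the data when the cache missed ...

- name: Store data in cache
  if: steps.data-cache.outputs.cache-hit != 'true'
  continue-on-error: true
  uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
  with:
    path: data/**/*.cbor
    key: data-${{ hashFiles('data.lock') }}
```

Guarding the save on `cache-hit` avoids re-uploading an unchanged cache entry.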

View File

@@ -69,7 +69,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
dependencies:
@@ -200,7 +200,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: |
~/.nvm
@@ -213,7 +213,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -56,7 +56,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
integer:

View File

@@ -57,7 +57,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
integer:

View File

@@ -78,7 +78,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
dependencies:

View File

@@ -45,7 +45,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
wasm:
@@ -92,7 +92,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: |
~/.nvm
@@ -105,7 +105,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -34,7 +34,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
backward:
@@ -79,19 +79,11 @@ jobs:
exit 1
fi
- name: Find existing comment
- name: Post/refresh backward-compat report
if: steps.report.outputs.has_report == 'true'
id: find-comment
uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4.0.0
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0
with:
issue-number: ${{ github.event.pull_request.number }}
body-includes: '**Backward-compat snapshot:'
- name: Comment on PR
if: steps.report.outputs.has_report == 'true'
uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0
with:
comment-id: ${{ steps.find-comment.outputs.comment-id }}
issue-number: ${{ github.event.pull_request.number }}
body-path: report.md
edit-mode: replace
header: backward-compat-snapshot
hide_and_recreate: true
hide_classify: OUTDATED
path: report.md
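The net effect of this hunk is to collapse the two-step `find-comment` + `create-or-update-comment` pair into a single sticky-comment step. Assembling the added lines, the resulting step looks roughly like:

```yaml
- name: Post/refresh backward-compat report
  if: steps.report.outputs.has_report == 'true'
  uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0
  with:
    header: backward-compat-snapshot
    hide_and_recreate: true
    hide_classify: OUTDATED
    path: report.md
```

The `header` value keys the sticky comment, so each run updates the same PR comment instead of searching for it by body text.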

View File

@@ -19,7 +19,7 @@ on:
- shortint_oprf
- hlapi_unsigned
- hlapi_signed
- hlapi_erc20
- hlapi_erc7984
- hlapi_dex
- hlapi_noise_squash
- hlapi_kvstore
@@ -93,8 +93,8 @@ jobs:
if inputs_command == "integer_zk":
files_to_parse.append("pke_zk_crs_sizes.csv")
elif inputs_command == "hlapi_erc20":
files_to_parse.append("erc20_pbs_count.csv")
elif inputs_command == "hlapi_erc7984":
files_to_parse.append("erc7984_pbs_count.csv")
elif inputs_command == "hlapi_dex":
files_to_parse.extend(
[
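The inline Python touched by this hunk dispatches on the benchmark command to pick extra CSV files to parse. As a standalone sketch of the renamed dispatch (the helper name is hypothetical, and the `hlapi_dex` file list is truncated in the diff, so it is left empty here):

```python
def additional_files_to_parse(inputs_command):
    """Pick extra result CSV files to parse for a given benchmark command.

    Hypothetical standalone version of the workflow's inline dispatch.
    """
    files_to_parse = []
    if inputs_command == "integer_zk":
        files_to_parse.append("pke_zk_crs_sizes.csv")
    elif inputs_command == "hlapi_erc7984":
        # Renamed from hlapi_erc20 / erc20_pbs_count.csv in this change.
        files_to_parse.append("erc7984_pbs_count.csv")
    elif inputs_command == "hlapi_dex":
        # The dex file list is elided in the diff shown above.
        pass
    return files_to_parse
```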

View File

@@ -223,7 +223,7 @@ jobs:
results_type: ${{ inputs.additional_results_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -108,14 +108,14 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-hlapi-erc20:
name: benchmark_cpu_weekly/run-benchmarks-hlapi-erc20
run-benchmarks-hlapi-erc7984:
name: benchmark_cpu_weekly/run-benchmarks-hlapi-erc7984
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: hlapi_erc20
additional_file_to_parse: erc20_pbs_count.csv
command: hlapi_erc7984
additional_file_to_parse: erc7984_pbs_count.csv
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -99,7 +99,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_ct_key_sizes
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -17,6 +17,10 @@ on:
description: "Run GPU core-crypto benchmarks"
type: boolean
default: true
run-gpu-zk-benchmarks:
description: "Run GPU ZK benchmarks"
type: boolean
default: true
run-hpu-benchmarks:
description: "Run HPU benchmarks"
type: boolean
@@ -36,7 +40,7 @@ jobs:
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: integer,hlapi_erc20
command: integer,hlapi_erc7984
op_flavor: fast_default
bench_type: both
precisions_set: documentation
@@ -91,7 +95,7 @@ jobs:
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: integer_multi_bit,hlapi_erc20
command: integer_multi_bit,hlapi_erc7984
op_flavor: fast_default
bench_type: both
precisions_set: documentation
@@ -110,7 +114,7 @@ jobs:
uses: ./.github/workflows/benchmark_hpu_common.yml
if: inputs.run-hpu-benchmarks
with:
command: integer,hlapi_erc20
command: integer,hlapi_erc7984
op_flavor: default
bench_type: both
precisions_set: documentation
@@ -165,21 +169,42 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu-zk-server:
name: benchmark_documentation/run-benchmarks-gpu-zk-server
uses: ./.github/workflows/benchmark_gpu_common.yml
if: inputs.run-gpu-zk-benchmarks
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
command: integer_zk
op_flavor: default
bench_type: both
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
generate-svgs-with-benchmarks-run:
name: benchmark-documentation/generate-svgs-with-benchmarks-run
if: ${{ always() &&
(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks ||inputs.run-hpu-benchmarks) &&
(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks || inputs.run-hpu-benchmarks) &&
inputs.generate-svgs }}
needs: [
run-benchmarks-cpu-integer, run-benchmarks-gpu-integer, run-benchmarks-hpu-integer,
run-benchmarks-cpu-zk-server, run-benchmarks-cpu-zk-client,
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto,
run-benchmarks-gpu-zk-server
]
uses: ./.github/workflows/generate_svgs.yml
with:
time_span_days: 5
generate-cpu-svgs: ${{ inputs.run-cpu-benchmarks }}
generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks }}
generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks }}
generate-hpu-svgs: ${{ inputs.run-hpu-benchmarks }}
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
@@ -188,7 +213,7 @@ jobs:
generate-svgs-without-benchmarks-run:
name: benchmark-documentation/generate-svgs-without-benchmarks-run
if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-hpu-benchmarks) &&
if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-gpu-zk-benchmarks || inputs.run-hpu-benchmarks) &&
inputs.generate-svgs }}
uses: ./.github/workflows/generate_svgs.yml
with:

View File

@@ -37,7 +37,7 @@ on:
- integer_zk_experimental
- integer_aes
- integer_aes256
- hlapi_erc20
- hlapi_erc7984
- hlapi_dex
- hlapi_noise_squash
op_flavor:
@@ -123,8 +123,8 @@ jobs:
if inputs_command == "integer_zk":
files_to_parse.append("pke_zk_crs_sizes.csv")
elif inputs_command == "hlapi_erc20":
files_to_parse.append("erc20_pbs_count.csv")
elif inputs_command == "hlapi_erc7984":
files_to_parse.append("erc7984_pbs_count.csv")
elif inputs_command == "hlapi_dex":
files_to_parse.extend(
[

View File

@@ -89,7 +89,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_integer_multi_bit_gpu_default
path: ${{ env.RESULTS_FILENAME }}
@@ -173,7 +173,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -126,16 +126,10 @@ jobs:
needs: prepare-matrix
runs-on: ubuntu-latest
outputs:
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
# otherwise we'll try to run the next job on a non-existing on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start remote instance
id: start-remote-instance
continue-on-error: true
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: start
@@ -145,25 +139,6 @@ jobs:
backend: ${{ inputs.backend }}
profile: ${{ inputs.profile }}
- name: Acknowledge remote instance failure
if: steps.start-remote-instance.outcome == 'failure' &&
inputs.profile != 'single-h100'
run: |
echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')"
exit 1
env:
INPUTS_PROFILE: ${{ inputs.profile }}
# This will allow to fallback on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' &&
steps.start-remote-instance.outcome == 'failure' &&
inputs.profile == 'single-h100'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
install-dependencies:
name: benchmark_gpu_common/install-dependencies
@@ -184,7 +159,6 @@ jobs:
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -296,7 +270,7 @@ jobs:
filenames: ${{ inputs.additional_file_to_parse }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -333,7 +307,7 @@ jobs:
teardown-instance:
name: benchmark_gpu_common/teardown-instance
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:

View File

@@ -42,7 +42,7 @@ env:
OPTIMIZATION_TARGET: "throughput"
BATCH_SIZE: "5000"
SCHEDULING_POLICY: "MAX_PARALLELISM"
BENCHMARKS: "erc20"
BENCHMARKS: "erc7984"
BRANCH_NAME: ${{ github.ref_name }}
COMMIT_SHA: ${{ github.sha }}
SLAB_SECRET: ${{ secrets.JOB_SECRET }}
@@ -204,7 +204,7 @@ jobs:
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
- name: Cache cargo
uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: |
~/.cargo/registry
@@ -214,14 +214,14 @@ jobs:
restore-keys: ${{ runner.os }}-cargo-
- name: Login to GitHub Container Registry
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Chainguard Registry
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: cgr.dev
username: ${{ secrets.CGR_USERNAME }}
@@ -232,7 +232,7 @@ jobs:
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Use Node.js
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: 20.x
@@ -248,13 +248,13 @@ jobs:
npm install && npm run deploy:emptyProxies && npx hardhat compile
working-directory: fhevm/
- name: Profile erc20 no-cmux benchmark on GPU
- name: Profile erc7984 no-cmux benchmark on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
BENCHMARK_TYPE="THROUGHPUT_200" \
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
make -e "profile_erc20_gpu"
make -e "profile_erc7984_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Get nsys profile name
@@ -271,7 +271,7 @@ jobs:
- name: Upload profile artifact
env:
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ env.REPORT_NAME }}
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
@@ -302,7 +302,7 @@ jobs:
working-directory: fhevm/
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
path: fhevm/$${{ env.RESULTS_FILENAME }}

View File

@@ -14,7 +14,7 @@ on:
- integer
- hlapi_unsigned
- hlapi_signed
- hlapi_erc20
- hlapi_erc7984
op_flavor:
description: "Operations set to run"
type: choice

View File

@@ -185,7 +185,7 @@ jobs:
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_${{ matrix.bench_type }}_${{ matrix.command }}_benchmarks
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -280,7 +280,7 @@ jobs:
BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_regression_${{ env.RESULTS_FILE_SHA }} # RESULT_FILE_SHA is needed to avoid collision between matrix.command runs
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -99,7 +99,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_fft
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -99,7 +99,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_ntt
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -46,7 +46,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
wasm_bench:

View File

@@ -124,7 +124,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: |
~/.nvm
@@ -137,7 +137,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -180,7 +180,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -138,7 +138,7 @@ jobs:
- name: Node cache restoration
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2'
id: node-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: |
~/.nvm
@@ -151,7 +151,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
if: inputs.run-pcc-cpu-batch == 'pcc_batch_2' && steps.node-cache.outputs.cache-hit != 'true'
with:
path: |


@@ -40,7 +40,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
fft:


@@ -42,7 +42,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
ntt:


@@ -43,14 +43,14 @@ jobs:
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
- name: Check workflows security
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3
with:
advanced-security: 'false' # Print results directly in logs
persona: pedantic
version: ${{ steps.get_zizmor.outputs.version }}
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@70c4af2ed5282c51ba40566d026d6647852ffa3e # v5.0.1
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@ca46236c6ce584ae24bc6283ba8dcf4b3ec8a066 # v5.0.4
with:
allowlist: |
slsa-framework/slsa-github-generator


@@ -44,7 +44,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
tfhe:
@@ -74,7 +74,7 @@ jobs:
make test_shortint_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -88,7 +88,7 @@ jobs:
make test_integer_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}


@@ -46,7 +46,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
csprng:


@@ -87,7 +87,7 @@ jobs:
- name: Upload tables
if: inputs.backend_comparison == false
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_subset_${{inputs.bench_subset}}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
# This will upload all the generated files
@@ -111,7 +111,7 @@ jobs:
- name: Upload comparison tables
if: inputs.backend_comparison == true
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a
with:
name: ${{ github.sha }}_backends_comparison_tables
# This will upload all the generated files


@@ -209,60 +209,98 @@ jobs:
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-zk-server-latency-table:
name: generate_documentation_svgs/gpu-zk-server-latency-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-H100-SXM5x8
layer: integer
bench_subset: zk
pbs_kind: multi_bit
grouping_factor: 4
bench_type: latency
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-zk-benchmark-latency
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-zk-server-throughput-table:
name: generate_documentation_svgs/gpu-zk-server-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-H100-SXM5x8
layer: integer
bench_subset: zk
pbs_kind: multi_bit
grouping_factor: 4
bench_type: throughput
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-zk-benchmark-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# -----------------------------------------------------------
# ERC20 benchmarks tables
# ERC7984 benchmarks tables
# -----------------------------------------------------------
cpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/cpu-erc20-latency-throughput-table
cpu-erc7984-latency-throughput-table:
name: generate_documentation_svgs/cpu-erc7984-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: hpc7a.96xlarge
layer: hlapi
bench_subset: erc20
bench_subset: erc7984
pbs_kind: classical
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-hlapi-erc20-benchmark-latency-throughput
output_filename: cpu-hlapi-erc7984-benchmark-latency-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/gpu-erc20-latency-throughput-table
gpu-erc7984-latency-throughput-table:
name: generate_documentation_svgs/gpu-erc7984-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-H100-SXM5x8
layer: hlapi
bench_subset: erc20
bench_subset: erc7984
pbs_kind: multi_bit
grouping_factor: 4
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-hlapi-erc20-benchmark-h100x8-sxm5-latency-throughput
output_filename: gpu-hlapi-erc7984-benchmark-h100x8-sxm5-latency-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
hpu-erc20-latency-throughput-table:
name: generate_documentation_svgs/hpu-erc20-latency-throughput-table
hpu-erc7984-latency-throughput-table:
name: generate_documentation_svgs/hpu-erc7984-latency-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-hpu-svgs
with:
backend: hpu
hardware_name: hpu_x1
layer: hlapi
bench_subset: erc20
bench_subset: erc7984
pbs_kind: classical
bench_type: both
time_span_days: ${{ inputs.time_span_days }}
output_filename: hpu-hlapi-erc20-benchmark-hpux1-latency-throughput.svg
output_filename: hpu-hlapi-erc7984-benchmark-hpux1-latency-throughput.svg
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}


@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
types: [ labeled, opened, synchronize ]
permissions:
contents: read
@@ -38,6 +38,7 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -48,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -62,28 +63,23 @@ jobs:
- tfhe/src/integer/server_key/radix_parallel/tests_cases_unsigned.rs
- tfhe/src/shortint/parameters/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_core_h100_tests.yml'
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_core_h100_tests/setup-instance
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
runs-on: ubuntu-latest
outputs:
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
# otherwise we'll try to run the next job on a non-existing on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: start
@@ -93,13 +89,6 @@ jobs:
backend: hyperstack
profile: single-h100
# This will allow falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance will be spawned specifically for pull requests from forked repositories
- name: Start GitHub instance
id: start-github-instance
@@ -132,7 +121,6 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -176,7 +164,7 @@ jobs:
teardown-instance:
name: gpu_core_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
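
The same teardown gating recurs across the GPU workflows in this diff: a per-step `remote-instance-outcome` output is replaced by the job-level `needs.<job>.result`. Isolated as a sketch (job and step names follow the diff; the stop step body is an illustrative placeholder, not the actual teardown command):

```yaml
teardown-instance:
  needs: [ setup-instance, cuda-tests-linux ]
  # always() prevents this job from being skipped when cuda-tests-linux
  # fails or is cancelled; the result check then ensures teardown only
  # runs when setup-instance actually provisioned an instance.
  if: ${{ always() && needs.setup-instance.result == 'success' }}
  runs-on: ubuntu-latest
  steps:
    - name: Stop remote instance  # illustrative placeholder step
      run: echo "stopping ${{ needs.setup-instance.outputs.runner-name }}"
```

Checking the whole job's `result` rather than one step's `outcome` also stays correct now that the fallback step (whose failure the old output had to tolerate) is removed.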


@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:


@@ -25,16 +25,10 @@ jobs:
name: gpu_full_h100_tests/setup-instance
runs-on: ubuntu-latest
outputs:
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
# otherwise we'll try to run the next job on a non-existing on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start remote instance
id: start-remote-instance
continue-on-error: true
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: start
@@ -44,13 +38,6 @@ jobs:
backend: hyperstack
profile: single-h100
# This will allow falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
cuda-tests-linux:
name: gpu_full_h100_tests/cuda-tests-linux
needs: [ setup-instance ]
@@ -74,7 +61,6 @@ jobs:
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -118,7 +104,7 @@ jobs:
teardown-instance:
name: gpu_full_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:


@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:


@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
types: [ labeled, opened, synchronize ]
permissions:
contents: read
@@ -38,6 +38,7 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -48,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -65,26 +66,22 @@ jobs:
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_hlapi_h100_tests.yml'
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_hlapi_h100_tests/setup-instance
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
runs-on: ubuntu-latest
outputs:
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
# otherwise we'll try to run the next job on a non-existing on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: start
@@ -94,13 +91,6 @@ jobs:
backend: hyperstack
profile: single-h100
# This will allow falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance will be spawned specifically for pull requests from forked repositories
- name: Start GitHub instance
id: start-github-instance
@@ -133,7 +123,6 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -184,7 +173,7 @@ jobs:
teardown-instance:
name: gpu_hlapi_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:


@@ -17,8 +17,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Nightly tests will be triggered each evening 8p.m.
- cron: "0 20 * * *"
# Weekly tests will be triggered every Monday at 8p.m.
- cron: "0 20 * * 1"
pull_request:
@@ -28,10 +28,41 @@ permissions:
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_integer_long_run_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
is_needed_in_gpu_ci: ${{ env.IS_PR == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- tfhe/src/core_crypto/gpu/**
- tfhe/src/integer/gpu/**
- tfhe/src/shortint/parameters/**
- '.github/workflows/gpu_integer_long_run_tests.yml'
setup-instance:
name: gpu_integer_long_run_tests/setup-instance
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
needs: [should-run]
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
needs.should-run.outputs.is_needed_in_gpu_ci == 'true'
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}


@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:


@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:


@@ -131,6 +131,10 @@ jobs:
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Run semgrep and lint checks on CUDA code
run: |
make semgrep_and_lint_gpu_code
- name: Run fmt checks
run: |
make check_fmt_gpu
@@ -139,10 +143,6 @@ jobs:
run: |
make pcc_gpu
- name: Run semgrep and lint checks on CUDA code
run: |
make semgrep_and_lint_gpu_code
- name: Run semver checks on tfhe-cuda-backend
run: |
make semver_check_cuda_backend


@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -63,7 +63,6 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_classic_tests.yml'
- scripts/integer-tests.sh


@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
types: [ labeled, opened, synchronize ]
permissions:
contents: read
@@ -38,6 +38,7 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -48,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -63,29 +64,24 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_h100_tests.yml'
- scripts/integer-tests.sh
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_signed_integer_h100_tests/setup-instance
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
runs-on: ubuntu-latest
outputs:
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
# otherwise we'll try to run the next job on a non-existing on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: start
@@ -95,13 +91,6 @@ jobs:
backend: hyperstack
profile: single-h100
# This will allow falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance will be spawned specifically for pull requests from forked repositories
- name: Start GitHub instance
id: start-github-instance
@@ -134,7 +123,6 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -176,7 +164,7 @@ jobs:
teardown-instance:
name: gpu_signed_integer_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:


@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -64,7 +64,6 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_tests.yml'
- scripts/integer-tests.sh


@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -63,7 +63,6 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
- scripts/integer-tests.sh


@@ -23,7 +23,7 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
types: [ labeled, opened, synchronize ]
permissions:
contents: read
@@ -38,6 +38,7 @@ jobs:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
@@ -48,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -63,29 +64,24 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
- scripts/integer-tests.sh
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_unsigned_integer_h100_tests/setup-instance
needs: should-run
if: github.event_name == 'workflow_dispatch' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
runs-on: ubuntu-latest
outputs:
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
# otherwise we'll try to run the next job on a non-existing on-demand instance.
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
continue-on-error: true
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: start
@@ -95,13 +91,6 @@ jobs:
backend: hyperstack
profile: single-h100
# This will allow falling back on permanent instances running on Hyperstack.
- name: Use permanent remote instance
id: use-permanent-instance
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
run: |
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
# This instance will be spawned specifically for pull requests from forked repositories
- name: Start GitHub instance
id: start-github-instance
@@ -134,7 +123,6 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
@@ -176,7 +164,7 @@ jobs:
teardown-instance:
name: gpu_unsigned_integer_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:


@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -64,7 +64,6 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_tests.yml'
- scripts/integer-tests.sh


@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
@@ -55,12 +55,9 @@ jobs:
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- backends/zk-cuda-backend/**
- tfhe/src/core_crypto/gpu/**
- tfhe/src/integer/gpu/**
- tfhe/src/shortint/parameters/**
- tfhe/src/zk/**
- tfhe-zk-pok/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_zk_tests.yml'
- ci/slab.toml


@@ -41,7 +41,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
hpu:


@@ -62,7 +62,7 @@ jobs:
PACKAGE: ${{ inputs.package-name }}
run: |
cargo package -p "${PACKAGE}"
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: crate-${{ inputs.package-name }}
path: target/package/*.crate
@@ -107,7 +107,7 @@ jobs:
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
id: auth
- name: Publish crate.io package


@@ -128,7 +128,7 @@ jobs:
run: |
cargo package -p "${PACKAGE}"
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: crate-${{ inputs.package-name }}
path: target/package/*.crate
@@ -196,6 +196,13 @@ jobs:
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
persist-credentials: "false"
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
@@ -203,19 +210,19 @@ jobs:
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
id: auth
- name: Publish crates.io package
env:
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
PACKAGE: ${{ inputs.package-name }}
DRY-RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
DRY_RUN: ${{ inputs.dry-run && '--dry-run' || '' }}
run: |
# dry-run expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since dry-run is handled in the env section above.
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p "${PACKAGE}" ${DRY-RUN}
cargo publish -p "${PACKAGE}" ${DRY_RUN}
- name: Generate hash
id: published_hash
@@ -255,7 +262,7 @@ jobs:
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661 # v2.3.3
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (${{ inputs.package-name }} release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -89,7 +89,7 @@ jobs:
make build_web_js_api_parallel
- name: Authenticate on NPM
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: '24'
registry-url: 'https://registry.npmjs.org'

View File

@@ -53,7 +53,7 @@ jobs:
- name: Restore Sagemath image from cache
id: docker-cache
uses: actions/cache/restore@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}
@@ -76,7 +76,7 @@ jobs:
- name: Store Sagemath image in cache
if: steps.docker-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@668228422ae6a00e4ad889ee87cd7109ec5666a7 #v5.0.4
uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5
with:
path: /tmp/sagemath_image
key: sagemath-image-${{ env.SAGEMATH_VERSION }}-${{ github.sha }}

View File

@@ -312,7 +312,7 @@ semgrep_and_lint_gpu_code: semgrep_lint_setup_venv
find "$(TFHECUDA_SRC)" -name '*.h' -o -name '*.cuh' -o -name '*.cu' \
| grep -v '/cmake-build-debug/' \
| grep -v '/build/' \
| xargs venv/bin/semgrep --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
| xargs venv/bin/semgrep --error --config "$(TFHECUDA_SRC)/.semgrep/release-ordering.yaml" --scan-unknown-extensions
venv/bin/python3 "scripts/check_scratch_cleanup.py"
.PHONY: semver_check_cuda_backend # Run semver checks on tfhe-cuda-backend
@@ -360,7 +360,7 @@ check_fmt_toml: install_taplo
.PHONY: check_typos # Check for typos in codebase
check_typos: install_typos_checker
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" | typos --file-list - && echo "No typos found"
@git ls-files ":!*.png" ":!*.cbor" ":!*.bcode" ":!*.ico" ":!*/twiddles.cu" ":!*.hpu" | typos --file-list - && echo "No typos found"
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
clippy_gpu: install_rs_check_toolchain
@@ -587,6 +587,17 @@ clippy_backward_compat_data: install_rs_check_toolchain # the toolchain is selec
echo "Cannot run clippy for backward compat crate on non x86 platform for now."; \
fi
.PHONY: check_backward_compat_locks_did_not_change # Check backward compat Cargo.lock files are up to date
check_backward_compat_locks_did_not_change: install_rs_check_toolchain
@for crate in `ls -1 $(BACKWARD_COMPAT_DATA_DIR)/crates/ | grep generate_`; do \
echo "checking Cargo.lock for $$crate"; \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
-C $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate metadata --locked --format-version 1 > /dev/null || \
( echo "Cargo.lock for $$crate is out of date. Update it with:" && \
echo " cd $(BACKWARD_COMPAT_DATA_DIR)/crates/$$crate && cargo metadata --format-version 1 > /dev/null" && \
echo "then commit the updated Cargo.lock." && exit 1 ); \
done
.PHONY: clippy_test_vectors # Run clippy lints on the test vectors app
clippy_test_vectors: install_rs_check_toolchain
cd apps/test-vectors; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
@@ -1934,25 +1945,25 @@ bench_hlapi_hpu: install_rs_check_toolchain
--bench hlapi \
--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --
.PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations
bench_hlapi_erc20: install_rs_check_toolchain
.PHONY: bench_hlapi_erc7984 # Run benchmarks for ERC7984 operations
bench_hlapi_erc7984: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--bench hlapi-erc7984 \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
bench_hlapi_erc20_gpu: install_rs_check_toolchain
.PHONY: bench_hlapi_erc7984_gpu # Run benchmarks for ERC7984 operations on GPU
bench_hlapi_erc7984_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--bench hlapi-erc7984 \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_erc20_gpu_classical # Run benchmarks for ERC20 operations on GPU with classical parameters
bench_hlapi_erc20_gpu_classical: install_rs_check_toolchain
.PHONY: bench_hlapi_erc7984_gpu_classical # Run benchmarks for ERC7984 operations on GPU with classical parameters
bench_hlapi_erc7984_gpu_classical: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=classical \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--bench hlapi-erc7984 \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_dex # Run benchmarks for DEX operations
@@ -1976,13 +1987,13 @@ bench_hlapi_dex_gpu_classical: install_rs_check_toolchain
--bench hlapi-dex \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ECR20 operations on HPU
bench_hlapi_erc20_hpu: install_rs_check_toolchain
.PHONY: bench_hlapi_erc7984_hpu # Run benchmarks for ERC7984 operations on HPU
bench_hlapi_erc7984_hpu: install_rs_check_toolchain
source ./setup_hpu.sh --config $(HPU_CONFIG); \
export V80_PCIE_DEV=${V80_PCIE_DEV}; \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--bench hlapi-erc7984 \
--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
@@ -2038,10 +2049,10 @@ bench_summary: install_rs_check_toolchain
--bench hlapi-noise-squash \
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark -- '::decomp_noise_squash_comp::'
# ERC20
# ERC7984
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--bench hlapi-erc7984 \
--features=integer,internal-keycache -p tfhe-benchmark -- '::transfer::overflow'
# DEX
@@ -2083,10 +2094,10 @@ bench_summary_gpu: install_rs_check_toolchain
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off -- '::decomp_noise_squash_comp::'
# ERC20
# ERC7984
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--bench hlapi-erc7984 \
--features=integer,gpu,internal-keycache -p tfhe-benchmark --profile release_lto_off -- '::transfer::overflow'
# DEX
@@ -2265,6 +2276,7 @@ pcc_batch_5:
$(call run_recipe_with_details,clippy_tfhe_lints)
$(call run_recipe_with_details,check_compile_tests)
$(call run_recipe_with_details,clippy_backward_compat_data)
$(call run_recipe_with_details,check_backward_compat_locks_did_not_change)
.PHONY: pcc_batch_6 # duration: 6'32''
pcc_batch_6:

View File

@@ -1,5 +1,14 @@
use std::path::PathBuf;
use std::process::Command;
fn get_linux_distribution_name() -> Option<String> {
let content = std::fs::read_to_string("/etc/os-release").ok()?;
for line in content.lines() {
if let Some(value) = line.strip_prefix("NAME=") {
return Some(value.trim_matches('"').to_string());
}
}
None
}
fn main() {
if let Ok(val) = std::env::var("DOCS_RS") {
@@ -28,9 +37,7 @@ fn main() {
println!("cargo::rerun-if-changed=src");
if std::env::consts::OS == "linux" {
let output = Command::new("./get_os_name.sh").output().unwrap();
let distribution = String::from_utf8(output.stdout).unwrap();
if distribution != "Ubuntu\n" {
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
println!(
"cargo:warning=This Linux distribution is not officially supported. \
Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
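The build-script change above replaces shelling out to `get_os_name.sh` with reading `/etc/os-release` directly in Rust. A minimal standalone sketch of the same parsing logic, taking the file content as a string so it can be exercised without touching `/etc/os-release` (the function name is illustrative):

```rust
/// Extract the distribution name from os-release-style content:
/// find the `NAME=` line and strip surrounding quotes, if any.
fn distribution_name(content: &str) -> Option<String> {
    content
        .lines()
        .find_map(|line| line.strip_prefix("NAME="))
        .map(|value| value.trim_matches('"').to_string())
}

fn main() {
    // `PRETTY_NAME=` does not match the `NAME=` prefix, so only the
    // exact `NAME=` line is picked up.
    let sample = "PRETTY_NAME=\"Ubuntu 24.04 LTS\"\nNAME=\"Ubuntu\"\nID=ubuntu\n";
    assert_eq!(distribution_name(sample).as_deref(), Some("Ubuntu"));
}
```

Comparing the result against `Some("Ubuntu")` also sidesteps the trailing-newline pitfall the old `"Ubuntu\n"` string comparison had.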

View File

@@ -721,7 +721,7 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
void *const *ksks);
void *const *compute_bsks, void *const *ksks);
void cleanup_cuda_integer_grouped_oprf_custom_range_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);

View File

@@ -390,7 +390,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_a[6], &wires_a[15], &input_bits[7]);
XOR(&wires_a[10], &wires_a[15], &wires_b[0]);
XOR(&wires_a[11], &wires_a[20], &wires_a[9]);
FLUSH(&wires_a[6], &wires_a[10]);
FLUSH(&wires_a[6], &wires_a[10], &wires_a[11]);
XOR(&wires_a[7], &input_bits[7], &wires_a[11]);
FLUSH(&wires_a[7]);
XOR(&wires_a[17], &wires_a[10], &wires_a[11]);
@@ -426,7 +426,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[22], &wires_b[18], &wires_a[19]);
XOR(&wires_b[23], &wires_b[19], &wires_a[21]);
XOR(&wires_b[24], &wires_b[20], &wires_a[18]);
FLUSH(&wires_b[21], &wires_b[23], &wires_b[24]);
FLUSH(&wires_b[21], &wires_b[22], &wires_b[23], &wires_b[24]);
XOR(&wires_b[25], &wires_b[21], &wires_b[22]);
FLUSH(&wires_b[25]);
@@ -468,7 +468,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[37], &wires_b[36], &wires_b[34]);
XOR(&wires_b[38], &wires_b[27], &wires_b[36]);
FLUSH(&wires_b[38]);
FLUSH(&wires_b[38], &wires_b[37]);
XOR(&wires_b[44], &wires_b[33], &wires_b[37]);
CudaRadixCiphertextFFI *and_outs_6[] = {&wires_b[39]};
@@ -479,7 +479,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[40], &wires_b[25], &wires_b[39]);
XOR(&wires_b[41], &wires_b[40], &wires_b[37]);
XOR(&wires_b[43], &wires_b[29], &wires_b[40]);
FLUSH(&wires_b[41]);
FLUSH(&wires_b[41], &wires_b[40], &wires_b[43], &wires_b[44]);
XOR(&wires_b[45], &wires_b[42], &wires_b[41]);
FLUSH(&wires_b[45]);
@@ -514,6 +514,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
XOR(&wires_b[57], &wires_b[50], &wires_b[53]);
XOR(&wires_b[58], &wires_c[4], &wires_b[46]);
XOR(&wires_b[59], &wires_c[3], &wires_b[54]);
FLUSH(&wires_b[57], &wires_b[58]);
XOR(&wires_b[60], &wires_b[46], &wires_b[57]);
XOR(&wires_b[61], &wires_c[14], &wires_b[57]);
XOR(&wires_b[62], &wires_b[52], &wires_b[58]);
@@ -589,6 +590,7 @@ __host__ void vectorized_sbox_n_bytes(CudaStreams streams,
#undef FLUSH
#undef AND
#undef ADD_ONE_FLUSH
#undef ADD_ONE
}
/**

View File

@@ -489,7 +489,7 @@ template <typename Torus>
__host__ void host_modulus_switch_multi_bit(
cudaStream_t stream, uint32_t gpu_index, Torus *array_out, Torus *array_in,
int size, uint32_t log_modulus, uint32_t degree, uint32_t grouping_factor) {
cudaSetDevice(gpu_index);
check_cuda_error(cudaSetDevice(gpu_index));
int multibit_size = size / grouping_factor;
int num_threads = 0, num_blocks = 0;
getNumBlocksAndThreads(multibit_size, 1024, num_blocks, num_threads);

View File

@@ -72,13 +72,13 @@ void cuda_integer_grouped_oprf_custom_range_64_async(
uint32_t num_blocks_intermediate, const void *seeded_lwe_input,
const uint64_t *decomposed_scalar, const uint64_t *has_at_least_one_set,
uint32_t num_scalars, uint32_t shift, int8_t *mem, void *const *bsks,
void *const *ksks) {
void *const *compute_bsks, void *const *ksks) {
host_integer_grouped_oprf_custom_range<uint64_t>(
CudaStreams(streams), radix_lwe_out, num_blocks_intermediate,
(const uint64_t *)seeded_lwe_input, decomposed_scalar,
has_at_least_one_set, num_scalars, shift,
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks,
(int_grouped_oprf_custom_range_memory<uint64_t> *)mem, bsks, compute_bsks,
(uint64_t *const *)ksks);
}

View File

@@ -114,7 +114,7 @@ void host_integer_grouped_oprf_custom_range(
const Torus *decomposed_scalar, const Torus *has_at_least_one_set,
uint32_t num_scalars, uint32_t shift,
int_grouped_oprf_custom_range_memory<Torus> *mem_ptr, void *const *bsks,
Torus *const *ksks) {
void *const *compute_bsks, Torus *const *ksks) {
CudaRadixCiphertextFFI *computation_buffer = mem_ptr->tmp_oprf_output;
set_zero_radix_ciphertext_slice_async<Torus>(
@@ -127,12 +127,12 @@ void host_integer_grouped_oprf_custom_range(
host_integer_scalar_mul_radix<Torus>(
streams, computation_buffer, decomposed_scalar, has_at_least_one_set,
mem_ptr->scalar_mul_buffer, bsks, ksks, mem_ptr->params.message_modulus,
num_scalars);
mem_ptr->scalar_mul_buffer, compute_bsks, ksks,
mem_ptr->params.message_modulus, num_scalars);
host_logical_scalar_shift_inplace<Torus>(streams, computation_buffer, shift,
mem_ptr->logical_scalar_shift_buffer,
bsks, ksks, num_blocks_intermediate);
host_logical_scalar_shift_inplace<Torus>(
streams, computation_buffer, shift, mem_ptr->logical_scalar_shift_buffer,
compute_bsks, ksks, num_blocks_intermediate);
uint32_t num_blocks_output = radix_lwe_out->num_radix_blocks;
uint32_t blocks_to_copy =

View File

@@ -308,6 +308,7 @@ void cleanup_cuda_multi_bit_programmable_bootstrap_noise_tests_128(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer) {
cleanup_cuda_multi_bit_programmable_bootstrap_128(stream, gpu_index,
pbs_buffer);
cuda_synchronize_stream(static_cast<cudaStream_t>(stream), gpu_index);
}
// Noise tests variant of the 128-bit multi-bit PBS, restricted to

View File

@@ -1,3 +0,0 @@
#!/usr/bin/env bash
cat /etc/os-release | grep "\<NAME\>" | sed "s/NAME=\"//g" | sed "s/\"//g"

View File

@@ -1647,6 +1647,7 @@ unsafe extern "C" {
shift: u32,
mem: *mut i8,
bsks: *const *mut ffi::c_void,
compute_bsks: *const *mut ffi::c_void,
ksks: *const *mut ffi::c_void,
);
}

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-hpu-backend"
version = "0.4.0"
version = "0.5.0"
edition = "2021"
license = "BSD-3-Clause-Clear"
description = "HPU implementation on FPGA of TFHE-rs primitives."
@@ -36,7 +36,7 @@ thiserror = "1.0.61"
bytemuck = { workspace = true }
anyhow = "1.0.82"
lazy_static = "1.4.0"
rand = "0.8.5"
rand = "0.10.1"
regex = "1.10.4"
bitflags = { version = "2.5.0", features = ["serde"] }
itertools = "0.11.0"

View File

@@ -297,8 +297,8 @@ source setup_hpu.sh --config v80 -p
# Run hlapi benches
make test_high_level_api_hpu
# Run hlapi erc20 benches
make bench_hlapi_erc20_hpu
# Run hlapi erc7984 benches
make bench_hlapi_erc7984_hpu
# Run integer level benches
make bench_integer_hpu

View File

@@ -109,7 +109,7 @@
flush_behaviour = "Patient"
flush = true
[firmware.op_cfg.by_op.ERC_20]
[firmware.op_cfg.by_op.ERC_7984]
fill_batch_fifo = true
min_batch_size = false
use_tiers = true

View File

@@ -121,7 +121,7 @@
flush_behaviour = "Patient"
flush = true
[firmware.op_cfg.by_op.ERC_20]
[firmware.op_cfg.by_op.ERC_7984]
fill_batch_fifo = true
min_batch_size = false
use_tiers = true

View File

@@ -230,7 +230,7 @@ iop!(
[IOP_CMP -> "CMP_NEQ", opcode::CMP_NEQ],
[IOP_CT_F_CT_BOOL -> "IF_THEN_ZERO", opcode::IF_THEN_ZERO],
[IOP_CT_F_2CT_BOOL -> "IF_THEN_ELSE", opcode::IF_THEN_ELSE],
[IOP_2CT_F_3CT -> "ERC_20", opcode::ERC_20],
[IOP_2CT_F_3CT -> "ERC_7984", opcode::ERC_7984],
[IOP_CT_F_CT -> "MEMCPY", opcode::MEMCPY],
[IOP_CT_F_CT -> "ILOG2", opcode::ILOG2],
[IOP_CT_F_CT -> "COUNT0", opcode::COUNT0],
@@ -240,5 +240,5 @@ iop!(
[IOP_CT_F_CT -> "TRAIL0", opcode::TRAIL0],
[IOP_CT_F_CT -> "TRAIL1", opcode::TRAIL1],
[IOP_NCT_F_2NCT -> "ADD_SIMD", opcode::ADD_SIMD],
[IOP_2NCT_F_3NCT -> "ERC_20_SIMD", opcode::ERC_20_SIMD],
[IOP_2NCT_F_3NCT -> "ERC_7984_SIMD", opcode::ERC_7984_SIMD],
);

View File

@@ -74,9 +74,9 @@ pub const IF_THEN_ZERO: u8 = 0xCA;
pub const IF_THEN_ELSE: u8 = 0xCB;
// Custom algorithm
// ERC20 -> Found xfer algorithm
// ERC7984 -> Fund xfer algorithm
// 2Ct <- func(3Ct)
pub const ERC_20: u8 = 0x80;
pub const ERC_7984: u8 = 0x80;
// Count bits
pub const COUNT0: u8 = 0x81;
@@ -89,7 +89,7 @@ pub const TRAIL1: u8 = 0x87;
// SIMD for maximum throughput
pub const ADD_SIMD: u8 = 0xF0;
pub const ERC_20_SIMD: u8 = 0xF1;
pub const ERC_7984_SIMD: u8 = 0xF1;
//
// Utility operations
// Used to handle real clone of ciphertext already uploaded in the Hpu memory

View File

@@ -24,7 +24,7 @@ use mem_alloc::{MemAlloc, MemChunk};
mod qdma;
use qdma::QdmaDriver;
use rand::Rng;
use rand::RngExt;
const DMA_XFER_ALIGN: usize = 4096_usize;
@@ -148,8 +148,8 @@ impl HpuHw {
tracing::debug!("Load stage1 through JTAG");
let pdi_stg1_tmp = format!(
"hpu_stg1_{}.pdi",
rand::thread_rng()
.sample_iter(rand::distributions::Alphanumeric)
rand::rng()
.sample_iter(rand::distr::Alphanumeric)
.take(5)
.map(char::from)
.collect::<String>()

View File

@@ -31,7 +31,7 @@ crate::impl_fw!("Demo" [
IF_THEN_ZERO => fw_impl::ilp::iop_if_then_zero;
IF_THEN_ELSE => fw_impl::ilp::iop_if_then_else;
ERC_20 => fw_impl::ilp::iop_erc_20;
ERC_7984 => fw_impl::ilp::iop_erc_7984;
CMP_GT => cmp_gt;
CMP_GTE => cmp_gte;

View File

@@ -61,7 +61,7 @@ crate::impl_fw!("Ilp" [
IF_THEN_ZERO => fw_impl::ilp::iop_if_then_zero;
IF_THEN_ELSE => fw_impl::ilp::iop_if_then_else;
ERC_20 => fw_impl::ilp::iop_erc_20;
ERC_7984 => fw_impl::ilp::iop_erc_7984;
MEMCPY => fw_impl::ilp::iop_memcpy;
@@ -74,7 +74,7 @@ crate::impl_fw!("Ilp" [
TRAIL1 => fw_impl::ilp_log::iop_trail1;
// SIMD Implementations
ADD_SIMD => fw_impl::llt::iop_add_simd;
ERC_20_SIMD => fw_impl::llt::iop_erc_20_simd;
ERC_7984_SIMD => fw_impl::llt::iop_erc_7984_simd;
]);
#[instrument(level = "trace", skip(prog))]
@@ -1296,13 +1296,13 @@ pub fn iop_if_then_else(prog: &mut Program) {
});
}
/// Implement erc_20 fund xfer
/// Implement erc_7984 fund xfer
/// Targeted algorithm is as follows:
/// 1. Check that from has enough funds
/// 2. Compute real_amount to xfer (i.e. amount or 0)
/// 3. Compute new amount (from - new_amount, to + new_amount)
#[instrument(level = "info", skip(prog))]
pub fn iop_erc_20(prog: &mut Program) {
pub fn iop_erc_7984(prog: &mut Program) {
// Allocate metavariables:
// Dest -> Operand
let mut dst_from = prog.iop_template_var(OperandKind::Dst, 0);
@@ -1314,7 +1314,7 @@ pub fn iop_erc_20(prog: &mut Program) {
let src_amount = prog.iop_template_var(OperandKind::Src, 2);
// Add Comment header
prog.push_comment("ERC_20 (new_from, new_to) <- (from, to, amount)".to_string());
prog.push_comment("ERC_7984 (new_from, new_to) <- (from, to, amount)".to_string());
let props = prog.params();
let tfhe_params: asm::DigitParameters = props.clone().into();

View File

@@ -70,7 +70,7 @@ crate::impl_fw!("Llt" [
IF_THEN_ZERO => fw_impl::ilp::iop_if_then_zero;
IF_THEN_ELSE => fw_impl::ilp::iop_if_then_else;
ERC_20 => fw_impl::llt::iop_erc_20;
ERC_7984 => fw_impl::llt::iop_erc_7984;
MEMCPY => fw_impl::ilp::iop_memcpy;
COUNT0 => fw_impl::ilp_log::iop_count0;
@@ -83,7 +83,7 @@ crate::impl_fw!("Llt" [
// SIMD Implementations
ADD_SIMD => fw_impl::llt::iop_add_simd;
ERC_20_SIMD => fw_impl::llt::iop_erc_20_simd;
ERC_7984_SIMD => fw_impl::llt::iop_erc_7984_simd;
]);
// ----------------------------------------------------------------------------
@@ -225,24 +225,24 @@ pub fn iop_muls(prog: &mut Program) {
}
#[instrument(level = "trace", skip(prog))]
pub fn iop_erc_20(prog: &mut Program) {
pub fn iop_erc_7984(prog: &mut Program) {
// Add Comment header
prog.push_comment("ERC_20 (new_from, new_to) <- (from, to, amount)".to_string());
prog.push_comment("ERC_7984 (new_from, new_to) <- (from, to, amount)".to_string());
// TODO: Make sweep of kogge_blk_w
// All these little parameters would be very handy to write an
// exploration/compilation program which would try to minimize latency by
// playing with these.
iop_erc_20_rtl(prog, 0, Some(10)).add_to_prog(prog);
iop_erc_7984_rtl(prog, 0, Some(10)).add_to_prog(prog);
}
#[instrument(level = "trace", skip(prog))]
pub fn iop_erc_20_simd(prog: &mut Program) {
pub fn iop_erc_7984_simd(prog: &mut Program) {
// Add Comment header
prog.push_comment("ERC_20_SIMD (new_from, new_to) <- (from, to, amount)".to_string());
prog.push_comment("ERC_7984_SIMD (new_from, new_to) <- (from, to, amount)".to_string());
simd(
prog,
crate::asm::iop::SIMD_N,
fw_impl::llt::iop_erc_20_rtl,
fw_impl::llt::iop_erc_7984_rtl,
None,
);
}
@@ -379,7 +379,7 @@ pub fn iop_rotate_scalar_left(prog: &mut Program) {
// Helper Functions
// ----------------------------------------------------------------------------
/// Implement erc_20 fund xfer
/// Implement erc_7984 fund xfer
/// Targeted algorithm is as follows:
/// 1. Check that from has enough funds
/// 2. Compute real_amount to xfer (i.e. amount or 0)
@@ -391,7 +391,7 @@ pub fn iop_rotate_scalar_left(prog: &mut Program) {
/// (dst_from[0], dst_to[0], ..., dst_from[N-1], dst_to[N-1])
/// Where N is the batch size
#[instrument(level = "trace", skip(prog))]
pub fn iop_erc_20_rtl(prog: &mut Program, batch_index: u8, kogge_blk_w: Option<usize>) -> Rtl {
pub fn iop_erc_7984_rtl(prog: &mut Program, batch_index: u8, kogge_blk_w: Option<usize>) -> Rtl {
// Allocate metavariables:
// Dest -> Operand
let dst_from = prog.iop_template_var(OperandKind::Dst, 2 * batch_index);

View File

@@ -156,7 +156,7 @@ impl HpuVarWrapped {
{
let mut inner = var.inner.lock().unwrap();
for (slot, ct) in std::iter::zip(inner.bundle.iter_mut(), ct.into_iter()) {
for (slot, ct) in std::iter::zip(inner.bundle.iter_mut(), ct) {
#[cfg(feature = "io-dump")]
let params = ct.params().clone();
for (id, cut) in ct.into_container().iter().enumerate() {

View File

@@ -1,5 +1,14 @@
use std::path::PathBuf;
use std::process::Command;
fn get_linux_distribution_name() -> Option<String> {
let content = std::fs::read_to_string("/etc/os-release").ok()?;
for line in content.lines() {
if let Some(value) = line.strip_prefix("NAME=") {
return Some(value.trim_matches('"').to_string());
}
}
None
}
fn main() {
// Handle docs.rs builds (no CUDA available)
@@ -29,16 +38,10 @@ fn main() {
println!("cargo:rustc-link-arg=-Wl,--allow-multiple-definition");
println!("cargo:rustc-link-arg=-Wl,--no-as-needed");
// Check Linux distribution (reuse script from tfhe-cuda-backend)
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
.expect("CARGO_MANIFEST_DIR must be set by cargo during build");
let script_path = PathBuf::from(&manifest_dir).join("../tfhe-cuda-backend/get_os_name.sh");
let output = Command::new(&script_path)
.output()
.expect("Failed to run get_os_name.sh — is tfhe-cuda-backend present?");
let distribution =
String::from_utf8(output.stdout).expect("get_os_name.sh output must be valid UTF-8");
if distribution != "Ubuntu\n" {
if get_linux_distribution_name().as_deref() != Some("Ubuntu") {
println!(
"cargo:warning=This Linux distribution is not officially supported. \
Only Ubuntu is supported by zk-cuda-backend at this time. Build may fail\n"

View File

@@ -71,11 +71,6 @@ set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0 -G")
# Additional CUDA flags (aligned with tfhe-cuda-backend)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall -Xcompiler -Wextra --use_fast_math --expt-relaxed-constexpr")
# =============================================================================
# Path to tfhe-cuda-backend for device utilities
# =============================================================================
set(TFHE_CUDA_BACKEND_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../tfhe-cuda-backend/cuda)
# Core source files (without device utilities) Device utilities come from tfhe-cuda-backend.
set(FP_CORE_SOURCES src/primitives/fp.cu src/primitives/fp2.cu src/curve.cu src/msm/pippenger/msm_pippenger.cu
src/msm/msm.cu)
@@ -112,7 +107,7 @@ endif()
target_link_libraries(zk_cuda_backend PUBLIC cudart)
# Include both local headers and tfhe-cuda-backend headers (for device.h)
target_include_directories(zk_cuda_backend PUBLIC include ../src/include ${TFHE_CUDA_BACKEND_DIR}/include)
target_include_directories(zk_cuda_backend PUBLIC include ../src/include)
# =============================================================================
# Tests and Benchmarks (optional, controlled by ZK_CUDA_BACKEND_BUILD_TESTS/BENCHMARKS)
@@ -135,4 +130,3 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
message(STATUS "C++ standard: ${CMAKE_CXX_STANDARD}")
message(STATUS "CUDA standard: ${CMAKE_CUDA_STANDARD}")
message(STATUS "tfhe-cuda-backend path: ${TFHE_CUDA_BACKEND_DIR}")

View File

@@ -0,0 +1,35 @@
#pragma once
#include <cstddef>
#include <cstdio>
#include "device.h"
// Variadic checked multiplication of size_t values.
// Folds left-to-right using __builtin_mul_overflow, returning true on overflow.
// On overflow the value written to *out is unspecified.
template <typename... Args>
inline bool checked_mul(size_t *out, size_t first, Args... rest) {
size_t result = first;
for (size_t value : {static_cast<size_t>(rest)...}) {
if (__builtin_mul_overflow(result, value, &result))
return true;
}
*out = result;
return false;
}
// Variadic safe multiplication: computes the product and panics on overflow.
template <typename... Args> inline size_t safe_mul(size_t first, Args... rest) {
size_t result;
bool overflow = checked_mul(&result, first, rest...);
PANIC_IF_FALSE(!overflow, "multiplication overflow wraps size_t");
return result;
}
// Variadic safe multiplication with an appended sizeof(T) factor.
// Computes (args... * sizeof(T)) with overflow checking.
template <typename T, typename... Args>
inline size_t safe_mul_sizeof(Args... args) {
return safe_mul(args..., sizeof(T));
}

View File

@@ -0,0 +1,145 @@
#ifndef DEVICE_H
#define DEVICE_H
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>
extern "C" {
#define check_cuda_error(ans) \
{ cuda_error((ans), __FILE__, __LINE__); }
inline void cuda_error(cudaError_t code, const char *file, int line) {
if (code != cudaSuccess) {
std::fprintf(stderr, "Cuda error: %s %s %d\n", cudaGetErrorString(code),
file, line);
std::abort();
}
}
// The PANIC macro should be used to validate user-inputs to GPU functions
// it will execute in all targets, including production settings
// e.g., cudaMemCopy to the device should check that the destination pointer is
// a device pointer
#define PANIC(format, ...) \
{ \
std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__, \
__LINE__, __func__, ##__VA_ARGS__); \
std::abort(); \
}
// This is a generic assertion checking macro with user defined printf-style
// message
#define PANIC_IF_FALSE(cond, format, ...) \
do { \
if (!(cond)) { \
PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond); \
} \
} while (0)
#ifndef GPU_ASSERTS_DISABLE
// The GPU assert should be used to validate assumptions in algorithms,
// for example, checking that two user-provided quantities have a certain
// relationship or that the size of the buffer provided to a function is
// sufficient when it is filled with some algorithm that depends on
// user-provided inputs e.g., OPRF corrections buffer should not have a size
// higher than the number of blocks in the datatype that is generated
#define GPU_ASSERT(cond, format, ...) \
PANIC_IF_FALSE(cond, format, ##__VA_ARGS__)
#else
#define GPU_ASSERT(cond, format, ...)                                         \
do { \
} while (0)
#endif
uint32_t cuda_get_device();
void cuda_set_device(uint32_t gpu_index);
cudaEvent_t cuda_create_event(uint32_t gpu_index);
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
uint32_t gpu_index);
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
uint32_t gpu_index);
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index);
cudaStream_t cuda_create_stream(uint32_t gpu_index);
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
uint32_t cuda_is_available();
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
uint64_t &size_tracker,
bool allocate_gpu_memory);
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
uint64_t cuda_device_total_memory(uint32_t gpu_index);
void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
uint64_t size,
cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
void *dest, void const *src, uint64_t size, cudaStream_t stream,
uint32_t gpu_index, bool gpu_memory_allocated);
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
uint32_t gpu_index);
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
cudaStream_t stream, uint32_t gpu_index);
int cuda_get_number_of_gpus();
int cuda_get_number_of_sms();
void cuda_synchronize_device(uint32_t gpu_index);
void cuda_drop(void *ptr, uint32_t gpu_index);
void cuda_drop_with_size_tracking_async(void *ptr, cudaStream_t stream,
uint32_t gpu_index,
bool gpu_memory_allocated);
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
}
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
uint32_t cuda_get_max_shared_memory_per_block(uint32_t gpu_index);
bool cuda_check_support_cooperative_groups();
bool cuda_check_support_thread_block_clusters();
template <typename Torus>
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
Torus *d_array, Torus value, Torus n);
#endif

View File

@@ -0,0 +1,16 @@
#ifndef HELPER_PROFILE
#define HELPER_PROFILE
#ifdef USE_NVTOOLS
#include <nvtx3/nvToolsExt.h>
#endif
void cuda_nvtx_label_with_color(const char *name);
void cuda_nvtx_pop();
#define PUSH_RANGE(name) \
{ cuda_nvtx_label_with_color(name); }
#define POP_RANGE() \
{ cuda_nvtx_pop(); }
#endif

View File

@@ -0,0 +1,43 @@
#include "helper_profile.cuh"
#include <stddef.h>
#include <stdint.h>
uint32_t adler32(const unsigned char *data) {
const uint32_t MOD_ADLER = 65521;
uint32_t a = 1, b = 0;
size_t index;
for (index = 0; data[index] != 0; ++index) {
a = (a + data[index] * 2) % MOD_ADLER;
b = (b + a) % MOD_ADLER;
}
return (b << 16) | a;
}
void cuda_nvtx_label_with_color(const char *name) {
#ifdef USE_NVTOOLS
int color_id = adler32((const unsigned char *)name);
int r, g, b;
r = color_id & 0x000000ff;
g = (color_id & 0x000ff000) >> 12;
b = (color_id & 0x0ff00000) >> 20;
if (r < 64 && g < 64 && b < 64) {
r = r * 3;
g = g * 3 + 64;
b = b * 4;
}
color_id = 0xff000000 | (r << 16) | (g << 8) | (b);
nvtxEventAttributes_t eventAttrib = {0};
eventAttrib.version = NVTX_VERSION;
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
eventAttrib.colorType = NVTX_COLOR_ARGB;
eventAttrib.color = color_id;
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
eventAttrib.message.ascii = name;
nvtxRangePushEx(&eventAttrib);
#endif
}
void cuda_nvtx_pop() {
#ifdef USE_NVTOOLS
nvtxRangePop();
#endif
}
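The color derivation above hashes the range name (note each byte is doubled, a deviation from textbook Adler-32) and brightens near-black results so ranges stay visible in the profiler timeline. A Python mirror of the same arithmetic (a sketch for illustration, not shipped code):

```python
MOD_ADLER = 65521

def name_hash(data: bytes) -> int:
    # Mirrors the C adler32() above, including the doubled byte.
    a, b = 1, 0
    for byte in data:
        a = (a + byte * 2) % MOD_ADLER
        b = (b + a) % MOD_ADLER
    return (b << 16) | a

def argb_color(name: bytes) -> int:
    h = name_hash(name)
    r = h & 0x000000FF
    g = (h & 0x000FF000) >> 12
    b = (h & 0x0FF00000) >> 20
    if r < 64 and g < 64 and b < 64:
        # Brighten near-black colors; maxima stay within one byte (189, 253, 252).
        r, g, b = r * 3, g * 3 + 64, b * 4
    return 0xFF000000 | (r << 16) | (g << 8) | b
```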

@@ -11,7 +11,7 @@
#include <stddef.h>
#include <cstring>
#include "../../tfhe-cuda-backend/cuda/src/utils/helper_profile.cuh"
#include "helper_profile.cuh"
// C++ helper functions (not exported, used internally)
// These can call template functions since they have C++ linkage

@@ -302,7 +302,7 @@ class BenchType(enum.StrEnum):
class BenchSubset(enum.StrEnum):
All = "all"
Erc20 = "erc20"
Erc7984 = "erc7984"
Zk = "zk"
@staticmethod
@@ -310,8 +310,8 @@ class BenchSubset(enum.StrEnum):
match bench_subset.lower():
case "all":
return BenchSubset.All
case "erc20":
return BenchSubset.Erc20
case "erc7984":
return BenchSubset.Erc7984
case "zk":
return BenchSubset.Zk
case _:
@@ -611,14 +611,14 @@ class BenchDetails:
# Case for arithmetic operations (add, sub, mul,...)
self.operation_name = "::".join(parts[2:-2])
else:
# Case for higher-level operation (erc20 transfer, dex,...)
# Case for higher-level operation (erc7984 transfer, dex,...)
self.operation_name = "::".join(parts[2:-1])
else:
if "_PARAM_" in parts[-2]:
# Case for arithmetic operations (add, sub, mul,...)
self.operation_name = "::".join(parts[1:-2])
else:
# Case for higher-level operation (erc20 transfer, dex,...)
# Case for higher-level operation (erc7984 transfer, dex,...)
self.operation_name = "::".join(parts[1:-1])
self.rust_type = parts[-1].partition("_mean")[0]
case Layer.Shortint:

@@ -137,7 +137,7 @@ parser.add_argument(
parser.add_argument(
"--bench-subset",
dest="bench_subset",
choices=["all", "erc20", "zk"],
choices=["all", "erc7984", "zk"],
default="all",
help="Subset of benchmarks to filter against; dedicated formatting will be applied",
)
@@ -285,8 +285,8 @@ def perform_hardware_comparison(
def get_formatter(layer: Layer, bench_subset: BenchSubset):
match bench_subset:
case BenchSubset.Erc20:
return formatters.hlapi.Erc20Formatter
case BenchSubset.Erc7984:
return formatters.hlapi.Erc7984Formatter
case BenchSubset.Zk:
if layer == Layer.Wasm:
return formatters.wasm.ZKFormatter
@@ -442,7 +442,7 @@ def get_operands_types(layer: Layer, bench_subset: BenchSubset = None):
return ciphertext_only
elif bench_subset:
match bench_subset:
case BenchSubset.Zk | BenchSubset.Erc20:
case BenchSubset.Zk | BenchSubset.Erc7984:
return ciphertext_only
case BenchSubset.All:
return ciphertext_and_plaintext

@@ -39,9 +39,9 @@ class HlApiFormatter(GenericFormatter):
TRANSFER_IMPLEM_COLUMN_HEADER = "Transfer implementation"
class Erc20Formatter(HlApiFormatter):
class Erc7984Formatter(HlApiFormatter):
"""
Formatter for ERC20 benchmarks.
Formatter for ERC7984 benchmarks.
"""
@staticmethod
@@ -63,7 +63,7 @@ class Erc20Formatter(HlApiFormatter):
bench_type = BenchType.Latency
conversion_func = utils.convert_latency_value_to_readable_text
# For now ERC20 benchmarks are only made on 64-bit ciphertexts.
# For now ERC7984 benchmarks are only made on 64-bit ciphertexts.
value = conversion_func(timings[-1])
formatted[test_name][bench_type] = value

@@ -66,7 +66,7 @@ target.hlapi-dex = [
"swap_claim::whitepaper",
"swap_claim::no_cmux"
]
target.hlapi-erc20 = ["transfer::whitepaper", "transfer::no_cmux"]
target.hlapi-erc7984 = ["transfer::whitepaper", "transfer::no_cmux"]
target.core_crypto-ks = ["keyswitch"]
target.core_crypto-pbs = ["multi_bit_pbs"]
parameters_filter = "PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128"
@@ -99,7 +99,7 @@ target.hlapi-dex = [
"swap_claim::whitepaper",
"swap_claim::no_cmux"
]
target.hlapi-erc20 = ["transfer::whitepaper", "transfer::no_cmux"]
target.hlapi-erc7984 = ["transfer::whitepaper", "transfer::no_cmux"]
target.shortint = ["bitand"]
target.core_crypto-ks = ["keyswitch"]
target.core_crypto-pbs = [ "pbs_mem_optimized"]

@@ -16,19 +16,14 @@ tfhe = { path = "../../tfhe", features = ["hpu", "hpu-debug"] }
ipc-channel = "0.18.3"
strum = { version = "0.26.2", features = ["derive"] }
strum_macros = "0.26.2"
bytemuck = { workspace = true }
clap = { version = "4.4.4", features = ["derive"] }
clap-num = "*"
anyhow = "1.0.82"
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] }
serde_json = "1.0"
rand = "0.8.5"
serde = { version = "1", features = ["derive"] }
bitflags = "2.6.0"
[[bin]]
name = "hpu_mockup"

@@ -164,7 +164,7 @@ cargo run --release --features="hpu" --test hpu -- u8
# * bitwise: for ct x ct bitwise IOps
# * cmp: for comparison IOps
# * ternary: for if-then-else and like IOps
# * algo: for IOps dedicated to offload sub-algorithm like ERC_20
# * algo: for IOps dedicated to offload sub-algorithm like ERC_7984
# The command below only runs comparison IOps; for convenience, `just hpu_test "cmp"` can also be used
cargo run --release --features="hpu" --test hpu -- cmp
```

@@ -1 +1 @@
nightly-2026-01-14
nightly-2026-04-22

@@ -25,7 +25,7 @@ use tfhe::{
CompressedKVStore, CompressedPublicKey, CompressedServerKey,
CompressedSquashedNoiseCiphertextList, CompressedSquashedNoiseCiphertextListBuilder, FheBool,
FheInt8, FheUint32, FheUint64, FheUint8, ReRandomizationContext, ReRandomizationMode,
ReRandomizationSupport, ServerKey, SquashedNoiseFheBool, SquashedNoiseFheInt,
ReRandomizationSupport, Seed, ServerKey, SquashedNoiseFheBool, SquashedNoiseFheInt,
SquashedNoiseFheUint,
};
use tfhe_backward_compat_data::load::{
@@ -748,6 +748,22 @@ fn test_hl_key_features(
}
}
// OPRF: check that oblivious pseudo-random generation works with the dedicated key.
// The decrypted values only need to be within range; the seed is deterministic but we
// don't compare to specific bit values (those are validated in the unit tests).
if server_key.supports_oprf() {
let seed = Seed(42u128);
let rand_bool = FheBool::generate_oblivious_pseudo_random(seed);
let _: bool = rand_bool.decrypt(client_key);
let rand_uint = FheUint8::generate_oblivious_pseudo_random(seed);
let _: u8 = rand_uint.decrypt(client_key);
let rand_int = FheInt8::generate_oblivious_pseudo_random(seed);
let _: i8 = rand_int.decrypt(client_key);
}
Ok(())
}
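The backward-compatibility check above deliberately asserts only that decryption succeeds and that the value lands in its type's range. The same test shape, sketched in Python with `random.Random` as a hypothetical stand-in for the OPRF (the real generator is an FHE primitive, not this):

```python
import random

def oblivious_pseudo_random_u8(seed: int) -> int:
    # Stand-in: deterministic in the seed, output within the u8 range.
    return random.Random(seed).randrange(0, 256)

# Deterministic: the same seed always yields the same value.
assert oblivious_pseudo_random_u8(42) == oblivious_pseudo_random_u8(42)
# Range check only, mirroring the test above: no specific bit pattern is asserted.
assert 0 <= oblivious_pseudo_random_u8(42) <= 255
```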

@@ -92,8 +92,8 @@ harness = false
required-features = ["integer", "internal-keycache", "pbs-stats"]
[[bench]]
name = "hlapi-erc20"
path = "benches/high_level_api/erc20.rs"
name = "hlapi-erc7984"
path = "benches/high_level_api/erc7984.rs"
harness = false
required-features = ["integer", "internal-keycache"]

@@ -253,7 +253,7 @@ mod pbs_stats {
let _ = swap_request_update_dex_balance_func(&from_balance, &current_dex_balance, &amount);
let count = tfhe::get_pbs_count() * 2;
println!("ERC20 swap request update dex balance/::{type_name}: {count} PBS");
println!("ERC7984 swap request update dex balance/::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let params_name = params.name();
@@ -308,7 +308,7 @@ mod pbs_stats {
let (_, _) = swap_request_finalize_func(&to_balance_0, &total_dex_token_0_in, &sent_0);
let count = tfhe::get_pbs_count() * 2;
println!("ERC20 swap request finalize/::{type_name}: {count} PBS");
println!("ERC7984 swap request finalize/::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let params_name = params.name();
@@ -373,7 +373,7 @@ mod pbs_stats {
);
let count = tfhe::get_pbs_count();
println!("ERC20 swap claim prepare/::{type_name}: {count} PBS");
println!("ERC7984 swap claim prepare/::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let params_name = params.name();
@@ -433,7 +433,7 @@ mod pbs_stats {
);
let count = tfhe::get_pbs_count() * 2;
println!("ERC20 swap claim update dex balance/::{type_name}: {count} PBS");
println!("ERC7984 swap claim update dex balance/::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let params_name = params.name();

@@ -252,8 +252,8 @@ where
boolean: vec![],
imm: vec![],
};
let mut res_handle = FheHpu::iop_exec(&hpu_asm::iop::IOP_ERC_20, src);
// Iop erc_20 return new_from, new_to
let mut res_handle = FheHpu::iop_exec(&hpu_asm::iop::IOP_ERC_7984, src);
// Iop erc_7984 returns new_from, new_to
let new_to = res_handle.native.pop().unwrap();
let new_from = res_handle.native.pop().unwrap();
(new_from, new_to)
@@ -278,8 +278,8 @@ where
boolean: vec![],
imm: vec![],
};
let res_handle = FheHpu::iop_exec(&hpu_asm::iop::IOP_ERC_20_SIMD, src);
// Iop erc_20 return new_from, new_to
let res_handle = FheHpu::iop_exec(&hpu_asm::iop::IOP_ERC_7984_SIMD, src);
// Iop erc_7984 returns new_from, new_to
let res = res_handle.native;
res
}
@@ -319,18 +319,18 @@ mod pbs_stats {
let (_, _) = transfer_func(&from_amount, &to_amount, &amount);
let count = tfhe::get_pbs_count();
println!("ERC20 transfer/{fn_name}::{type_name}: {count} PBS");
println!("ERC7984 transfer/{fn_name}::{type_name}: {count} PBS");
let params = client_key.computation_parameters();
let params_name = params.name();
let test_name = if cfg!(feature = "gpu") {
format!("hlapi::cuda::erc20::pbs_count::{fn_name}::{params_name}::{type_name}")
format!("hlapi::cuda::erc7984::pbs_count::{fn_name}::{params_name}::{type_name}")
} else {
format!("hlapi::erc20::pbs_count::{fn_name}::{params_name}::{type_name}")
format!("hlapi::erc7984::pbs_count::{fn_name}::{params_name}::{type_name}")
};
let results_file = Path::new("erc20_pbs_count.csv");
let results_file = Path::new("erc7984_pbs_count.csv");
if !results_file.exists() {
File::create(results_file).expect("create results file failed");
}
@@ -392,7 +392,7 @@ fn bench_transfer_latency<FheType, F>(
&bench_id,
params,
params_name,
"erc20-transfer",
"erc7984-transfer",
&OperatorType::Atomic,
64,
vec![],
@@ -413,7 +413,7 @@ fn bench_transfer_latency_simd<FheType, F>(
F: for<'a> Fn(&'a Vec<FheType>, &'a Vec<FheType>, &'a Vec<FheType>) -> Vec<FheType>,
{
use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD
let hpu_simd_n = hpu_asm::iop::IOP_ERC_7984_SIMD
.format()
.unwrap()
.proto
@@ -453,7 +453,7 @@ fn bench_transfer_latency_simd<FheType, F>(
&bench_id,
params,
params_name,
"erc20-simd-transfer",
"erc7984-simd-transfer",
&OperatorType::Atomic,
64,
vec![],
@@ -507,7 +507,7 @@ fn bench_transfer_throughput<FheType, F>(
&bench_id,
params,
&params_name,
"erc20-transfer",
"erc7984-transfer",
&OperatorType::Atomic,
64,
vec![],
@@ -597,7 +597,7 @@ fn cuda_bench_transfer_throughput<FheType, F>(
&bench_id,
params,
&params_name,
"erc20-transfer",
"erc7984-transfer",
&OperatorType::Atomic,
64,
vec![],
@@ -661,7 +661,7 @@ fn hpu_bench_transfer_throughput<FheType, F>(
&bench_id,
params,
&params_name,
"erc20-transfer",
"erc7984-transfer",
&OperatorType::Atomic,
64,
vec![],
@@ -683,7 +683,7 @@ fn hpu_bench_transfer_throughput_simd<FheType, F>(
F: for<'a> Fn(&'a Vec<FheType>, &'a Vec<FheType>, &'a Vec<FheType>) -> Vec<FheType> + Sync,
{
use tfhe::tfhe_hpu_backend::prelude::hpu_asm;
let hpu_simd_n = hpu_asm::iop::IOP_ERC_20_SIMD
let hpu_simd_n = hpu_asm::iop::IOP_ERC_7984_SIMD
.format()
.unwrap()
.proto
@@ -746,7 +746,7 @@ fn hpu_bench_transfer_throughput_simd<FheType, F>(
&bench_id,
params,
&params_name,
"erc20-simd-ransfer",
"erc7984-simd-transfer",
&OperatorType::Atomic,
64,
vec![],
@@ -769,7 +769,7 @@ fn main() {
let mut c = Criterion::default().sample_size(10).configure_from_args();
let bench_name = "hlapi::erc20";
let bench_name = "hlapi::erc7984";
// FheUint64 PBS counts
// We don't run multiple times since every input is encrypted
@@ -896,7 +896,7 @@ fn main() {
let mut c = Criterion::default().sample_size(10).configure_from_args();
let bench_name = "hlapi::cuda::erc20";
let bench_name = "hlapi::cuda::erc7984";
// FheUint64 PBS counts
// We don't run multiple times since every input is encrypted
@@ -1027,7 +1027,7 @@ fn main() {
let mut c = Criterion::default().sample_size(10).configure_from_args();
let bench_name = "hlapi::hpu::erc20";
let bench_name = "hlapi::hpu::erc7984";
match get_bench_type() {
BenchmarkType::Latency => {
@@ -1040,7 +1040,7 @@ fn main() {
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
// Erc20 optimized instruction only available on Hpu
// Erc7984 optimized instruction only available on Hpu
bench_transfer_latency(
&mut group,
&cks,
@@ -1049,7 +1049,7 @@ fn main() {
"transfer::hpu_optim",
transfer_hpu::<FheUint64>,
);
// Erc20 SIMD instruction only available on Hpu
// Erc7984 SIMD instruction only available on Hpu
bench_transfer_latency_simd(
&mut group,
&cks,
@@ -1071,7 +1071,7 @@ fn main() {
"transfer::whitepaper",
transfer_whitepaper::<FheUint64>,
);
// Erc20 optimized instruction only available on Hpu
// Erc7984 optimized instruction only available on Hpu
hpu_bench_transfer_throughput(
&mut group,
&cks,
@@ -1080,7 +1080,7 @@ fn main() {
"transfer::hpu_optim",
transfer_hpu::<FheUint64>,
);
// Erc20 SIMD instruction only available on Hpu
// Erc7984 SIMD instruction only available on Hpu
hpu_bench_transfer_throughput_simd(
&mut group,
&cks,

@@ -8,6 +8,7 @@ use rayon::prelude::*;
#[cfg(any(feature = "gpu", feature = "hpu"))]
use std::cmp::max;
use tfhe::integer::keycache::KEY_CACHE;
use tfhe::integer::oprf::{OprfPrivateKey, OprfServerKey};
use tfhe::integer::IntegerKeyKind;
use tfhe::keycache::NamedParam;
#[cfg(any(feature = "gpu", feature = "hpu"))]
@@ -35,32 +36,42 @@ pub fn unsigned_oprf(c: &mut Criterion) {
format!("{bench_name}_bounded::{param_name}::{bit_size}_bits");
bench_group.bench_function(&bench_id_oprf, |b| {
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let oprf_pk = OprfPrivateKey::new(&cks);
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
b.iter(|| {
_ = black_box(sk.par_generate_oblivious_pseudo_random_unsigned_integer(
Seed(0),
num_block as u64,
));
_ = black_box(
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
Seed(0),
num_block as u64,
&sks,
),
);
})
});
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let oprf_pk = OprfPrivateKey::new(&cks);
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
b.iter(|| {
_ = black_box(
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
&sks,
),
);
})
});
}
BenchmarkType::Throughput => {
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let oprf_pk = OprfPrivateKey::new(&cks);
let oprf_sk = OprfServerKey::new(&oprf_pk, &cks).unwrap();
bench_id_oprf = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
bench_id_oprf_bounded =
@@ -71,10 +82,11 @@ pub fn unsigned_oprf(c: &mut Criterion) {
{
// Execute the operation once to know its cost.
reset_pbs_count();
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
&sks,
);
let pbs_count = max(get_pbs_count(), 1);
throughput_num_threads(num_block, pbs_count)
@@ -85,11 +97,13 @@ pub fn unsigned_oprf(c: &mut Criterion) {
let setup = |_batch_size: usize| ();
let run = |_: &mut (), batch_size: usize| {
(0..batch_size).into_par_iter().for_each(|_| {
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
);
oprf_sk
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
&sks,
);
});
};
find_optimal_batch(run, setup) as u64
@@ -100,9 +114,10 @@ pub fn unsigned_oprf(c: &mut Criterion) {
bench_group.bench_function(&bench_id_oprf, |b| {
b.iter(|| {
(0..elements).into_par_iter().for_each(|_| {
sk.par_generate_oblivious_pseudo_random_unsigned_integer(
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
Seed(0),
num_block as u64,
&sks,
);
})
})
@@ -111,10 +126,11 @@ pub fn unsigned_oprf(c: &mut Criterion) {
bench_group.bench_function(&bench_id_oprf_bounded, |b| {
b.iter(|| {
(0..elements).into_par_iter().for_each(|_| {
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
&sks,
);
})
})
@@ -148,6 +164,8 @@ pub mod cuda {
use criterion::black_box;
use tfhe::core_crypto::gpu::{get_number_of_gpus, CudaStreams};
use tfhe::integer::gpu::server_key::CudaServerKey;
use tfhe::integer::gpu::CudaOprfServerKey;
use tfhe::integer::oprf::{CompressedOprfServerKey, OprfPrivateKey};
use tfhe::GpuIndex;
use tfhe_csprng::seeders::Seed;
@@ -177,12 +195,18 @@ pub mod cuda {
let (cks, _cpu_sks) =
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &streams);
let oprf_pk = OprfPrivateKey::new(&cks);
let compressed_oprf_sk =
CompressedOprfServerKey::new(&oprf_pk, &cks).unwrap();
let cuda_oprf_sk =
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &streams);
b.iter(|| {
_ = black_box(
gpu_sks.par_generate_oblivious_pseudo_random_unsigned_integer(
cuda_oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer(
Seed(0),
num_block as u64,
&gpu_sks,
&streams,
),
);
@@ -193,14 +217,20 @@ pub mod cuda {
let (cks, _cpu_sks) =
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &streams);
let oprf_pk = OprfPrivateKey::new(&cks);
let compressed_oprf_sk =
CompressedOprfServerKey::new(&oprf_pk, &cks).unwrap();
let cuda_oprf_sk =
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &streams);
b.iter(|| {
_ = black_box(
gpu_sks
cuda_oprf_sk
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
&gpu_sks,
&streams,
),
);
@@ -210,13 +240,25 @@ pub mod cuda {
BenchmarkType::Throughput => {
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks_vec = cuda_local_keys(&cks);
let cpu_oprf_pk = OprfPrivateKey::new(&cks);
let cpu_oprf_sk = OprfServerKey::new(&cpu_oprf_pk, &cks).unwrap();
let compressed_oprf_sk =
CompressedOprfServerKey::new(&cpu_oprf_pk, &cks).unwrap();
// One CudaOprfServerKey per GPU, matching `gpu_sks_vec`.
let cuda_oprf_sks_vec: Vec<CudaOprfServerKey> = (0..get_number_of_gpus())
.map(|gpu_index| {
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
CudaOprfServerKey::decompress_from_cpu(&compressed_oprf_sk, &stream)
})
.collect();
// Execute the operation once to know its cost.
reset_pbs_count();
cpu_sks.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
cpu_oprf_sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
&cpu_sks,
);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
@@ -232,10 +274,11 @@ pub mod cuda {
(0..elements).into_par_iter().for_each(|i| {
let gpu_index: u32 = i as u32 % get_number_of_gpus();
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
gpu_sks_vec[gpu_index as usize]
cuda_oprf_sks_vec[gpu_index as usize]
.par_generate_oblivious_pseudo_random_unsigned_integer(
Seed(0),
num_block as u64,
&gpu_sks_vec[gpu_index as usize],
&stream,
);
})
@@ -247,11 +290,12 @@ pub mod cuda {
(0..elements).into_par_iter().for_each(|i| {
let gpu_index: u32 = i as u32 % get_number_of_gpus();
let stream = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
gpu_sks_vec[gpu_index as usize]
cuda_oprf_sks_vec[gpu_index as usize]
.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
&gpu_sks_vec[gpu_index as usize],
&stream,
);
})
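The throughput path above builds one `CudaOprfServerKey` per GPU and dispatches work item `i` to GPU `i % get_number_of_gpus()`. The dispatch rule itself is plain modular round-robin; a sketch:

```python
def assign_gpu(task_index: int, n_gpus: int) -> int:
    # Round-robin: consecutive tasks land on consecutive GPUs, wrapping around.
    return task_index % n_gpus

# Six tasks over three GPUs cycle through devices 0, 1, 2 twice.
assert [assign_gpu(i, 3) for i in range(6)] == [0, 1, 2, 0, 1, 2]
```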

@@ -2,6 +2,7 @@ use benchmark::params_aliases::*;
use criterion::{black_box, criterion_group, Criterion};
use tfhe::keycache::NamedParam;
use tfhe::shortint::keycache::KEY_CACHE;
use tfhe::shortint::oprf::{OprfPrivateKey, OprfServerKey};
use tfhe_csprng::seeders::Seed;
fn oprf(c: &mut Criterion) {
@@ -12,11 +13,15 @@ fn oprf(c: &mut Criterion) {
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS;
let keys = KEY_CACHE.get_from_param(param);
let cks = keys.client_key();
let sks = keys.server_key();
let oprf_pk = OprfPrivateKey::new(cks);
let oprf_sk = OprfServerKey::new(&oprf_pk, cks).unwrap();
bench_group.bench_function(format!("2-bits-oprf::{}", param.name()), |b| {
b.iter(|| {
_ = black_box(sks.generate_oblivious_pseudo_random(Seed(0), 2));
_ = black_box(oprf_sk.generate_oblivious_pseudo_random(Seed(0), 2, sks));
})
});
}

@@ -59,7 +59,7 @@ pub fn parse_wasm_benchmarks(results_file: &Path, raw_results_file: &Path) {
let bench_name = name_parts[0];
let params: PBSParameters = params_from_name(name_parts[1]).into();
println!("{name_parts:?}");
if bench_name.contains("_size") {
if full_name.contains("_size") {
write_result(&mut file, &prefixed_full_name, *val as usize);
} else {
let value_in_ns = (val * 1_000_000_f32) as usize;

@@ -168,7 +168,7 @@ mod generic_tests {
fn test_xof_seed_getters() {
let seed_bytes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let bits = u128::from_le_bytes(seed_bytes);
let dsep = [b't', b'f', b'h', b'e', b'k', b's', b'p', b's'];
let dsep = *b"tfheksps";
let seed = XofSeed::new_u128(bits, dsep);
let s = u128::from_le_bytes(seed.seed().try_into().unwrap());

@@ -342,6 +342,28 @@ impl<G: Curve> Proof<G> {
None => ComputeLoad::Verify,
}
}
pub fn to_le_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
let Self {
c_hat,
c_y,
pi,
compute_load_proof_fields,
} = self;
bytes.extend_from_slice(c_hat.to_le_bytes().as_ref());
bytes.extend_from_slice(c_y.to_le_bytes().as_ref());
bytes.extend_from_slice(pi.to_le_bytes().as_ref());
let (c_hat_t_bytes, c_h_bytes, pi_kzg_bytes) =
ComputeLoadProofFields::to_le_bytes(compute_load_proof_fields);
bytes.extend_from_slice(&c_hat_t_bytes);
bytes.extend_from_slice(&c_h_bytes);
bytes.extend_from_slice(&pi_kzg_bytes);
bytes
}
}
impl<G: Curve> ParameterSetConformant for Proof<G> {
@@ -404,6 +426,26 @@ pub(crate) struct ComputeLoadProofFields<G: Curve> {
pub(crate) pi_kzg: G::G1,
}
impl<G: Curve> ComputeLoadProofFields<G> {
#[allow(clippy::type_complexity)]
fn to_le_bytes(fields: &Option<Self>) -> (Box<[u8]>, Box<[u8]>, Box<[u8]>) {
if let Some(ComputeLoadProofFields {
c_hat_t,
c_h,
pi_kzg,
}) = fields.as_ref()
{
(
Box::from(G::G2::to_le_bytes(*c_hat_t).as_ref()),
Box::from(G::G1::to_le_bytes(*c_h).as_ref()),
Box::from(G::G1::to_le_bytes(*pi_kzg).as_ref()),
)
} else {
(Box::from([]), Box::from([]), Box::from([]))
}
}
}
type CompressedG2<G> = <<G as Curve>::G2 as Compressible>::Compressed;
type CompressedG1<G> = <<G as Curve>::G1 as Compressible>::Compressed;
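Both `to_le_bytes` impls above concatenate each field's little-endian encoding in declaration order, with the optional compute-load fields contributing empty slices when absent. The same pattern with u64 stand-ins for the group elements (a hypothetical helper, for illustration only):

```python
import struct

def proof_to_le_bytes(c_hat, c_y, pi, compute_load_fields=None):
    # Fixed-width little-endian fields, concatenated in declaration order.
    out = b"".join(struct.pack("<Q", v) for v in (c_hat, c_y, pi))
    # Optional fields serialize to empty bytes when absent, as in the Rust above.
    if compute_load_fields is not None:
        out += b"".join(struct.pack("<Q", v) for v in compute_load_fields)
    return out
```

Because every present field has a fixed width, the absence of the optional block is recoverable from the total length alone.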

@@ -440,6 +440,44 @@ impl<G: Curve> Proof<G> {
pub fn hash_config(&self) -> PkeV2SupportedHashConfig {
self.hash_config
}
pub fn to_le_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
let Self {
C_hat_e,
C_e,
C_r_tilde,
C_R,
C_hat_bin,
C_y,
C_h1,
C_h2,
C_hat_t,
pi,
pi_kzg,
compute_load_proof_fields,
hash_config: _,
} = self;
bytes.extend_from_slice(C_hat_e.to_le_bytes().as_ref());
bytes.extend_from_slice(C_e.to_le_bytes().as_ref());
bytes.extend_from_slice(C_r_tilde.to_le_bytes().as_ref());
bytes.extend_from_slice(C_R.to_le_bytes().as_ref());
bytes.extend_from_slice(C_hat_bin.to_le_bytes().as_ref());
bytes.extend_from_slice(C_y.to_le_bytes().as_ref());
bytes.extend_from_slice(C_h1.to_le_bytes().as_ref());
bytes.extend_from_slice(C_h2.to_le_bytes().as_ref());
bytes.extend_from_slice(C_hat_t.to_le_bytes().as_ref());
bytes.extend_from_slice(pi.to_le_bytes().as_ref());
bytes.extend_from_slice(pi_kzg.to_le_bytes().as_ref());
let (C_hat_h3_bytes, C_hat_w_bytes) =
ComputeLoadProofFields::to_le_bytes(compute_load_proof_fields);
bytes.extend_from_slice(&C_hat_h3_bytes);
bytes.extend_from_slice(&C_hat_w_bytes);
bytes
}
}
/// These fields can be pre-computed on the prover side in the faster Verifier scheme. If that's the

@@ -64,7 +64,7 @@ tfhe-fft = { version = "0.10.1", path = "../tfhe-fft", features = [
"serde",
"fft128",
] }
tfhe-ntt = { version = "0.7.0", path = "../tfhe-ntt" }
tfhe-ntt = { version = "0.7.1", path = "../tfhe-ntt" }
pulp = { workspace = true, features = ["default"] }
tfhe-cuda-backend = { version = "0.14.0", path = "../backends/tfhe-cuda-backend", optional = true }
aligned-vec = { workspace = true, features = ["default", "serde"] }
@@ -99,7 +99,7 @@ serde-wasm-bindgen = { workspace = true, optional = true }
getrandom = { workspace = true, optional = true }
bytemuck = { workspace = true }
tfhe-hpu-backend = { version = "0.4", path = "../backends/tfhe-hpu-backend", optional = true }
tfhe-hpu-backend = { version = "0.5", path = "../backends/tfhe-hpu-backend", optional = true }
[features]
default = ["avx512"]

@@ -8,23 +8,23 @@
<rect x="0" y="40" width="300" height="520" fill="#fbbc04"/>
<rect x="300" y="40" width="420" height="520" fill="#f3f3f3"/>
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">Negation (-)</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">71.5 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="60.0">77.5 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="60.0">9.08 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="60.0">8.4 ms</text>
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">Add / Sub (+,-)</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">93.2 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="100.0">91.7 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="100.0">9.07 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="100.0">8.35 ms</text>
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">Mul (x)</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">352 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="140.0">357 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="140.0">32.8 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="140.0">122 ms</text>
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="180.0">Equal / Not Equal (eq, ne)</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="180.0">70.1 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="180.0">72.0 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="180.0">7.03 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="180.0">6.77 ms</text>
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="220.0">Comparisons (ge, gt, le, lt)</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="220.0">87.6 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="220.0">89.5 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="220.0">10.6 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="220.0">6.81 ms</text>
<text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="260.0">Max / Min (max, min)</text>
@@ -32,31 +32,31 @@
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="260.0">15.0 ms</text>
<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="260.0">11.7 ms</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="300.0">Bitwise operations (&amp;, |, ^)</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="300.0">19.1 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="300.0">19.0 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="300.0">1.99 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="300.0">2.95 ms</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="340.0">Div / Rem (/, %)</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="340.0">5.04 s</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="340.0">4.88 s</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="340.0">514 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="340.0">912 ms</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="380.0">Left / Right Shifts (&lt;&lt;, &gt;&gt;)</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="380.0">119 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="380.0">121 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="380.0">18.0 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="380.0">25.8 ms</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="420.0">Left / Right Rotations (left_rotate, right_rotate)</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="420.0">119 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="420.0">121 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="420.0">18.0 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="420.0">27.9 ms</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="460.0">Leading / Trailing zeros/ones</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="460.0">223 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="460.0">222 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="460.0">20.2 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="460.0">14.7 ms</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="500.0">Log2</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="500.0">244 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="500.0">246 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="500.0">21.9 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="500.0">14.8 ms</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="540.0">Select</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="540.0">39.3 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="370.0" y="540.0">40.2 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="510.0" y="540.0">4.7 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="650.0" y="540.0">5.53 ms</text>
 <line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>


@@ -7,13 +7,13 @@
 <rect x="0" y="40" width="300" height="120" fill="#fbbc04"/>
 <rect x="300" y="40" width="420" height="120" fill="#f3f3f3"/>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="60.0">whitepaper</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="60.0">276 ms</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="60.0">23.0 ops/s</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="60.0">253 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="60.0">25.2 ops/s</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="100.0">no_cmux</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="100.0">238 ms</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="100.0">24.0 ops/s</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="100.0">256 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="100.0">25.2 ops/s</text>
 <text dominant-baseline="middle" text-anchor="start" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="6" y="140.0">overflow</text>
-<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="140.0">225 ms</text>
+<text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="405.0" y="140.0">238 ms</text>
 <text dominant-baseline="middle" text-anchor="middle" font-family="Arial" font-size="14" font-weight="normal" fill="black" x="615.0" y="140.0">21.3 ops/s</text>
 <line stroke="white" stroke-width="2" x1="0" y1="0" x2="720" y2="0"/>
 <line stroke="white" stroke-width="2" x1="0" y1="40" x2="720" y2="40"/>
