wip: measure latencies of a list of cts

WIP: fix gpu streams and use iter_batched
chore(bench): new heuristic to define elements for throughput
2026-04-28 03:01:21 -04:00 · 2025-01-28 11:19:27 +01:00 · 2025-01-22 10:56:08 +01:00 · 2025-01-20 15:21:05 +01:00 · 2025-01-20 10:59:12 +01:00 · 2025-01-20 10:00:49 +01:00
430 changed files with 10008 additions and 35650 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,8 +8,14 @@ root = true
 end_of_line = lf
 insert_final_newline = true

-# 4 space indentation
-[*.rs]
+# 4 space indentation for rust and toml
+[*.{rs,toml}]
 charset = utf-8
 indent_style = space
 indent_size = 4
+
+# 2 space indentation for c and js
+[*.{js,json,c,h}]
+charset = utf-8
+indent_style = space
+indent_size = 2
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -5,6 +5,7 @@ self-hosted-runner:
    - 4090-desktop
    - large_windows_16_latest
    - large_ubuntu_16
+    - large_ubuntu_16-22.04
 # Configuration variables in array of strings defined in your repository or
 # organization. `null` means disabling configuration variables check.
 # Empty array means no configuration variable is allowed.
--- a/.github/workflows/aws_tfhe_backward_compat_tests.yml
+++ b/.github/workflows/aws_tfhe_backward_compat_tests.yml
@@ -76,7 +76,7 @@ jobs:
        with:
          persist-credentials: 'false'
          repository: zama-ai/tfhe-backward-compat-data
-          path: tfhe/tfhe-backward-compat-data
+          path: tests/tfhe-backward-compat-data
          lfs: 'true'
          ref: ${{ steps.backward_compat_branch.outputs.branch }}

@@ -94,7 +94,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (backward-compat-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, backward-compat-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -11,13 +11,26 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request_target' }}
+  MSG_MINIMAL: event,action url,commit
+  BRANCH: ${{ github.head_ref || github.ref }}
+  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
  REF: ${{ github.event.pull_request.head.sha || github.sha }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
+  # Trigger pull_request event on CI files to be able to test changes before merging to main branch.
+  # Workflow would fail if changes come from a forked repository since secrets are not available with this event.
+  pull_request:
+    paths:
+      - '.github/**'
+      - 'ci/**'
+  # General entry point for Zama's pull requests as well as contributions from forks.
  pull_request_target:
+    paths:
+      - '**'
+      - '!.github/**'
+      - '!ci/**'

 jobs:
  should-run:
@@ -49,6 +62,7 @@ jobs:
      user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
        steps.changed-files.outputs.user_docs_any_changed ||
        steps.changed-files.outputs.dependencies_any_changed }}
+      ci_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ci_any_changed }}
      any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
    steps:
      - name: Checkout tfhe-rs
@@ -105,11 +119,15 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - README.md
+            ci:
+              - .github/**
+              - ci/**

      - name: Aggregate file changes
        id: aggregated-changes
+        # CI files are not included in this aggregator.
        if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
          steps.changed-files.outputs.csprng_any_changed == 'true' ||
          steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
@@ -124,16 +142,20 @@ jobs:
        run: |
          echo "any_changed=true" >> "$GITHUB_OUTPUT"

+  # Fail if the triggering actor is not part of Zama organization.
+  # If pull_request_target is emitted and CI files have changed, skip this job. The jobs that depend on it are then skipped as well.
  check-user-permission:
    needs: should-run
+    if: github.event_name != 'pull_request_target' ||
+      (github.event_name == 'pull_request_target' && needs.should-run.outputs.ci_file_changed == 'false')
    uses: ./.github/workflows/check_triggering_actor.yml
    secrets:
      TOKEN: ${{ secrets.GITHUB_TOKEN }}

  setup-instance:
    name: Setup instance (fast-tests)
-    if: github.event_name != 'pull_request_target' ||
-      needs.should-run.outputs.any_file_changed == 'true'
+    if: github.event_name == 'workflow_dispatch' ||
+      (github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
    needs: [ should-run, check-user-permission ]
    runs-on: ubuntu-latest
    outputs:
@@ -152,8 +174,6 @@ jobs:

  fast-tests:
    name: Fast CPU tests
-    if: github.event_name != 'pull_request_target' ||
-      (github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
    concurrency:
      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
@@ -269,11 +289,11 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (fast-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, fast-tests ]
    runs-on: ubuntu-latest
    steps:
@@ -293,4 +313,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -134,7 +134,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (unsigned-integer-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [setup-instance, unsigned-integer-tests]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -138,7 +138,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (signed-integer-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [setup-instance, signed-integer-tests]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -111,7 +111,7 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - README.md

      - name: Aggregate file changes
@@ -244,7 +244,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -113,7 +113,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (wasm-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, wasm-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_boolean.yml
+++ b/.github/workflows/benchmark_boolean.yml
@@ -93,7 +93,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}
@@ -121,7 +121,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (boolean-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, boolean-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_core_crypto.yml
+++ b/.github/workflows/benchmark_core_crypto.yml
@@ -3,6 +3,9 @@ name: Core crypto benchmarks

 on:
  workflow_dispatch:
+  schedule:
+    # Weekly benchmarks will be triggered each Saturday at 5 a.m. UTC.
+    - cron: '0 5 * * 6'

 env:
  CARGO_TERM_COLOR: always
@@ -81,7 +84,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -109,7 +112,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (core-crypto-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, core-crypto-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_erc20.yml
+++ b/.github/workflows/benchmark_erc20.yml
@@ -97,7 +97,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_erc20
          path: ${{ env.RESULTS_FILENAME }}
@@ -118,7 +118,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (erc20-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, erc20-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_gpu_4090.yml
+++ b/.github/workflows/benchmark_gpu_4090.yml
@@ -82,7 +82,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_integer_multi_bit_gpu_default
          path: ${{ env.RESULTS_FILENAME }}
@@ -157,7 +157,7 @@ jobs:
      

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
--- a/.github/workflows/benchmark_gpu_core_crypto.yml
+++ b/.github/workflows/benchmark_gpu_core_crypto.yml
@@ -98,7 +98,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -131,7 +131,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-integer-full-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-core-crypto-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_gpu_erc20_common.yml
+++ b/.github/workflows/benchmark_gpu_erc20_common.yml
@@ -124,7 +124,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_erc20_${{ inputs.profile }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -157,7 +157,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_gpu_integer_common.yml
+++ b/.github/workflows/benchmark_gpu_integer_common.yml
@@ -200,7 +200,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -233,7 +233,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_integer.yml
+++ b/.github/workflows/benchmark_integer.yml
@@ -170,7 +170,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -191,7 +191,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_shortint.yml
+++ b/.github/workflows/benchmark_shortint.yml
@@ -136,7 +136,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -157,7 +157,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (shortint-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, shortint-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_signed_integer.yml
+++ b/.github/workflows/benchmark_signed_integer.yml
@@ -164,7 +164,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -185,7 +185,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, signed-integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_tfhe_fft.yml
+++ b/.github/workflows/benchmark_tfhe_fft.yml
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_fft
          path: ${{ env.RESULTS_FILENAME }}
--- a/.github/workflows/benchmark_tfhe_ntt.yml
+++ b/.github/workflows/benchmark_tfhe_ntt.yml
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_ntt
          path: ${{ env.RESULTS_FILENAME }}
--- a/.github/workflows/benchmark_tfhe_zk_pok.yml
+++ b/.github/workflows/benchmark_tfhe_zk_pok.yml
@@ -121,7 +121,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_tfhe_zk_pok
          path: ${{ env.RESULTS_FILENAME }}
@@ -149,7 +149,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (tfhe-zk-pok-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_wasm_client.yml
+++ b/.github/workflows/benchmark_wasm_client.yml
@@ -166,7 +166,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_wasm_${{ matrix.browser }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -194,7 +194,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (wasm-client-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, wasm-client-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_zk_pke.yml
+++ b/.github/workflows/benchmark_zk_pke.yml
@@ -177,7 +177,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_integer_zk
          path: ${{ env.RESULTS_FILENAME }}
@@ -205,7 +205,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (pke-zk-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, pke-zk-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/cargo_build_tfhe_ntt.yml
+++ b/.github/workflows/cargo_build_tfhe_ntt.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-builds:
+  cargo-builds-ntt:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
--- a/.github/workflows/cargo_test_fft.yml
+++ b/.github/workflows/cargo_test_fft.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-tests:
+  cargo-tests-fft:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -38,7 +38,7 @@ jobs:
        run: |
          make test_fft_no_std

-  cargo-tests-nightly:
+  cargo-tests-fft-nightly:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -60,7 +60,7 @@ jobs:
        run: |
          make test_fft_no_std_nightly

-  cargo-tests-node-js:
+  cargo-tests-fft-node-js:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
--- a/.github/workflows/cargo_test_ntt.yml
+++ b/.github/workflows/cargo_test_ntt.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-tests:
+  cargo-tests-ntt:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@@ -33,7 +33,7 @@ jobs:
      - name: Test no-std
        run: make test_ntt_no_std

-  cargo-tests-nightly:
+  cargo-tests-ntt-nightly:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
--- a/.github/workflows/check_triggering_actor.yml
+++ b/.github/workflows/check_triggering_actor.yml
@@ -13,7 +13,7 @@ jobs:
    steps:
      - name: Get User Permission
        id: check-access
-        uses: actions-cool/check-user-permission@956b2e73cdfe3bcb819bb7225e490cb3b18fd76e # v2.2.1
+        uses: actions-cool/check-user-permission@7b90a27f92f3961b368376107661682c441f6103 # v2.3.0
        with:
          require: write
          username: ${{ github.triggering_actor }}
--- a/.github/workflows/ci_lint.yml
+++ b/.github/workflows/ci_lint.yml
@@ -27,7 +27,7 @@ jobs:
          make lint_workflow

      - name: Ensure SHA pinned actions
-        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6ae615f6475d2ede5ad88bea6baa7a1d5e93ffaa # v3.0.19
+        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@c3a2b64f69b7a1542a68f44d9edbd9ec3fc1455e # v3.0.20
        with:
          allowlist: |
            slsa-framework/slsa-github-generator
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -115,7 +115,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (code-coverage)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, code-coverage ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -69,7 +69,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (csprng-randomness-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, csprng-randomness-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_fast_h100_tests.yml
+++ b/.github/workflows/gpu_fast_h100_tests.yml
@@ -51,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_fast_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -149,7 +149,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_fast_tests.yml
+++ b/.github/workflows/gpu_fast_tests.yml
@@ -50,7 +50,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_fast_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -147,7 +147,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_full_multi_gpu_tests.yml
+++ b/.github/workflows/gpu_full_multi_gpu_tests.yml
@@ -51,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/**_multi_gpu_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -152,7 +152,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-tests-multi-gpu)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_integer_long_run_tests.yml
+++ b/.github/workflows/gpu_integer_long_run_tests.yml
@@ -92,7 +92,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (gpu-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_pcc.yml
+++ b/.github/workflows/gpu_pcc.yml
@@ -104,7 +104,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-pcc)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-pcc ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_signed_integer_classic_tests.yml
+++ b/.github/workflows/gpu_signed_integer_classic_tests.yml
@@ -51,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_signed_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -132,7 +132,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-signed-classic-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_signed_integer_h100_tests.yml
+++ b/.github/workflows/gpu_signed_integer_h100_tests.yml
@@ -51,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_signed_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -132,7 +132,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_signed_integer_tests.yml
+++ b/.github/workflows/gpu_signed_integer_tests.yml
@@ -58,7 +58,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_signed_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -150,7 +150,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-signed-integer-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_unsigned_integer_classic_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_classic_tests.yml
@@ -51,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -132,7 +132,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-unsigned-classic-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_unsigned_integer_h100_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_h100_tests.yml
@@ -51,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -132,7 +132,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_unsigned_integer_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_tests.yml
@@ -57,7 +57,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_unsigned_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -146,7 +146,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-unsigned-integer-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/integer_long_run_tests.yml
+++ b/.github/workflows/integer_long_run_tests.yml
@@ -72,7 +72,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -43,14 +43,14 @@ jobs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
      - name: Prepare package
        run: |
          cargo package -p tfhe
-      - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
        with:
          name: crate
          path: target/package/*.crate
@@ -77,12 +77,9 @@ jobs:
    name: Publish Release
    needs: [package] # for comparing hashes
    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
@@ -110,7 +107,7 @@ jobs:
      - name: Slack notification (hashes comparison)
        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: failure
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -155,7 +152,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -1,49 +0,0 @@
-name: Publish tfhe-csprng release
-
-on:
-  workflow_dispatch:
-    inputs:
-      dry_run:
-        description: "Dry-run"
-        type: boolean
-        default: true
-
-env:
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-
-jobs:
-  verify_tag:
-    uses: ./.github/workflows/verify_tagged_commit.yml
-    secrets:
-      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
-      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
-
-  publish_release:
-    name: Publish tfhe-csprng Release
-    needs: verify_tag
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Publish crate.io package
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
-        run: |
-          cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_cuda.yml
+++ b/.github/workflows/make_release_cuda.yml
@@ -1,4 +1,3 @@
-# Publish new release of tfhe-cuda-backend on crates.io.
 name: Publish CUDA release

 on:
@@ -8,10 +7,6 @@ on:
        description: "Dry-run"
        type: boolean
        default: true
-      push_to_crates:
-        description: "Push to crate"
-        type: boolean
-        default: true

 env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
@@ -45,10 +40,12 @@ jobs:
          backend: aws
          profile: gpu-build

-  publish-cuda-release:
-    name: Publish CUDA Release
+  package:
+    name: Package CUDA Release for provenance
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
@@ -61,7 +58,7 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
@@ -95,26 +92,75 @@ jobs:
            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
            echo "HOME=/home/ubuntu";
          } >> "${GITHUB_ENV}"
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-cuda-backend
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
+  publish-cuda-release:
+    name: Publish CUDA Release
+    needs: [setup-instance, package] # for comparing hashes
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    steps:
      - name: Publish crate.io package
-        if: ${{ inputs.push_to_crates }}
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-cuda-backend --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-cuda-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (publish-release)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, publish-cuda-release ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/make_release_tfhe_csprng.yml
+++ b/.github/workflows/make_release_tfhe_csprng.yml
@@ -0,0 +1,103 @@
+name: Publish tfhe-csprng release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  verify_tag:
+    uses: ./.github/workflows/verify_tagged_commit.yml
+    secrets:
+      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
+      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
+
+  package:
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-csprng
+      - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
+        with:
+          name: crate-tfhe-csprng
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
+
+  publish_release:
+    name: Publish tfhe-csprng Release
+    needs: [verify_tag, package]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-tfhe-csprng
+          path: target/package
+      - name: Publish crate.io package
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-csprng - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_tfhe_fft.yml
+++ b/.github/workflows/make_release_tfhe_fft.yml
@@ -19,15 +19,53 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

+  package:
+    runs-on: ubuntu-latest
+    needs: verify_tag
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-fft
+      - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
+        with:
+          name: crate
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
  publish_release:
    name: Publish tfhe-fft Release
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: [verify_tag, package] # for comparing hashes
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Publish crate.io package
        env:
@@ -36,10 +74,26 @@ jobs:
        run: |
          cargo publish -p tfhe-fft --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-fft crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_ntt.yml
+++ b/.github/workflows/make_release_tfhe_ntt.yml
@@ -19,13 +19,50 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

+  package:
+    runs-on: ubuntu-latest
+    needs: verify_tag
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-ntt
+      - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
+        with:
+          name: crate
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
  publish_release:
    name: Publish tfhe-ntt Release
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: [verify_tag, package] # for comparing hashes
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0

@@ -36,10 +73,26 @@ jobs:
        run: |
          cargo publish -p tfhe-ntt --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-ntt crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_versionable.yml
+++ b/.github/workflows/make_release_tfhe_versionable.yml
@@ -18,35 +18,161 @@ jobs:
       RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
       READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
 
-  publish_release:
+  package-derive:
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-versionable-derive
+      - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
+        with:
+          name: crate-tfhe-versionable-derive
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance-derive:
+    needs: [package-derive]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package-derive.outputs.hash }}
+
+  publish_release-derive:
-    name: Publish tfhe-versionable Release
-    needs: verify_tag
+    name: Publish tfhe-versionable-derive Release
+    needs: [verify_tag, package-derive] # for comparing hashes
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
           fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Publish proc-macro crate
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-tfhe-versionable-derive
+          path: target/package
+      - name: Publish crate.io package
         env:
           CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
           DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
         run: |
           cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package-derive.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-versionable-derive - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
 
-      - name: Publish main crate
-        if: ${{ ! inputs.dry_run }}
+  package:
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-versionable
+      - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
+        with:
+          name: crate-tfhe-versionable
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance:
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
+  publish_release:
+    name: Publish tfhe-versionable Release
+    needs: [verify_tag, package, publish_release-derive] # tag check + derive crate published first; package for comparing hashes
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-tfhe-versionable
+          path: target/package
+      - name: Publish crate.io package
         env:
           CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
         run: |
-          cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }}
+          cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+
+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-versionable - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
 
       - name: Slack Notification
         if: ${{ failure() }}
         continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
         env:
           SLACK_COLOR: ${{ job.status }}
           SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_zk_pok.yml
+++ b/.github/workflows/make_release_zk_pok.yml
@@ -1,4 +1,3 @@
-# Publish new release of tfhe-zk-pok on crates.io.
 name: Publish tfhe-zk-pok release

 on:
@@ -13,6 +12,40 @@ env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

 jobs:
+  package:
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-zk-pok
+      - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
+        with:
+          name: crate-zk-pok
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
  verify_tag:
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
@@ -21,26 +54,43 @@ jobs:

  publish_release:
    name: Publish tfhe-zk-pok Release
-    needs: verify_tag
+    needs: [verify_tag, package] # for comparing hashes
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-zk-pok
+          path: target/package
      - name: Publish crate.io package
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-zk-pok --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-
+      - name: Verify hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-zk-pok crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -14,7 +14,7 @@ on:

 jobs:
  params-curves-security-check:
-    runs-on: large_ubuntu_16
+    runs-on: large_ubuntu_16-22.04
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
--- a/.gitignore
+++ b/.gitignore
@@ -33,4 +33,4 @@ node_modules/
 package-lock.json

 # Dir used for backward compatibility test data
-tfhe/tfhe-backward-compat-data/
+tests/tfhe-backward-compat-data/
--- a/.linelint.yml
+++ b/.linelint.yml
@@ -1,11 +1,15 @@
 ignore:
  - .git
  - target
+  - tfhe/build
+  - venv
+  - web-test-runner
  - tfhe/benchmarks_parameters
  - tfhe/web_wasm_parallel_tests/node_modules
  - tfhe/web_wasm_parallel_tests/dist
  - keys
  - coverage
+  - utils/tfhe-lints/ui/main.stderr

 rules:
  # checks if file ends in a newline character
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,28 +9,23 @@ members = [
    "apps/trivium",
    "tfhe-csprng",
    "backends/tfhe-cuda-backend",
-    "backends/tfhe-hpu-backend",
    "utils/tfhe-versionable",
    "utils/tfhe-versionable-derive",
-    "mockups/tfhe-hpu-mockup",
+    "tests",
 ]

-exclude = [
-    "tfhe/backward_compatibility_tests",
-    "utils/cargo-tfhe-lints-inner",
-    "utils/cargo-tfhe-lints"
-]
+exclude = ["tests/backward_compatibility_tests", "utils/tfhe-lints"]
 [workspace.dependencies]
 aligned-vec = { version = "0.6", default-features = false }
 bytemuck = "1.14.3"
 dyn-stack = { version = "0.11", default-features = false }
-itertools = "0.13"
+itertools = "0.14"
 num-complex = "0.4"
-pulp = { version = "0.20.0", default-features = false }
+pulp = { version = "0.20", default-features = false }
 rand = "0.8"
 rayon = "1"
 serde = { version = "1.0", default-features = false }
-wasm-bindgen = ">=0.2.86,<0.2.94"
+wasm-bindgen = "0.2.100"

 [profile.bench]
 lto = "fat"
@@ -48,3 +43,6 @@ inherits = "dev"
 opt-level = 3
 lto = "off"
 debug-assertions = false
+
+[workspace.metadata.dylint]
+libraries = [{ path = "utils/tfhe-lints" }]
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ BENCH_OP_FLAVOR?=DEFAULT
 BENCH_TYPE?=latency
 NODE_VERSION=22.6
 BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
-BACKWARD_COMPAT_DATA_BRANCH?=v0.4
+BACKWARD_COMPAT_DATA_BRANCH?=v0.5
 BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
 BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
 TFHE_SPEC:=tfhe
@@ -117,7 +117,7 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
 .PHONY: install_wasm_pack # Install wasm-pack to build JS packages
 install_wasm_pack: install_rs_build_toolchain
 	@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
-	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@$(WASM_PACK_VERSION) || \
 	( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )

 .PHONY: install_node # Install last version of NodeJS via nvm
@@ -151,10 +151,9 @@ install_tarpaulin: install_rs_build_toolchain
 	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
 	( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )

-.PHONY: install_tfhe_lints # Install custom tfhe-rs lints
-install_tfhe_lints:
-	(cd utils/cargo-tfhe-lints-inner && cargo install --path .) && \
-	cd utils/cargo-tfhe-lints && cargo install --path .
+.PHONY: install_cargo_dylint # Install custom tfhe-rs lints
+install_cargo_dylint:
+	cargo install cargo-dylint dylint-link

 .PHONY: install_typos_checker # Install typos checker
 install_typos_checker: install_rs_build_toolchain
@@ -243,7 +242,8 @@ fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt && \
+	$(MAKE) -C tfhe/js_on_wasm_tests fmt

 .PHONY: fmt_gpu # Format rust and cuda code
 fmt_gpu: install_rs_check_toolchain
@@ -272,7 +272,8 @@ check_fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt && \
+	$(MAKE) -C tfhe/js_on_wasm_tests check_fmt

 .PHONY: check_typos # Check for typos in codebase
 check_typos: install_typos_checker
@@ -281,14 +282,14 @@ check_typos: install_typos_checker
 .PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
 clippy_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=boolean,shortint,integer,internal-keycache,gpu \
+		--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
 		--all-targets \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: check_gpu # Run check on tfhe with "gpu" enabled
 check_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
-		--features=boolean,shortint,integer,internal-keycache,gpu \
+		--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
 		--all-targets \
 		-p $(TFHE_SPEC)

@@ -393,10 +394,10 @@ clippy_trivium: install_rs_check_toolchain
 .PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
 clippy_all_targets: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
+		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
+		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
@@ -416,10 +417,15 @@ clippy_versionable: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		-p tfhe-versionable -- --no-deps -D warnings

+.PHONY: clippy_tfhe_lints # Run clippy lints on tfhe-lints
+clippy_tfhe_lints: install_cargo_dylint # the toolchain is selected with toolchain.toml
+	cd utils/tfhe-lints && \
+	cargo clippy --all-targets -- --no-deps -D warnings
+
 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
 clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
-clippy_versionable
+clippy_versionable clippy_tfhe_lints

 .PHONY: clippy_fast # Run main clippy targets
 clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
@@ -439,9 +445,9 @@ check_rust_bindings_did_not_change:


 .PHONY: tfhe_lints # Run custom tfhe-rs lints
-tfhe_lints: install_tfhe_lints
-	cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
-		--features=boolean,shortint,integer,zk-pok -- -D warnings
+tfhe_lints: install_cargo_dylint
+	RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe --no-deps -- \
+		--features=boolean,shortint,integer,strings,zk-pok

 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
@@ -515,11 +521,11 @@ build_web_js_api: install_rs_build_toolchain install_wasm_pack
 build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
 	cd tfhe && \
 	rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
-	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
+	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory" rustup run $(RS_CHECK_TOOLCHAIN) \
 		wasm-pack build --release --target=web \
 		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok \
 		-Z build-std=panic_abort,std && \
-	find pkg/snippets -type f -iname workerHelpers.worker.js -exec sed -i "s|from '..\/..\/..\/';|from '..\/..\/..\/tfhe.js';|" {} \;
+	find pkg/snippets -type f -iname workerHelpers.js -exec sed -i "s|const pkg = await import('..\/..\/..');|const pkg = await import('..\/..\/..\/tfhe.js');|" {} \;
 	jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json

 .PHONY: build_node_js_api # Build the js API targeting nodejs
@@ -824,7 +830,7 @@ test_strings: install_rs_build_toolchain
 .PHONY: test_user_doc # Run tests from the .md documentation
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
+		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok,strings \
 		-p $(TFHE_SPEC) \
 		-- test_user_docs::

@@ -887,16 +893,21 @@ test_versionable: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--all-targets -p tfhe-versionable

+.PHONY: test_tfhe_lints # Run test on tfhe-lints
+test_tfhe_lints: install_cargo_dylint
+	cd utils/tfhe-lints && \
+	cargo test
+
 # The backward compat data repo holds historical binary data but also rust code to generate and load them.
 # Here we use the "patch" functionality of Cargo to make sure the repo used for the data is the same as the one used for the code.
 .PHONY: test_backward_compatibility_ci
 test_backward_compatibility_ci: install_rs_build_toolchain
 	TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
-		--features=shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
+		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tests/$(BACKWARD_COMPAT_DATA_DIR)\"" \
+		--features=shortint,integer,zk-pok -p tests test_backward_compatibility -- --nocapture

 .PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
-test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
+test_backward_compatibility: tests/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci

 .PHONY: backward_compat_branch # Prints the required backward compatibility branch
 backward_compat_branch:
@@ -1045,35 +1056,35 @@ bench_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_signed_integer # Run benchmarks for signed integer
 bench_signed_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-signed-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
 bench_integer_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
 bench_integer_compression: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench	glwe_packing_compression-integer-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_integer_compression_gpu
 bench_integer_compression_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench	glwe_packing_compression-integer-bench \
-	--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,gpu,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
 bench_integer_multi_bit: install_rs_check_toolchain
@@ -1081,7 +1092,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
 bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -1089,7 +1100,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-signed-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
 bench_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1097,7 +1108,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --

 .PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
 bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1105,14 +1116,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
+	--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) -- ::unsigned

 .PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
 bench_integer_zk: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench zk-pke-bench \
-	--features=integer,internal-keycache,zk-pok,nightly-avx512 \
+	--features=integer,internal-keycache,zk-pok,nightly-avx512,pbs-stats \
 	-p $(TFHE_SPEC) --

 .PHONY: bench_shortint # Run benchmarks for shortint
@@ -1275,9 +1286,9 @@ write_params_to_file: install_rs_check_toolchain

 .PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
 clone_backward_compat_data:
-	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tfhe/$(BACKWARD_COMPAT_DATA_DIR)
+	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tests/$(BACKWARD_COMPAT_DATA_DIR)

-tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
+tests/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data

 #
 # Real use case examples
@@ -1303,9 +1314,7 @@ sha256_bool: install_rs_check_toolchain

 .PHONY: pcc # pcc stands for pre commit checks (except GPU)
 pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
-clippy_all check_compile_tests
-# TFHE lints deactivated as it's incompatible with 1.83 - temporary
-# tfhe_lints
+clippy_all check_compile_tests test_tfhe_lints tfhe_lints

 .PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
 pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change
@@ -1315,7 +1324,7 @@ fpcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_t
 check_compile_tests

 .PHONY: conformance # Automatically fix problems that can be fixed
-conformance: fmt fmt_js
+conformance: fix_newline fmt fmt_js

 #=============================== FFT Section ==================================
 .PHONY: doc_fft # Build rust doc for tfhe-fft
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -95,7 +95,7 @@ fn main() {
 			val >>= 1;
 		}
 	}
-	
+
 	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();

 	let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
@@ -129,24 +129,36 @@ Other sizes than 64 bit are expected to be available in the future.

 # FHE shortint Trivium implementation

-The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `PARAM_MESSAGE_1_CARRY_1_KS_PBS`). It uses a lower level API 
-of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run on the same 
-cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes 
+The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64`).
+It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
+on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
 its setup a little more intricate.

 Example code:
 ```rust
 use tfhe::shortint::prelude::*;
-use tfhe::shortint::CastingKey;
+use tfhe::shortint::parameters::{
+    V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
+    V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
+};
 use tfhe::{ConfigBuilder, generate_keys, FheUint64};
 use tfhe::prelude::*;
 use tfhe_trivium::TriviumStreamShortint;

 fn test_shortint() {
-	let config = ConfigBuilder::default().build();
+	let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
 	let (hl_client_key, hl_server_key) = generate_keys(config);
-	let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
-	let ksk = CastingKey::new((&client_key, &server_key), (&hl_client_key, &hl_server_key));
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+	let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+    let ksk = KeySwitchingKey::new(
+        (&client_key, Some(&server_key)),
+        (&underlying_ck, &underlying_sk),
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );

 	let key_string = "0053A6F94C9FF24598EB".to_string();
 	let mut key = [0; 80];
--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -1,12 +1,17 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::{
+    V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
+    V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
+};
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};

 pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -57,7 +62,9 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -103,7 +110,9 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -1,12 +1,17 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::{
+    V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
+    V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
+};
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{TransCiphering, TriviumStreamShortint};

 pub fn trivium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -57,7 +62,9 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn trivium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -103,7 +110,9 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
 }

 pub fn trivium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -1,6 +1,9 @@
 use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::{
+    V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
+    V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
+};
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo renaud1239/Kreyvium,
 // commit fd6828f68711276c25f55e605935028f5e843f43
@@ -216,7 +219,9 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn kreyvium_test_shortint_long() {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -1,6 +1,9 @@
 use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::{
+    V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
+    V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
+};
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
 // file testvectors/trivium-80.80.test-vectors
@@ -352,7 +355,9 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn trivium_test_shortint_long() {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -14,4 +14,4 @@ keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
 [build-dependencies]
 cmake = { version = "0.1" }
 pkg-config = { version = "0.3" }
-bindgen = "0.70.1"
+bindgen = "0.71"
--- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
+++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
@@ -5,45 +5,50 @@

 extern "C" {

-void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                          void *lwe_array_out,
-                                          void const *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count);
-void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                          void *lwe_array_out,
-                                          void const *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                       void *lwe_array_out,
-                                       void const *lwe_array_in_1,
-                                       void const *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                       void *lwe_array_out,
-                                       void const *lwe_array_in_1,
-                                       void const *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_32(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_32(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in_1, void const *lwe_array_in_2,
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in_1, void const *lwe_array_in_2,
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);

 void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_plaintext_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint64_t plaintext_in,
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 }

 #endif // CUDA_LINALG_H_
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
@@ -26,15 +26,6 @@ template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
  return BLOCK_SIZE_GEMM * THREADS_GEMM * 2 * sizeof(Torus);
 }

-__host__ inline bool can_use_pks_fast_path(uint32_t lwe_dimension,
-                                           uint32_t num_lwe,
-                                           uint32_t polynomial_size,
-                                           uint32_t level_count,
-                                           uint32_t glwe_dimension) {
-  // TODO: activate it back, fix tests and extend to level_count > 1
-  return false;
-}
-
 // Initialize decomposition by performing rounding
 // and decomposing one level of an array of Torus LWEs. Only
 // decomposes the mask elements of the incoming LWEs.
@@ -57,6 +48,8 @@ __global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
  // is lwe_dimension + 1, while for writing it is lwe_dimension
  auto read_val_idx = lwe_idx * (lwe_dimension + 1) + lwe_sample_idx;
  auto write_val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
+  auto write_state_idx =
+      num_lwe * lwe_dimension + lwe_idx * lwe_dimension + lwe_sample_idx;

  Torus a_i = lwe_in[read_val_idx];

@@ -64,6 +57,8 @@ __global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,

  Torus mod_b_mask = (1ll << base_log) - 1ll;
  lwe_out[write_val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
+  synchronize_threads_in_block();
+  lwe_out[write_state_idx] = state;
 }

 // Continue decomposiion of an array of Torus elements in place. Supposes
@@ -84,12 +79,16 @@ decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
    return;

  auto val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
+  auto state_idx = num_lwe * lwe_dimension + val_idx;

-  Torus state = buffer_in[val_idx];
+  Torus state = buffer_in[state_idx];
+  synchronize_threads_in_block();

  Torus mod_b_mask = (1ll << base_log) - 1ll;

  buffer_in[val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
+  synchronize_threads_in_block();
+  buffer_in[state_idx] = state;
 }

 // Multiply matrices A, B of size (M, K), (K, N) respectively
@@ -99,6 +98,10 @@ decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
 // BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
 // THREADS_GEMM)-shaped tiles of values from A, and a (THREADS_GEMM,
 // BLOCK_SIZE_GEMM)-shaped tiles of values from B.
+//
+// This code is adapted by generalizing the 1d block-tiling
+// kernel from https://github.com/siboehm/SGEMM_CUDA
+// to any matrix dimension
 template <typename Torus, typename TorusVec>
 __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
                      int stride_B, Torus *C) {
@@ -111,7 +114,6 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
  const uint cRow = blockIdx.y;
  const uint cCol = blockIdx.x;

-  const uint totalResultsBlocktile = BM * BN;
  const int threadCol = threadIdx.x % BN;
  const int threadRow = threadIdx.x / BN;

@@ -152,7 +154,7 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
    } else {
      Bs[innerRowB * BN + innerColB] = 0;
    }
-    __syncthreads();
+    synchronize_threads_in_block();

    // Advance blocktile for the next iteration of this loop
    A += BK;
@@ -168,7 +170,7 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
            As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
      }
    }
-    __syncthreads();
+    synchronize_threads_in_block();
  }

  // Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
@@ -259,10 +261,6 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(

  // Optimization of packing keyswitch when packing many LWEs

-  if (level_count > 1) {
-    PANIC("Fast path PKS only supports level_count==1");
-  }
-
  cudaSetDevice(gpu_index);
  check_cuda_error(cudaGetLastError());

@@ -273,10 +271,11 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
  // buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
  // scratch buffer for the fast path must determine the half-size of the
  // scratch buffer as the max between the size of the GLWE and the size of the
-  // LWE-mask
-  int memory_unit = glwe_accumulator_size > lwe_dimension
+  // LWE-mask times two (to keep both decomposition state and decomposed
+  // intermediate value)
+  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
                        ? glwe_accumulator_size
-                        : lwe_dimension;
+                        : lwe_dimension * 2;

  // ping pong the buffer between successive calls
  // split the buffer in two parts of this size
@@ -309,7 +308,7 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
                 CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
  dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);

-  auto stride_KSK_buffer = glwe_accumulator_size;
+  auto stride_KSK_buffer = glwe_accumulator_size * level_count;

  uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
  tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
@@ -317,21 +316,20 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
      stride_KSK_buffer, d_mem_1);
  check_cuda_error(cudaGetLastError());

-  /*
-    TODO: transpose key to generalize to level_count > 1
+  auto ksk_block_size = glwe_accumulator_size;

-    for (int li = 1; li < level_count; ++li) {
-      decompose_vectorize_step_inplace<Torus, TorusVec>
-          <<<grid_decomp, threads_decomp, 0, stream>>>(
-              d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
-      check_cuda_error(cudaGetLastError());
+  for (int li = 1; li < level_count; ++li) {
+    decompose_vectorize_step_inplace<Torus, TorusVec>
+        <<<grid_decomp, threads_decomp, 0, stream>>>(
+            d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
+    check_cuda_error(cudaGetLastError());

-      tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size,
-    stream>>>( num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
-          fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
-      check_cuda_error(cudaGetLastError());
-    }
-  */
+    tgemm<Torus, TorusVec>
+        <<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
+            num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
+            fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
+    check_cuda_error(cudaGetLastError());
+  }

  // should we include the mask in the rotation ??
  dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
@@ -73,24 +73,13 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
    uint32_t num_lwes) {

-  if (can_use_pks_fast_path(input_lwe_dimension, num_lwes,
-                            output_polynomial_size, level_count,
-                            output_glwe_dimension)) {
-    host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
-        static_cast<cudaStream_t>(stream), gpu_index,
-        static_cast<uint64_t *>(glwe_array_out),
-        static_cast<const uint64_t *>(lwe_array_in),
-        static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
-        input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
-        base_log, level_count, num_lwes);
-  } else
-    host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
-        static_cast<cudaStream_t>(stream), gpu_index,
-        static_cast<uint64_t *>(glwe_array_out),
-        static_cast<const uint64_t *>(lwe_array_in),
-        static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
-        input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
-        base_log, level_count, num_lwes);
+  host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
+      static_cast<cudaStream_t>(stream), gpu_index,
+      static_cast<uint64_t *>(glwe_array_out),
+      static_cast<const uint64_t *>(lwe_array_in),
+      static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
+      input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
+      base_log, level_count, num_lwes);
 }

 void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
@@ -164,9 +164,11 @@ __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(

  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;

-  int memory_unit = glwe_accumulator_size > lwe_dimension
+  // allocate at least LWE-mask times two: to keep both decomposition state and
+  // decomposed intermediate value
+  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
                        ? glwe_accumulator_size
-                        : lwe_dimension;
+                        : lwe_dimension * 2;

  if (allocate_gpu_memory) {
    *fp_ks_buffer = (int8_t *)cuda_malloc_async(
@@ -221,44 +223,6 @@ __device__ void packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext(
  }
 }

-// public functional packing keyswitch for a batch of LWE ciphertexts
-//
-// Selects the input each thread is working on using the y-block index.
-//
-// Assumes there are (glwe_dimension+1) * polynomial_size threads split through
-// different thread blocks at the x-axis to work on that input.
-template <typename Torus>
-__global__ void packing_keyswitch_lwe_list_to_glwe(
-    Torus *glwe_array_out, Torus const *lwe_array_in, Torus const *fp_ksk,
-    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
-    Torus *d_mem) {
-  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
-
-  const int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
-  const int lwe_size = (lwe_dimension_in + 1);
-
-  const int input_id = blockIdx.y;
-  const int degree = input_id;
-
-  // Select an input
-  auto lwe_in = lwe_array_in + input_id * lwe_size;
-  auto ks_glwe_out = d_mem + input_id * glwe_accumulator_size;
-  auto glwe_out = glwe_array_out + input_id * glwe_accumulator_size;
-
-  // KS LWE to GLWE
-  packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
-      ks_glwe_out, lwe_in, fp_ksk, lwe_dimension_in, glwe_dimension,
-      polynomial_size, base_log, level_count);
-
-  // P * x ^degree
-  auto in_poly = ks_glwe_out + (tid / polynomial_size) * polynomial_size;
-  auto out_result = glwe_out + (tid / polynomial_size) * polynomial_size;
-  polynomial_accumulate_monic_monomial_mul<Torus>(out_result, in_poly, degree,
-                                                  tid % polynomial_size,
-                                                  polynomial_size, 1, true);
-}
-
 /// To-do: Rewrite this kernel for efficiency
 template <typename Torus>
 __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
@@ -276,52 +240,4 @@ __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
  }
 }

-template <typename Torus>
-__host__ void host_packing_keyswitch_lwe_list_to_glwe(
-    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
-    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
-    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
-    uint32_t num_lwes) {
-
-  if (num_lwes > polynomial_size)
-    PANIC("Cuda error: too many LWEs to pack. The number of LWEs should be "
-          "smaller than "
-          "polynomial_size.")
-
-  cudaSetDevice(gpu_index);
-  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
-
-  int num_blocks = 0, num_threads = 0;
-  getNumBlocksAndThreads(glwe_accumulator_size, 128, num_blocks, num_threads);
-
-  dim3 grid(num_blocks, num_lwes);
-  dim3 threads(num_threads);
-
-  // The fast path of PKS uses the scratch buffer (d_mem) differently:
-  // it needs to store the decomposed masks in the first half of this buffer
-  // and the keyswitched GLWEs in the second half of the buffer. Thus the
-  // scratch buffer for the fast path must determine the half-size of the
-  // scratch buffer as the max between the size of the GLWE and the size of the
-  // LWE-mask
-  int memory_unit = glwe_accumulator_size > lwe_dimension_in
-                        ? glwe_accumulator_size
-                        : lwe_dimension_in;
-
-  auto d_mem = (Torus *)fp_ks_buffer;
-  auto d_tmp_glwe_array_out = d_mem + num_lwes * memory_unit;
-
-  // individually keyswitch each lwe
-  packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(
-      d_tmp_glwe_array_out, lwe_array_in, fp_ksk_array, lwe_dimension_in,
-      glwe_dimension, polynomial_size, base_log, level_count, d_mem);
-  check_cuda_error(cudaGetLastError());
-
-  // accumulate to a single glwe
-  accumulate_glwes<Torus><<<num_blocks, threads, 0, stream>>>(
-      glwe_out, d_tmp_glwe_array_out, glwe_dimension, polynomial_size,
-      num_lwes);
-  check_cuda_error(cudaGetLastError());
-}
-
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
@@ -117,21 +117,11 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
  while (rem_lwes > 0) {
    auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);

-    if (can_use_pks_fast_path(
-            input_lwe_dimension, chunk_size, compression_params.polynomial_size,
-            compression_params.ks_level, compression_params.glwe_dimension)) {
-      host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
-          streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
-          fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
-          compression_params.polynomial_size, compression_params.ks_base_log,
-          compression_params.ks_level, chunk_size);
-    } else {
-      host_packing_keyswitch_lwe_list_to_glwe<Torus>(
-          streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
-          fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
-          compression_params.polynomial_size, compression_params.ks_base_log,
-          compression_params.ks_level, chunk_size);
-    }
+    host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
+        streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
+        fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
+        compression_params.polynomial_size, compression_params.ks_base_log,
+        compression_params.ks_level, chunk_size);

    rem_lwes -= chunk_size;
    lwe_subset += chunk_size * lwe_in_size;
--- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cu
@@ -4,12 +4,11 @@
 * Perform the addition of two u32 input LWE ciphertext vectors.
 * See the equivalent operation on u64 ciphertexts for more details.
 */
-void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                       void *lwe_array_out,
-                                       void const *lwe_array_in_1,
-                                       void const *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count) {
+void cuda_add_lwe_ciphertext_vector_32(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in_1, void const *lwe_array_in_2,
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_addition<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
                          static_cast<uint32_t *>(lwe_array_out),
@@ -44,12 +43,11 @@ void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
 * vectors are left unchanged. This function is a wrapper to a device function
 * that performs the operation on the GPU.
 */
-void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                       void *lwe_array_out,
-                                       void const *lwe_array_in_1,
-                                       void const *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count) {
+void cuda_add_lwe_ciphertext_vector_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in_1, void const *lwe_array_in_2,
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_addition<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
                          static_cast<uint64_t *>(lwe_array_out),
@@ -65,7 +63,8 @@ void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_addition_plaintext<uint32_t>(
      static_cast<cudaStream_t>(stream), gpu_index,
@@ -105,7 +104,8 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_addition_plaintext<uint64_t>(
      static_cast<cudaStream_t>(stream), gpu_index,
@@ -114,3 +114,41 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
      static_cast<const uint64_t *>(plaintext_array_in), input_lwe_dimension,
      input_lwe_ciphertext_count);
 }
+
+/*
+ * Perform the addition of a u64 input LWE ciphertext vector with a u64 input
+ * plaintext scalar.
+ * - `stream` is a void pointer to the Cuda stream to be used in the kernel
+ * launch
+ * - `gpu_index` is the index of the GPU to be used in the kernel launch
+ * - `lwe_array_out` is an array of size
+ * `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have
+ * been allocated on the GPU before calling this function, and that will hold
+ * the result of the computation.
+ * - `lwe_array_in` is the LWE ciphertext vector used as input, it should have
+ * been allocated and initialized before calling this function. It has the same
+ * size as the output array.
+ * - `plaintext_in` is the plaintext used as input.
+ * - `input_lwe_dimension` is the number of mask elements in the input and
+ * output LWE ciphertext vectors
+ * - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the
+ * input LWE ciphertext vector, as well as in the output.
+ *
+ * The same input plaintext is added to the body of each
+ * LWE ciphertext in the LWE ciphertext vector. The result of the
+ * operation is stored in the output LWE ciphertext vector. The input
+ * vector is left unchanged. This function is a wrapper to a device function
+ * that performs the operation on the GPU.
+ */
+void cuda_add_lwe_ciphertext_vector_plaintext_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint64_t plaintext_in,
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {
+
+  host_addition_plaintext_scalar<uint64_t>(
+      static_cast<cudaStream_t>(stream), gpu_index,
+      static_cast<uint64_t *>(lwe_array_out),
+      static_cast<const uint64_t *>(lwe_array_in), plaintext_in,
+      input_lwe_dimension, input_lwe_ciphertext_count);
+}
--- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cuh
@@ -13,9 +13,10 @@
 #include <stdio.h>

 template <typename T>
-__global__ void
-plaintext_addition(T *output, T const *lwe_input, T const *plaintext_input,
-                   uint32_t input_lwe_dimension, uint32_t num_entries) {
+__global__ void plaintext_addition(T *output, T const *lwe_input,
+                                   T const *plaintext_input,
+                                   const uint32_t input_lwe_dimension,
+                                   const uint32_t num_entries) {

  int tid = threadIdx.x;
  int plaintext_index = blockIdx.x * blockDim.x + tid;
@@ -28,10 +29,26 @@ plaintext_addition(T *output, T const *lwe_input, T const *plaintext_input,
 }

 template <typename T>
-__host__ void
-host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output,
-                        T const *lwe_input, T const *plaintext_input,
-                        uint32_t lwe_dimension, uint32_t lwe_ciphertext_count) {
+__global__ void plaintext_addition_scalar(T *output, T const *lwe_input,
+                                          const T plaintext_input,
+                                          const uint32_t input_lwe_dimension,
+                                          const uint32_t num_entries) {
+
+  int tid = threadIdx.x;
+  int lwe_index = blockIdx.x * blockDim.x + tid;
+  if (lwe_index < num_entries) {
+    int index = lwe_index * (input_lwe_dimension + 1) + input_lwe_dimension;
+    // Here we take advantage of the wrapping behaviour of uint
+    output[index] = lwe_input[index] + plaintext_input;
+  }
+}
+
+template <typename T>
+__host__ void host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index,
+                                      T *output, T const *lwe_input,
+                                      T const *plaintext_input,
+                                      const uint32_t lwe_dimension,
+                                      const uint32_t lwe_ciphertext_count) {

  cudaSetDevice(gpu_index);
  int num_blocks = 0, num_threads = 0;
@@ -48,6 +65,27 @@ host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output,
  check_cuda_error(cudaGetLastError());
 }

+template <typename T>
+__host__ void host_addition_plaintext_scalar(
+    cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input,
+    const T plaintext_input, const uint32_t lwe_dimension,
+    const uint32_t lwe_ciphertext_count) {
+
+  cudaSetDevice(gpu_index);
+  int num_blocks = 0, num_threads = 0;
+  int num_entries = lwe_ciphertext_count;
+  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
+  dim3 grid(num_blocks, 1, 1);
+  dim3 thds(num_threads, 1, 1);
+
+  cuda_memcpy_async_gpu_to_gpu(
+      output, lwe_input, (lwe_dimension + 1) * lwe_ciphertext_count * sizeof(T),
+      stream, gpu_index);
+  plaintext_addition_scalar<T><<<grid, thds, 0, stream>>>(
+      output, lwe_input, plaintext_input, lwe_dimension, num_entries);
+  check_cuda_error(cudaGetLastError());
+}
+
 template <typename T>
 __global__ void addition(T *output, T const *input_1, T const *input_2,
                         uint32_t num_entries) {
@@ -64,8 +102,8 @@ __global__ void addition(T *output, T const *input_1, T const *input_2,
 template <typename T>
 __host__ void host_addition(cudaStream_t stream, uint32_t gpu_index, T *output,
                            T const *input_1, T const *input_2,
-                            uint32_t input_lwe_dimension,
-                            uint32_t input_lwe_ciphertext_count) {
+                            const uint32_t input_lwe_dimension,
+                            const uint32_t input_lwe_ciphertext_count) {

  cudaSetDevice(gpu_index);
  // lwe_size includes the presence of the body
--- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/multiplication.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/multiplication.cu
@@ -7,7 +7,8 @@
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_cleartext_vec_multiplication<uint32_t>(
      static_cast<cudaStream_t>(stream), gpu_index,
@@ -47,7 +48,8 @@ void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_cleartext_vec_multiplication<uint64_t>(
      static_cast<cudaStream_t>(stream), gpu_index,
--- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/multiplication.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/multiplication.cuh
@@ -16,8 +16,8 @@
 template <typename T>
 __global__ void cleartext_vec_multiplication(T *output, T const *lwe_input,
                                             T const *cleartext_input,
-                                             uint32_t input_lwe_dimension,
-                                             uint32_t num_entries) {
+                                             const uint32_t input_lwe_dimension,
+                                             const uint32_t num_entries) {

  int tid = threadIdx.x;
  int index = blockIdx.x * blockDim.x + tid;
@@ -31,8 +31,8 @@ __global__ void cleartext_vec_multiplication(T *output, T const *lwe_input,
 template <typename T>
 __host__ void host_cleartext_vec_multiplication(
    cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input,
-    T const *cleartext_input, uint32_t input_lwe_dimension,
-    uint32_t input_lwe_ciphertext_count) {
+    T const *cleartext_input, const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  cudaSetDevice(gpu_index);
  // lwe_size includes the presence of the body
--- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/negation.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/negation.cu
@@ -4,11 +4,10 @@
 * Perform the negation of a u32 input LWE ciphertext vector.
 * See the equivalent operation on u64 ciphertexts for more details.
 */
-void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                          void *lwe_array_out,
-                                          void const *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count) {
+void cuda_negate_lwe_ciphertext_vector_32(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_negation<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
                          static_cast<uint32_t *>(lwe_array_out),
@@ -38,11 +37,10 @@ void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
 * LWE ciphertext vector is left unchanged. This function is a wrapper to a
 * device function that performs the operation on the GPU.
 */
-void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                          void *lwe_array_out,
-                                          void const *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count) {
+void cuda_negate_lwe_ciphertext_vector_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count) {

  host_negation<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
                          static_cast<uint64_t *>(lwe_array_out),
--- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/negation.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/negation.cuh
@@ -23,8 +23,8 @@ __global__ void negation(T *output, T const *input, uint32_t num_entries) {

 template <typename T>
 __host__ void host_negation(cudaStream_t stream, uint32_t gpu_index, T *output,
-                            T const *input, uint32_t input_lwe_dimension,
-                            uint32_t input_lwe_ciphertext_count) {
+                            T const *input, const uint32_t input_lwe_dimension,
+                            const uint32_t input_lwe_ciphertext_count) {

  cudaSetDevice(gpu_index);
  // lwe_size includes the presence of the body
--- a/backends/tfhe-cuda-backend/src/bindings.rs
+++ b/backends/tfhe-cuda-backend/src/bindings.rs
@@ -1345,6 +1345,17 @@ extern "C" {
        input_lwe_ciphertext_count: u32,
    );
 }
+extern "C" {
+    pub fn cuda_add_lwe_ciphertext_vector_plaintext_64(
+        stream: *mut ffi::c_void,
+        gpu_index: u32,
+        lwe_array_out: *mut ffi::c_void,
+        lwe_array_in: *const ffi::c_void,
+        plaintext_in: u64,
+        input_lwe_dimension: u32,
+        input_lwe_ciphertext_count: u32,
+    );
+}
 extern "C" {
    pub fn cuda_fourier_polynomial_mul(
        stream: *mut ffi::c_void,
--- a/backends/tfhe-hpu-backend/.gitattributes
+++ b/backends/tfhe-hpu-backend/.gitattributes
@@ -1 +0,0 @@
-*.xclbin filter=lfs diff=lfs merge=lfs -text
--- a/backends/tfhe-hpu-backend/.gitignore
+++ b/backends/tfhe-hpu-backend/.gitignore
@@ -1,2 +0,0 @@
-ngt_*
-config
--- a/backends/tfhe-hpu-backend/Cargo.toml
+++ b/backends/tfhe-hpu-backend/Cargo.toml
@@ -1,75 +0,0 @@
-[package]
-name = "tfhe-hpu-backend"
-version = "0.1.0"
-edition = "2021"
-authors = ["Zama Hardware team"]
-readme = "README.md"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[features]
-default = []
-hw-xrt = []
-io-dump = ["num-traits"]
-utils = ["clap", "clap-num"]
-
-[build-dependencies]
-cxx-build = "1.0"
-
-[dev-dependencies]
-serial_test = "3.1.1"
-
-[dependencies]
-cxx = "1.0"
-hw_regmap = {git = "ssh://git@github.com/zama-ai/hw_regmap.git", branch="main"}
-
-strum = { version = "0.26.2", features = ["derive"] }
-strum_macros = "0.26.2"
-enum_dispatch = "0.3.13"
-tracing = "0.1.40"
-tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
-serde = { version = "1", features = ["derive"]}
-toml = {version = "*", features = []}
-paste = "1.0.15"
-thiserror = "1.0.61"
-bytemuck = "1.16.0"
-anyhow = "1.0.82"
-deku = "0.16.0"
-lazy_static = "1.4.0"
-rand = "0.8.5"
-regex = "1.10.4"
-bitflags = "2.5.0"
-itertools = "0.11.0"
-lru = "0.12.3"
-
-# Dependencies used for Sim feature
-ipc-channel = "0.18.3"
-# ron = "*"
-# getset = "*"
-# indexmap = "*"
-
-# Dependencies used for debug feature
-num-traits = { version = "*", optional=true}
-clap = { version = "4.4.4", features = ["derive"], optional=true}
-clap-num = {version = "1.1.1", optional=true}
-
-
-# Binary for manual debugging
-# Enable to access Hpu register and drive some custom sequence by hand
-[[bin]]
-name = "hputil"
-path = "src/utils/hputil.rs"
-required-features = ["utils"]
-
-# Binary for asm manipulation
-# Enable to convert back and forth between asm/hex format
-[[bin]]
-name = "fmt"
-path = "src/utils/fmt.rs"
-required-features = ["utils"]
-
-# Firmware generation
-# Enable to expand IOp in list of Dop for inspection
-[[bin]]
-name = "fw"
-path = "src/utils/fw.rs"
-required-features = ["utils"]
--- a/backends/tfhe-hpu-backend/Readme.md
+++ b/backends/tfhe-hpu-backend/Readme.md
@@ -1,188 +0,0 @@
-# Tfhe-hpu-backend
-
-## Brief
-The `tfhe-hpu-backend` holds the code for the HPU acceleration of Zama's variant of TFHE.
-It contains a `HpuDevice` abstraction that enables easy configuration and dispatching of TFHE operations on HPU accelerator.
-
-The user API exposes the following functions for hardware setup:
- `HpuDevice::new`, `HpuDevice::from_config`: Instantiate abstraction device from configuration file. 
- `HpuDevice::init`: Configure and upload the required public material.
- `new_var_from`: Create a HPU ciphertext from `tfhe-rs` ciphertext.
-
-HPU variables can also be created from `high-level-api` objects, with the help of the `hpu-xfer` feature.
-This feature implements a trait that lets `FheUint` objects be cloned (`clone_on`) or moved (`mv_on`) onto the HPU accelerator, and cast back `from` them.
-
-These objects implement `std::ops` trait and could be used to dispatch operations on HPU hardware.
-
-### Backend structure
-`tfhe-hpu-backend` is split in various modules:
- `entities`: Define structure handled by HPU accelerator. Conversion trait from/into those objects are implemented in `tfhe-rs`.
- `asm`: Describe assembly-like language for the HPU. It enables to abstract HPU behavior and easily update it through micro-code.
- `fw`: Abstraction to help the micro-code designer. Use simple rust program for describing new HPU operations. Help with register/heap management.
- `interface`:
-  + `device`: High-level structure that exposes the User API.
-  + `backend`: Inner private structure that contains HPU modules
-  + `variable`: Wrap HPU ciphertexts. It enables to hook hardware object lifetime within the `rust` borrow-checker. 
-  + `memory`: Handle on-board memory allocation and synchronization
-  + `config`: Help to configure HPU accelerator through a TOML configuration file
-  + `cmd`: Translate operation over `variable` in concrete HPU commands
-  + `regmap`: Communicate with the HPU internal register with ease.
-  + `rtl`: Define concrete `rust` structure populated from HPU's status/configuration registers
-
-
-Below an overview of the internal structure of the Backend.
-![HPU backend structure](./figures/tfhe-hpu-backend.excalidraw.png)
-
-This picture depicts the internal modules of `tfhe-hpu-backend`, Device is the main entry point for the user. Its lifecycle is as follows:
-
-1. Create HpuDevice, open link with the associated FPGA. Configure associated driver and upload the bit stream. Read FPGA registers to extract supported configuration and features. Build Firmware conversion table (IOp -> DOps stream).
-
-2. Allocate required memory chunk in the on-board memory. Upload public material required by TFHE computation.
-
-3. Create HPU variables that handle TFHE Ciphertexts. It wraps TFHE Ciphertext with required internal resources and enforces the correct lifetime management. This abstraction enforces that during variable lifecycle all required resources are valid.
-
-4. The user can trigger HPU operations from HPU variables.
-  Variable abstraction enforces that required objects are correctly synced on the hardware. Then it converts each operation in a concrete HPU command.
-  When HPU operation is acknowledged by the hardware, the internal state of the associated variable is updated.
-  This mechanism enables asynchronous operation and minimal amount of Host to/from HW memory transfer.
-  This mechanism also enables offloading a computation graph to the HPU and requires a synchronization only on the final results.
-
-## Example
-### Configuration file
-HPU configuration knobs are gathered in a TOML configuration file. This file describes the targeted FPGA with its associated configuration:
-```toml
-[fpga] # FPGA target
-  # Register layout in the FPGA
-  regmap="backends/tfhe-hpu-backend/config/hpu_regif_core.toml"
-  polling_us=10
-[fpga.ffi.Xrt] # Hardware properties
-  id= 0 # ID of the used FPGA
-  kernel= "hpu_3parts_1in3" # Name of the entry point kernel
-  xclbin="backends/tfhe-hpu-backend/config/hpu_3parts.xclbin" # Path to the FPGA bitstream file
-
-[rtl] # RTL option
-  bpip_used = true # BPIP/IPIP mode
-  bpip_timeout = 100_000 # BPIP timeout in clock `cycles`
-
-[board] # Board configuration
-  ct_bank = [4096, 0, 0, 4096] # Allocated Ciphertext in various bank
-  ct_pc = [10, 11] # HBM pc connected to Ciphertext memory
-
-  lut_bank = 256 # Number of LUT allocated
-  lut_pc = 12 # HBM pc connected to LUT table
-
-  fw_size= 65536 # Size in byte of the Firmware translation table
-  fw_pc = 1 # HBM pc used by the firmware
-
-  bsk_pc = [ 2, 3, 4, 5, 6, 7, 8, 9] # HBM pc used by the bootstrapping key
-  ksk_pc = [24,25,26,27,28,29,30,31] # HBM pc used by the keyswitching key
-
-[firmware] # Firmware properties
-  integer_w=[16] # List of supported IOP width
-  pbs_w=8 # PBS batch width used for firmware generation
-  # List of custom IOP definition files
-  custom_iop.CUST_0 = "backends/tfhe-hpu-backend/config/custom_iop/cust_0.asm"
-  ```
-
-### Device setup
-Following code snippet shows how to instantiate and configure a `HpuDevice`:
-```rust
-    // Instantiate HpuDevice --------------------------------------------------
-    let hpu_device = HpuDevice::from_config("backends/tfhe-hpu-backend/config/hpu_config.toml");
-
-    // Extract pbs_configuration from Hpu and generate top-level config
-    let pbs_params = tfhe::shortint::PBSParameters::PBS(hpu_device.params().into());
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(pbs_params)
-        .build();
-
-    // Generate Keys
-    let (cks, sks) = generate_keys(config);
-    let sks_compressed = cks.generate_compressed_server_key();
-
-    // Init cpu side server keys
-    set_server_key(sks);
-
-    // Init Hpu device with server key and firmware
-    let (integer_sks_compressed, ..) = sks_compressed.into_raw_parts();
-    tfhe::integer::hpu::init_device(&hpu_device, integer_sks_compressed);
-```
-
-### Clone CPU ciphertext on HPU
-The following code snippet shows how to convert a CPU ciphertext into an HPU one:
-``` rust
-    // Draw random value as input
-    let a = rand::thread_rng().gen_range(0..u8::MAX);
-
-    // Encrypt them on Cpu side
-    let a_fhe = FheUint8::encrypt(a, &cks);
-
-    // Clone the ciphertext and move it into the Hpu world
-    // NB: Data doesn't move over PCIe at this stage
-    //     Data is only arranged in Hpu order and copied into the host internal buffer
-    let a_hpu = a_fhe.clone_on(&hpu_device);
-```
-
-### Dispatch operation on HPU
-HPU variables implement `std::ops` trait. These functions dispatch the operation on HPU device.
-Following code snippet shows how to start operation on HPU from Hpu variables:
-
-``` rust
-  // NB: a_hpu, b_hpu are HpuFheUint created from FheUint
-  // Compute a * b on Hpu
-  // Result are stored in `axb_hpu`. Result is kept on HPU, axb_hpu is only the image of the result (i.e. No PCIe xfer at this stage)
-  let axb_hpu = a_hpu * b_hpu;
-
-  // Dispatch operation with low-level interface
-  // Enable to dispatch operation directly based on IOp name
-  // For ct x constant operations
-  let iop_imm_res = a_hpu.iop_imm(iop_name, b as usize);
-  // For ct x ct operations
-  let iop_imm_res = a_hpu.iop_ct(iop_name, b_hpu);
-```
-
-### Retrieve result in CPU world
-The exposed API makes it possible to sync back only the required values.
-This makes it possible to offload a sub-computation graph without the cost of syncing intermediate values.
-
-The following code snippet starts two operations on the HPU and shows how to sync only the required result:
-```rust
-  // NB: a_hpu, b_hpu, c_hpu are HpuFheUint created from FheUint
-  let axb_hpu = a_hpu * b_hpu;
-  let axb_c_hpu = axb_hpu ^ c_hpu;
-
-  // Retrieve result in CPU world
-  // Pay the xfer cost for last result only
-  let axb_c_hpu = FheUint8::from(axb_c_hpu);
-```
-
-## Pre-made Examples
-Some example applications are already available in tfhe:
- * hpu_Xb: Benchmark application where `X` is one of [8,16,32,64]. Used to extract IOp performance
- * hpu_mixed: Showcase of mixing CPU/HPU operation with the help of HpuFheUint abstraction
- * hpu_gtv: Used with hpu_mockup to generate RTL stimulus. Multiple IOp widths are baked into the same binary
-
-In order to run those applications on hardware, the user must build from the project root (i.e. `tfhe-rs-internal`) with the `hw-xrt` and `hpu-xfer` features:
-```
-cargo build --release --features="hpu-xfer,hw-xrt" --examples
-./target/release/hpu_64b --iop MUL --iter 10
-```
-
-## Test framework
-There is also a set of tests baked into tfhe-rs, one for each IOp width in [8,16,32,64].
-Those tests have 3 sub-kinds:
-* `alu`: Run and check all ct x ct IOp
-* `bitwise`: Run and check all bitwise IOp
-* `cmp`: Run and check all comparison IOp
-
->NB: Like the pre-made examples, those tests must be run from the project root.
-
-The snippets below give some example commands that can be used for testing:
-```
-# Run all sub-kind for 64b IOp
-cargo test --release --features="hw-xrt,hpu-xfer" --test hpu_64b
-
-# Run only `alu` sub-kind for 16b IOp
-cargo test --release --features="hw-xrt,hpu-xfer" --test hpu_16b -- alu
-```
-
--- a/backends/tfhe-hpu-backend/build.rs
+++ b/backends/tfhe-hpu-backend/build.rs
@@ -1,26 +0,0 @@
-fn main() {
-    if cfg!(feature = "hw-xrt") {
-        println!("cargo:rustc-link-search=/opt/xilinx/xrt/lib");
-        println!("cargo:rustc-link-lib=dylib=stdc++");
-        println!("cargo:rustc-link-lib=dl");
-        println!("cargo:rustc-link-lib=rt");
-        println!("cargo:rustc-link-lib=uuid");
-        println!("cargo:rustc-link-lib=dylib=xrt_coreutil");
-
-        cxx_build::bridge("src/ffi/xrt/mod.rs")
-            .file("src/ffi/xrt/cxx/hpu_hw.cc")
-            .file("src/ffi/xrt/cxx/mem_zone.cc")
-            .flag_if_supported("-std=c++23")
-            .include("/opt/xilinx/xrt/include") // TODO: take the XRT include path from the shell environment instead of hard-coding it
-            .flag("-fmessage-length=0")
-            .compile("hpu-hw-ffi");
-
-        println!("cargo:rerun-if-changed=src/ffi/xrt/mod.rs");
-        println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/hpu_hw.cc");
-        println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/hpu_hw.h");
-        println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/mem_zone.cc");
-        println!("cargo:rerun-if-changed=src/ffi/xrt/cxx/mem_zone.h");
-    } else {
-        // Simulation FFI (`hw-xrt` feature disabled): nothing to compile or link here
-    }
-}
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_0.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_0.asm
@@ -1,15 +0,0 @@
-# CUST_0
-# Simple IOp to check the xfer between Hpu/Cpu
-# Construct constant in dest slot -> 249 (0xf9)
-SUB R0 R0 R0
-ADDS R0 R0 1
-ST TD.0 R0 
-SUB R1 R1 R1
-ADDS R1 R1 2
-ST TD.1 R1 
-SUB R2 R2 R2
-ADDS R2 R2 3
-ST TD.2 R2 
-SUB R3 R3 R3
-ADDS R3 R3 3
-ST TD.3 R3 
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_1.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_1.asm
@@ -1,11 +0,0 @@
-# CUST_1
-# Simple IOp to check the xfer between Hpu/Cpu
-# Dest <- Src_a
-LD R0   TA.0
-LD R1   TA.1
-LD R2   TA.2
-LD R3   TA.3
-ST TD.0 R0 
-ST TD.1 R1 
-ST TD.2 R2 
-ST TD.3 R3 
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_2.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_2.asm
@@ -1,15 +0,0 @@
-# CUST_2
-# Simple IOp to check PBS behavior
-# Dest <- PBSNone(Src_a)
-LD   R0   TA.0
-PBS  R0   R0    PbsNone
-ST   TD.0 R0 
-LD   R1   TA.1
-PBS  R1   R1    PbsNone
-ST   TD.1 R1 
-LD   R2   TA.2
-PBS  R2   R2    PbsNone
-ST   TD.2 R2 
-LD   R3   TA.3
-PBS  R3   R3    PbsNone
-ST   TD.3 R3 
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_6.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/custom_iop/cust_6.asm
@@ -1,6 +0,0 @@
-# CUST_6
-# Simple IOp to check PBS behavior
-# Dest <- PBSNone(Src_a)
-LD   R0   TA.0
-PBS  R0   R0    PbsNone
-ST   TD.0 R0 
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_3parts.xclbin
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_3parts.xclbin
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8153b1244422e1d7b17bd9a95456bc108f3d9bdc3ffa7de34bd41a3c59006241
-size 78894818
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_3parts.xclbin.info
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_3parts.xclbin.info
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_3parts.xclbin.link_summary
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_3parts.xclbin.link_summary
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_config.toml
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_config.toml
@@ -1,32 +0,0 @@
- 
-[fpga]
-  regmap="backends/tfhe-hpu-backend/config/hpu_regif_core.toml"
-  polling_us=10
-[fpga.ffi.Xrt]
-  id= 0
-  kernel= "hpu_3parts_1in3"
-  xclbin="backends/tfhe-hpu-backend/config/hpu_3parts.xclbin"
-
-[rtl]
-  bpip_used = true
-  bpip_timeout = 100_000
-
-[board]
-  ct_bank = [4096, 0, 0, 4096]
-  ct_pc = [10, 11]
-
-  lut_bank = 256
-  lut_pc = 12
-
-  fw_size= 65536
-  fw_pc = 1
-
-  bsk_pc = [ 2, 3, 4, 5, 6, 7, 8, 9]
-  ksk_pc = [24,25,26,27,28,29,30,31]
-
-[firmware]
-  integer_w=[16]
-  pbs_w=8
-  custom_iop.CUST_0 = "backends/tfhe-hpu-backend/config/custom_iop/cust_0.asm"
-  custom_iop.CUST_1 = "backends/tfhe-hpu-backend/config/custom_iop/cust_1.asm"
-  custom_iop.CUST_2 = "backends/tfhe-hpu-backend/config/custom_iop/cust_2.asm"
--- a/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_regif_core.toml
+++ b/backends/tfhe-hpu-backend/config_store/config_44b_pem2/hpu_regif_core.toml
@@ -1,481 +0,0 @@
-# This is a sample example of register-map definition
-
-module_name="hpu_regif_core"
-description="Hpu top-level register interface. Used by the host to retrieved RTL information, configure it and issue commands."
-word_size_b = 32
-offset = 0x00
-ext_pkg = ["axi_if_common_param_pkg", "axi_if_shell_axil_pkg"]
-
-# =====================================================================================================================
-[section.Xrt]
-description="Vitis Required registers"
-offset= 0x0
-align_offset=true
-
- # Currently not in use -> placeholder only
-[section.Xrt.register.reserved]
-  description="Xrt reserved"
-  default_val=0x00
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-
-# =====================================================================================================================
-[section.Info]
-description="Contain all the RTL parameters used that have impact on associated SW"
-offset= 0x10
-align_offset=true
-
-[section.Info.register.Version]
-  description="RTL version"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  param_name="VERSION"
-
-[section.Info.register.NttInternal]
-  description="Ntt internal parameters"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.radix = { size_b=8, offset_b=0 , param_name="R", description="Ntt radix"}
-  field.psi   = { size_b=8, offset_b=8 , param_name="PSI", description="Ntt Psi"}
-  field.div   = { size_b=8, offset_b=16, param_name="BWD_PSI_DIV", description="Ntt backward div"}
-  field.delta = { size_b=8, offset_b=24, param_name="DELTA", description="Ntt network delta"}
-
-[section.Info.register.NttArch]
-  description="Ntt architecture"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  param_name="NTT_CORE_ARCH"
-
-[section.Info.register.NttPbsNb]
-  description="Maximum number of PBS in the Ntt pipeline"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.batch_pbs_nb = { size_b=8, offset_b=0 , param_name="BATCH_PBS_NB", description="Maximum number of PBS in the NTT pipe"}
-  field.total_pbs_nb = { size_b=8, offset_b=8 , param_name="TOTAL_PBS_NB", description="Maximum number of PBS stored in PEP"}
-
-[section.Info.register.NttModulo]
-  description="Code associated with the prime number used in Ntt"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  param_name="MOD_NTT_NAME"
-
-[section.Info.register.Appli]
-  description="Code associated with the application"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  param_name="APPLICATION_NAME"
-
-[section.Info.register.KsShape]
-  description="Shape of Keyswitch computation kernel"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.x = { size_b=8, offset_b=0 , param_name="LBX", description="Nb coef. on X dim"}
-  field.y = { size_b=8, offset_b=8 , param_name="LBY", description="Nb coef. on Y dim"}
-  field.z = { size_b=8, offset_b=16, param_name="LBZ", description="Nb coef. on Z dim"}
-
-[section.Info.register.KsInfo]
-  description="Properties of Keyswitch computation kernel"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.mod_ksk_w = { size_b=8, offset_b=0 , param_name="MOD_KSK_W", description="Width of ksk modulo"}
-  field.ks_l      = { size_b=8, offset_b=8 , param_name="KS_L", description="Nb of ks level"}
-  field.ks_b      = { size_b=8, offset_b=16, param_name="KS_B_W", description="Width of ks decomp"}
-
-[section.Info.register.RegfInfo]
-  description="Properties of register file"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.reg_nb  = { size_b=8, offset_b=0 , param_name="REGF_REG_NB", description="Number of registers in regfile"}
-  field.coef_nb = { size_b=8, offset_b=8 , param_name="REGF_COEF_NB", description="Number of coefficients at regfile interface"}
-
-[section.Info.register.IscInfo]
-  description="Properties of instruction scheduler"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.min_iop_size  = { size_b=8, offset_b=0 , param_name="MIN_IOP_SIZE", description="#DOp per IOp to prevent sync_id overflow."}
-
-[section.Info.register.PEInfo]
-  description="Properties of process elements"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.alu_nb = { size_b=8, offset_b=24 , param_name="PEA_ALU_NB", description="Number of coefficients processed in parallel in pe_alu"}
-  field.pep_regf_period = { size_b=8, offset_b=16 , param_name="PEP_REGF_PERIOD", description="Number of cycles between 2 consecutive data transfert between PEP and regfile"}
-  field.pem_regf_period = { size_b=8, offset_b=8 , param_name="PEM_REGF_PERIOD", description="Number of cycles between 2 consecutive data transfert between PEM and regfile"}
-  field.pea_regf_period = { size_b=8, offset_b=0 , param_name="PEA_REGF_PERIOD", description="Number of cycles between 2 consecutive data transfert between PEA and regfile"}
-
-[section.Info.register.HbmPc]
-  description="HBM pseudo channel properties"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.bsk_pc     = { size_b=8, offset_b=0 , param_name="BSK_PC", description="Bsk pc"}
-  field.bsk_cut_nb = { size_b=8, offset_b=8 , param_name="BSK_CUT_NB", description="Bsk cut nb"}
-  field.ksk_pc     = { size_b=8, offset_b=16, param_name="KSK_PC", description="Ksk pc"}
-  field.ksk_cut_nb = { size_b=8, offset_b=24, param_name="KSK_CUT_NB", description="Ksk cut nb"}
-[section.Info.register.HbmPc_2]
-  description="HBM pseudo channel properties (2)"
-  owner="Parameter"
-  read_access="Read"
-  write_access="None"
-  field.pem_pc = { size_b=8, offset_b=0, param_name="PEM_PC", description="pem_pc"}
-
-# =====================================================================================================================
-[section.LdSt]
-description="Define some properties of CT buffers in board-memory"
-align_offset=true
-duplicate=["_bank0","_bank1","_bank2","_bank3"]
-[section.LdSt.register.addr]
-  description="Ciphertext buffer addr"
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-  duplicate=["_pc0_lsb", "_pc0_msb","_pc1_lsb", "_pc1_msb"]
-
-# =====================================================================================================================
-[section.PbsLut]
-description="Define PBS Lut offset in board-memory"
-align_offset=true
-
-[section.PbsLut.register.addr]
-  description="Pbs Lut gid offset"
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-  duplicate=["_lsb", "_msb"]
-
-# =====================================================================================================================
-[section.Keys]
-description="Define keys location properties"
-align_offset=true
-duplicate=["_Bsk", "_Ksk"]
-
-[section.Keys.register.addr_pc]
-  description="Key address for PC#"
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-  duplicate=["_pc0_lsb", "_pc0_msb", "_pc1_lsb", "_pc1_msb", "_pc2_lsb", "_pc2_msb", "_pc3_lsb", "_pc3_msb", "_pc4_lsb", "_pc4_msb", "_pc5_lsb", "_pc5_msb", "_pc6_lsb", "_pc6_msb", "_pc7_lsb", "_pc7_msb"]
-
-[section.Keys.register.avail]
-  description="Key available bit"
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-  field.avail = { size_b=1, offset_b=0 , default_val=0, description="avail"}
-
-[section.Keys.register.reset]
-  description="Key reset sequence"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-  field.request     = { size_b=1, offset_b=0 , default_val=0, description="request"}
-  field.done = { size_b=1, offset_b=31 , default_val=0, description="done"}
-
-# =====================================================================================================================
-[section.Bpip]
-description="Define BPIP configuration"
-align_offset=true
-
-[section.Bpip.register.use]
-  description="(1) Use BPIP mode, (0) use IPIP mode (default)"
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-  field.use_bpip = { size_b=1, offset_b=0 , default_val=0, description="use"}
-
-[section.Bpip.register.timeout]
-  description="Timeout for BPIP mode"
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-  default_val=0xffffffff
-
-# =====================================================================================================================
-[section.Trace]
-description="Define Trace offset in board-memory"
-align_offset=true
-
-[section.Trace.register.addr]
-  description="Trace address offset"
-  owner="User"
-  read_access="Read"
-  write_access="Write"
-  duplicate=["_lsb", "_msb"]
-
-# =====================================================================================================================
-[section.WorkAck]
-description="Purpose of this section"
-offset= 0x800
-align_offset=true
-
-[section.WorkAck.register.workq]
-  description="Insert work in workq and read status"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.WorkAck.register.ackq]
-  description="Pop ack from in ackq"
-  owner="Kernel"
-  read_access="ReadNotify"
-  write_access="None"
-  field.bsk_pc     = { size_b=8, offset_b=0 , default_val=0, description="Bsk pc"}
-  field.bsk_cut_nb = { size_b=8, offset_b=8 , default_val=0, description="Bsk cut nb"}
-
-# =====================================================================================================================
-[section.Runtime]
-description="Contains all runtimes informations exposed by the RTL"
-offset= 0x1000
-align_offset=true
-
-[section.Runtime.register.errors]
-  description="Error register (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-  field.pbs   = { size_b=13, offset_b=0 , default_val=0, description="HPU error"}
-
-[section.Runtime.register.infos_loop]
-  description="Informations register : iteration loop"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.br_loop   = { size_b=15, offset_b=0 ,  default_val=0, description="PBS current BR-loop"}
-  field.br_loop_c = { size_b=1,  offset_b=15 , default_val=0, description="PBS current BR-loop parity"}
-  field.ks_loop   = { size_b=15, offset_b=16 , default_val=0, description="KS current KS-loop"}
-  field.ks_loop_c = { size_b=1,  offset_b=31 , default_val=0, description="KS current KS-loop parity"}
-
-[section.Runtime.register.infos_pointer0]
-  description="Informations register : PEP pointers 0"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.pool_rp = { size_b=8, offset_b=0  , default_val=0, description="PEP pool_rp"}
-  field.pool_wp = { size_b=8, offset_b=8  , default_val=0, description="PEP pool_wp"}
-  field.ldg_pt  = { size_b=8, offset_b=16 , default_val=0, description="PEP ldg_pt"}
-  field.ldb_pt  = { size_b=8, offset_b=24 , default_val=0, description="PEP ldb_pt"}
-
-[section.Runtime.register.infos_pointer1]
-  description="Informations register : PEP pointers 1"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.ks_in_rp  = { size_b=8, offset_b=0  , default_val=0, description="PEP ks_in_rp"}
-  field.ks_in_wp  = { size_b=8, offset_b=8  , default_val=0, description="PEP ks_in_wp"}
-  field.ks_out_rp = { size_b=8, offset_b=16 , default_val=0, description="PEP ks_out_rp"}
-  field.ks_out_wp = { size_b=8, offset_b=24 , default_val=0, description="PEP ks_out_wp"}
-
-[section.Runtime.register.infos_pointer2]
-  description="Informations register : PEP pointers 2"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.pbs_in_rp  = { size_b=8, offset_b=0  , default_val=0, description="PEP pbs_in_rp"}
-  field.pbs_in_wp  = { size_b=8, offset_b=8  , default_val=0, description="PEP pbs_in_wp"}
-
-[section.Runtime.register.isc_info]
-  description="ISC 4 latest instructions received ([0] is the most recent)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  duplicate=["_0","_1","_2","_3"]
-
-[section.Runtime.register.pep_seq_bpip_batch_cnt]
-  description="PEP BPIP batch counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_seq_bpip_batch_flush_cnt]
-  description="PEP BPIP batch triggered with a flush counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_seq_bpip_batch_timeout_cnt]
-  description="PEP BPIP batch triggered with a timeout counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_seq_ld_ack_cnt]
-  description="PEP load blwe ack counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_seq_cmux_not_full_batch_cnt]
-  description="PEP not full batch CMUX counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_ldb_rcp_dur]
-  description="PEP load BLWE reception max duration (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_ldg_req_dur]
-  description="PEP load GLWE request max duration (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_ldg_rcp_dur]
-  description="PEP load GLWE reception max duration (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_mmacc_sxt_rcp_dur]
-  description="PEP MMACC SXT reception duration (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_mmacc_sxt_req_dur]
-  description="PEP MMACC SXT request duration (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_mmacc_sxt_cmd_wait_b_dur]
-  description="PEP MMACC SXT command without b duration (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_inst_cnt]
-  description="PEP input instruction counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pep_ack_cnt]
-  description="PEP instruction acknowledge counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pem_load_inst_cnt]
-  description="PEM load input instruction counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pem_load_ack_cnt]
-  description="PEM load instruction acknowledge counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pem_store_inst_cnt]
-  description="PEM store input instruction counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pem_store_ack_cnt]
-  description="PEM store instruction acknowledge counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pea_inst_cnt]
-  description="PEA input instruction counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pea_ack_cnt]
-  description="PEA instruction acknowledge counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.isc_inst_cnt]
-  description="ISC input instruction counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.isc_ack_cnt]
-  description="ISC instruction acknowledge sample counter (Could be reset by user)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="WriteNotify"
-
-[section.Runtime.register.pem_load_info_0]
-  description="PEM load first data)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  duplicate=["_pc0_0","_pc0_1","_pc0_2","_pc0_3","_pc1_0","_pc1_1","_pc1_2","_pc1_3"]
-
-[section.Runtime.register.pem_load_info_1]
-  description="PEM load first address"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  duplicate=["_pc0_lsb","_pc0_msb","_pc1_lsb","_pc1_msb"]
-
-[section.Runtime.register.pem_store_info_0]
-  description="PEM store info 0)"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.cmd_vld             = { size_b=1, offset_b=0  , default_val=0, description="PEM_ST cmd vld"}
-  field.cmd_rdy             = { size_b=1, offset_b=1  , default_val=0, description="PEM_ST cmd rdy"}
-  field.pem_regf_rd_req_vld = { size_b=1, offset_b=2  , default_val=0, description="PEM_ST pem_regf_rd_req_vld"}
-  field.pem_regf_rd_req_rdy = { size_b=1, offset_b=3  , default_val=0, description="PEM_ST pem_regf_rd_req_rdy"}
-  field.brsp_fifo_in_vld    = { size_b=4, offset_b=4  , default_val=0, description="PEM_ST brsp_fifo_in_vld"}
-  field.brsp_fifo_in_rdy    = { size_b=4, offset_b=8  , default_val=0, description="PEM_ST brsp_fifo_in_rdy"}
-  field.rcp_fifo_in_vld     = { size_b=4, offset_b=12 , default_val=0, description="PEM_ST rcp_fifo_in_vld"}
-  field.rcp_fifo_in_rdy     = { size_b=4, offset_b=16 , default_val=0, description="PEM_ST rcp_fifo_in_rdy"}
-  field.r2_axi_vld          = { size_b=4, offset_b=20 , default_val=0, description="PEM_ST r2_axi_vld"}
-  field.r2_axi_rdy          = { size_b=4, offset_b=24 , default_val=0, description="PEM_ST r2_axi_rdy"}
-  field.c0_enough_location  = { size_b=4, offset_b=28 , default_val=0, description="PEM_ST c0_enough_location"}
-
-
-[section.Runtime.register.pem_store_info_1]
-  description="PEM store info 1"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.s0_cmd_vld          = { size_b=4, offset_b=0  , default_val=0, description="PEM_ST s0_cmd_vld"}
-  field.s0_cmd_rdy          = { size_b=4, offset_b=4  , default_val=0, description="PEM_ST s0_cmd_rdy"}
-  field.m_axi_bvalid        = { size_b=4, offset_b=8  , default_val=0, description="PEM_ST m_axi_bvalid"}
-  field.m_axi_bready        = { size_b=4, offset_b=12 , default_val=0, description="PEM_ST m_axi_bready"}
-  field.m_axi_wvalid        = { size_b=4, offset_b=16 , default_val=0, description="PEM_ST m_axi_wvalid"}
-  field.m_axi_wready        = { size_b=4, offset_b=20 , default_val=0, description="PEM_ST m_axi_wready"}
-  field.m_axi_awvalid       = { size_b=4, offset_b=24 , default_val=0, description="PEM_ST m_axi_awvalid"}
-  field.m_axi_awready       = { size_b=4, offset_b=28 , default_val=0, description="PEM_ST m_axi_awready"}
-
-[section.Runtime.register.pem_store_info_2]
-  description="PEM store info 2"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.c0_free_loc_cnt      = { size_b=16, offset_b=0  , default_val=0, description="PEM_ST c0_free_loc_cnt"}
-  field.brsp_bresp_cnt       = { size_b=16, offset_b=16 , default_val=0, description="PEM_ST brsp_bresp_cnt"}
-
-[section.Runtime.register.pem_store_info_3]
-  description="PEM store info 3"
-  owner="Kernel"
-  read_access="Read"
-  write_access="None"
-  field.brsp_ack_seen      = { size_b=16, offset_b=0  , default_val=0, description="PEM_ST brsp_ack_seen"}
-  field.c0_cmd_cnt         = { size_b=8,  offset_b=16 , default_val=0, description="PEM_ST c0_cmd_cnt"}
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/Readme.md
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/Readme.md
@@ -1,12 +0,0 @@
-## 1010_26848c_64b_msg2_carry2_msplit_batch8_PSI16_PARTgf64_fanout_Arb
-/projects/jjduflot/projects/dev_gf64_clean_dont_3/xrt/output_1010_26848c_64b_msg2_carry2_msplit_batch8_PSI16_PARTgf64_fanout_Arb/
-:zap:
-gf64
-APPLI_msg2_carry2  (N=2048, GLWE_K=1)
-msplit
-PSI16
-HPU_PART=gf64
-Timing : HBM : OK, HPU : 232MHz
-Contraintes fanout
-Hack arbiter
-
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_0.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_0.asm
@@ -1,15 +0,0 @@
-# CUST_0
-# Simple IOp to check the xfer between Hpu/Cpu
-# Construct constant in dest slot -> 249 (0xf9)
-SUB R0 R0 R0
-ADDS R0 R0 1
-ST TD.0 R0 
-SUB R1 R1 R1
-ADDS R1 R1 2
-ST TD.1 R1 
-SUB R2 R2 R2
-ADDS R2 R2 3
-ST TD.2 R2 
-SUB R3 R3 R3
-ADDS R3 R3 3
-ST TD.3 R3 
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_1.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_1.asm
@@ -1,11 +0,0 @@
-# CUST_1
-# Simple IOp to check the xfer between Hpu/Cpu
-# Dest <- Src_a
-LD R0   TA.0
-LD R1   TA.1
-LD R2   TA.2
-LD R3   TA.3
-ST TD.0 R0 
-ST TD.1 R1 
-ST TD.2 R2 
-ST TD.3 R3 
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_2.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_2.asm
@@ -1,15 +0,0 @@
-# CUST_2
-# Simple IOp to check PBS behavior
-# Dest <- PBSNone(Src_a)
-LD   R0   TA.0
-PBS  R0   R0    PbsNone
-ST   TD.0 R0 
-LD   R1   TA.1
-PBS  R1   R1    PbsNone
-ST   TD.1 R1 
-LD   R2   TA.2
-PBS  R2   R2    PbsNone
-ST   TD.2 R2 
-LD   R3   TA.3
-PBS  R3   R3    PbsNone
-ST   TD.3 R3 
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_6.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_6.asm
@@ -1,6 +0,0 @@
-# CUST_6
-# Simple IOp to check PBS behavior
-# Dest <- PBSNone(Src_a)
-LD   R0   TA.0
-PBS  R0   R0    PbsNone
-ST   TD.0 R0 
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_7.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_7.asm
@@ -1,8 +0,0 @@
-# CUST_7
-# Simple IOp to check common pattern behavior
-# Dest <- PBSNone(Src_a)
-LD   R0   TA.0
-LD   R1   TB.0
-MAC  R2   R1 R0 4
-PBS  R0   R2    PbsNone
-ST   TD.0 R0 
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_8.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_8.asm
@@ -1,6 +0,0 @@
-# CUST_8
-# Simple IOp to check MAC behavior
-LD   R0   TA.0
-LD   R1   TB.0
-MAC  R2   R1 R0 4
-ST   TD.0 R2 
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_9.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_9.asm
@@ -1,6 +0,0 @@
-# CUST_9
-# Simple IOp to check ADD behavior
-LD   R0   TA.0
-LD   R1   TB.0
-ADD  R2   R1 R0
-ST   TD.0 R2 
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_A.asm
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/custom_iop/cust_A.asm
@@ -1,17 +0,0 @@
-# MUL              I4@[0]0xd92      I4@[0]0xc2a      I4@[0]0xa24     
-LD               R0               TA.0            
-LD               R1               TB.0            
-MAC              R2               R0               R1               4               
-LD               R3               TB.1            
-MAC              R4               R0               R3               4               
-LD               R5               TA.1            
-MAC              R6               R5               R1               4               
-PBS              R7               R2               PbsMultCarryMsgLsb
-PBS              R8               R2               PbsMultCarryMsgMsb
-PBS              R9               R4               PbsMultCarryMsgLsb
-PBS              R10              R6               PbsMultCarryMsgLsb
-ST               TD.0             R7              
-ADD              R11              R8               R9              
-ADD              R12              R11              R10             
-PBS              R13              R12              PbsMsgOnly      
-ST               TD.1             R13             
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_config.toml
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_config.toml
@@ -1,37 +0,0 @@
- 
-[fpga]
-  regmap="backends/tfhe-hpu-backend/config/hpu_regif_core.toml"
-  polling_us=10
-[fpga.ffi.Xrt]
-  id= 0
-  kernel= "hpu_msplit_3parts_1in3"
-  xclbin="backends/tfhe-hpu-backend/config/hpu_msplit_3parts.xclbin"
-
-[rtl]
-  bpip_used = true
-  bpip_timeout = 100_000
-
-[board]
-  ct_bank = [4096, 0, 0, 4096]
-  ct_pc = [10, 11]
-
-  lut_bank = 256
-  lut_pc = 12
-
-  fw_size= 65536
-  fw_pc = 1
-
-  bsk_pc = [ 2, 3, 4, 5, 6, 7, 8, 9]
-  ksk_pc = [24,25,26,27,28,29,30,31]
-
-[firmware]
-  integer_w=[16]
-  pbs_w=8
-  custom_iop.CUST_0 = "backends/tfhe-hpu-backend/config/custom_iop/cust_0.asm"
-  custom_iop.CUST_1 = "backends/tfhe-hpu-backend/config/custom_iop/cust_1.asm"
-  custom_iop.CUST_2 = "backends/tfhe-hpu-backend/config/custom_iop/cust_2.asm"
-  custom_iop.CUST_6 = "backends/tfhe-hpu-backend/config/custom_iop/cust_6.asm"
-  custom_iop.CUST_7 = "backends/tfhe-hpu-backend/config/custom_iop/cust_7.asm"
-  custom_iop.CUST_8 = "backends/tfhe-hpu-backend/config/custom_iop/cust_8.asm"
-  custom_iop.CUST_9 = "backends/tfhe-hpu-backend/config/custom_iop/cust_9.asm"
-  custom_iop.CUST_A = "backends/tfhe-hpu-backend/config/custom_iop/cust_A.asm"
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_msplit_3parts.xclbin
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_msplit_3parts.xclbin
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9ae911961c3854e37b3c8694984a27895c7ac510f3a970065c3aa14d4a8cafc9
-size 76721833
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_msplit_3parts.xclbin.info
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_msplit_3parts.xclbin.info
--- a/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_msplit_3parts.xclbin.link_summary
+++ b/backends/tfhe-hpu-backend/config_store/config_gf64_msg2_carry2/hpu_msplit_3parts.xclbin.link_summary
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
David Testé	cae938a75b	wip: measure latencies of a list of cts	2025-01-28 11:19:27 +01:00
David Testé	bae1d1cf77	WIP: fix gpu streams and use iter_batched	2025-01-22 10:56:08 +01:00
David Testé	a3bc1a9d9e	chore(bench): new heuristic to define elements for throughput This is done to fill up backend with enough elements to fill the backend and avoid having long execution time for heavy operations like multiplication or division.	2025-01-20 15:21:05 +01:00
Nicolas Sarlin	cc85c441ea	chore(doc): add strings feature to doctests	2025-01-20 10:59:12 +01:00
Mayeul@Zama	1f254d6523	chore: feature gating js_high_level_api	2025-01-20 10:00:49 +01:00
Mayeul@Zama	909ce4ecbb	chore: update rust dependencies	2025-01-20 09:42:38 +01:00
Mayeul@Zama	67783f4683	chore: upgrade wasm-bindgen	2025-01-20 09:42:38 +01:00
tmontaigu	c62112a4a9	feat(c-hlapi): add abs	2025-01-17 17:04:18 +01:00
tmontaigu	9eb2eb9f0e	refactor(c-hlapi): have different macro for FheUint/FheInt As FheUint and FheInt may have different set of functions, we now have 2 different macros to define their C API.	2025-01-17 17:04:18 +01:00
Arthur Meyre	aa5b431aae	test(core): use 5 bits parameters tweaked to 4 bits to avoid high pfail	2025-01-17 16:06:27 +01:00
David Testé	2b914ae57a	chore(ci): use ubuntu 22.04 to run security checks Sagemath is not available as a package on Ubuntu 24.04.	2025-01-17 11:31:40 +01:00
Nicolas Sarlin	f4a8991f67	fix(safe_ser): aliases in named for renamed types deserialization	2025-01-17 11:13:43 +01:00
David Testé	a882262691	chore(ci): fix recursive search for changes in docs files	2025-01-17 10:04:46 +01:00
aquint-zama	1976a9dce6	chore: update release workflow	2025-01-16 16:04:58 +01:00
aquint-zama	ec87c15cc2	chore: add slsa support for tfhe-ntt	2025-01-16 16:04:58 +01:00
aquint-zama	c273e973bb	chore: add slsa support for tfhe-fft	2025-01-16 16:04:58 +01:00
aquint-zama	07e3fb2779	chore: add slsa support for tfhe-cuda-backend	2025-01-16 16:04:58 +01:00
aquint-zama	c2d4e77eec	chore: add slsa support for tfhe-versionable	2025-01-16 16:04:58 +01:00
aquint-zama	cdf627f2d5	chore: add slsa support for tfhe-csprng	2025-01-16 16:04:58 +01:00
aquint-zama	2f79f646f7	chore: add slsa support for tfhe-zk-pok	2025-01-16 16:04:58 +01:00
Guillermo Oyarzun	a9e4724178	feat(gpu): implement fhe rand on gpu	2025-01-16 14:48:09 +01:00
David Testé	7a8efb1934	chore(ci): display head ref in slack notification message Default 'Ref' displayed in message relies on github.ref value. On pull_request_target, it's the base_ref, instead of head_ref, that is set as value for github.ref. We cannot change 'Ref' field directly. As a workaround, we hide 'Ref' in the message and display the head_ref directly in SLACK_MESSAGE.	2025-01-16 14:25:05 +01:00
David Testé	bc1aeeb85e	chore(ci): skip aws fast tests if ci files changed This would skip 'check-user-permission' job if the event 'pull_request_target' is emitted and CI files have changed. It avoids overlapping of 'pull_request' and 'pull_request_target' events. CI changes would only be tested on 'pull_request' for Zama own pull requests.	2025-01-16 14:25:05 +01:00
Nicolas Sarlin	cbdba38147	fix(shortint): error message in packing keyswitch	2025-01-16 13:08:36 +01:00
Agnes Leroy	cea871fc6b	chore(gpu): update multi-bit parameters	2025-01-16 10:23:04 +01:00
Arthur Meyre	34a006a3ee	chore(ci): fix clippy on M1 for disabled multi bit noise	2025-01-16 09:44:21 +01:00
Pedro Alves	b3740e75f2	chore(docs): Remove mention to NVLink NVLink is not needed anymore in the CUDA backend.	2025-01-16 09:35:15 +01:00
Andrei Stoian	b46affa45b	chore(gpu): add reference to gemm algorithm	2025-01-16 09:07:58 +01:00
Mayeul@Zama	72095144dc	chore: fix new lints	2025-01-15 15:16:15 +01:00
Arthur Meyre	a91e8618c9	chore(docs): fix various issues with the docs	2025-01-15 11:37:04 +01:00
Nicolas Sarlin	9a64c34989	chore(lint): use dylint as lint driver for tfhe-lint	2025-01-14 18:30:04 +01:00
Nicolas Sarlin	7103a83ce5	chore(zk)!: use builtin isqrt instead of the internal implementation BREAKING CHANGE: bump MSRV	2025-01-14 18:29:29 +01:00
Nicolas Sarlin	1f41a6b85d	chore(zk): sqr fc takes u64 as parameter to avoid overflow	2025-01-14 18:29:29 +01:00
David Testé	ccc647a5ee	chore(ci): remove paths for pull_request_target event A 'paths:' directive with only excluded paths won't trigger the event.	2025-01-14 17:37:48 +01:00
Arthur Meyre	ffd4f5a93e	chore(ci): remove unused Dockerfile for wasm tests	2025-01-14 17:31:18 +01:00
David Testé	594157ecaa	chore(ci): choose pull request event with path changes To be able to run CI for external contribution, through forked repository, and be able to test CI modification in a development branch, we need to discriminate pull request events. For the former 'pull_request_target' event is needed to have access to GitHub secrets. 'pull_request' event is required for the latter otherwise the workflow would be pulled from the HEAD of the base branch and thus wouldn't contain changes from the developer.	2025-01-14 14:38:02 +01:00
dependabot[bot]	8ae871ec33	chore(deps): bump actions/upload-artifact from 4.5.0 to 4.6.0 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.5.0 to 4.6.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](`6f51ac03b9...65c4c4a1dd`) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>	2025-01-14 13:48:39 +01:00
dependabot[bot]	9535544409	chore(deps): bump actions-cool/check-user-permission from 2.2.1 to 2.3.0 Bumps [actions-cool/check-user-permission](https://github.com/actions-cool/check-user-permission) from 2.2.1 to 2.3.0. - [Release notes](https://github.com/actions-cool/check-user-permission/releases) - [Changelog](https://github.com/actions-cool/check-user-permission/blob/main/CHANGELOG.md) - [Commits](`956b2e73cd...7b90a27f92`) --- updated-dependencies: - dependency-name: actions-cool/check-user-permission dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>	2025-01-14 13:48:31 +01:00
dependabot[bot]	4438042b7d	chore(deps): bump zgosalvez/github-actions-ensure-sha-pinned-actions Bumps [zgosalvez/github-actions-ensure-sha-pinned-actions](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions) from 3.0.19 to 3.0.20. - [Release notes](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions/releases) - [Commits](`6ae615f647...c3a2b64f69`) --- updated-dependencies: - dependency-name: zgosalvez/github-actions-ensure-sha-pinned-actions dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] <support@github.com>	2025-01-14 13:48:23 +01:00
Arthur Meyre	f7189edb20	chore(ci): temporarily disable the multi bit noise check on Apple M1	2025-01-14 10:56:36 +01:00
Nicolas Sarlin	7058b3611a	chore(backward): update data repo to 0.5	2025-01-14 09:35:32 +01:00
Pedro Alves	59b315993d	chore(gpu): update new_multi_gpu() description	2025-01-14 09:24:24 +01:00
Arthur Meyre	3d1998635a	docs: indicate PBS benchmarks have Gaussian parameters	2025-01-13 16:57:40 +01:00
Arthur Meyre	3fa72e62ae	chore(docs): replace tabs by spaces	2025-01-13 16:57:40 +01:00
Arthur Meyre	0d43912884	docs: add TUniform distribution and link in benchmarks	2025-01-13 16:57:40 +01:00
Agnes Leroy	9930550b69	chore(doc): update links to the benchmark tables	2025-01-13 16:57:40 +01:00
Nicolas Sarlin	bdc3539954	doc(zk): explain how to use zkv1	2025-01-13 13:40:16 +01:00
tmontaigu	979a1b36f8	chore(docs): add strings guides	2025-01-13 13:25:47 +01:00
Andrei Stoian	298fd66631	feat(gpu): optimize packing keyswitch on gpu	2025-01-13 09:18:53 -03:00
David Testé	0952dfa1ad	chore(ci): run teardown job only if setup is a success	2025-01-13 12:19:39 +01:00
Mayeul@Zama	e1e567a145	chore: remove unused cargo key	2025-01-13 12:03:06 +01:00
Nicolas Sarlin	ee1a534584	chore: formatting uniformization for c and js	2025-01-10 16:09:04 +01:00
Nicolas Sarlin	c9eef7d193	chore: enable space in editorconfig for more file types	2025-01-10 16:09:04 +01:00
Nicolas Sarlin	4c8d55f32b	chore(zk): add bench zk v1 vs v2	2025-01-10 15:47:25 +01:00
Nicolas Sarlin	cd5b3c61eb	chore(backward): move compat tests to avoid compiling them everytime	2025-01-10 09:43:04 +01:00
Arthur Meyre	baefb7d911	chore: enable strings for docs.rs generation	2025-01-09 19:48:43 +01:00
Guillermo Oyarzun	d2a3450ab9	fix(gpu): fix cornercase in match value function	2025-01-09 17:51:31 +01:00
David Testé	6fb13328ec	chore(ci): put back weekly core_crypto benchmarks	2025-01-09 17:45:14 +01:00
David Testé	f633eedc29	chore(ci): push tfhe-cuda-backend to crates.io unconditionally	2025-01-09 12:10:55 +01:00
Nicolas Sarlin	a9fb3e9fbf	chore(trivium): fix params, reverted back to gaussian	2025-01-09 09:19:04 +01:00
Arthur Meyre	9a4b584419	chore(ci): make cargo-builds-ntt unique to ntt	2025-01-08 17:05:42 +01:00
Arthur Meyre	cdcba5ca13	chore(ci): make test names unique for fft and ntt	2025-01-08 17:05:42 +01:00
				`@@ -1 +0,0 @@`
				`*.xclbin filter=lfs diff=lfs merge=lfs -text`