chore: update CODEOWNERS file

chore(bench): code refactor and automation for hlapi
feat(hpu): new HPU bitstream RTL v2.2
2026-01-10 23:28:11 -05:00 · 2026-01-09 16:12:50 +01:00 · 2026-01-09 16:09:27 +01:00 · 2026-01-09 15:25:35 +01:00 · 2026-01-09 15:19:08 +01:00 · 2026-01-09 15:19:08 +01:00
343 changed files with 12122 additions and 5089 deletions
--- a/.cargo/audit.toml
+++ b/.cargo/audit.toml
@@ -2,6 +2,8 @@
 ignore = [
    # Ignoring unmaintained 'paste' advisory as it is a widely used, low-risk build dependency.
    "RUSTSEC-2024-0436",
+    # Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex in the short term.
+    "RUSTSEC-2025-0141",
 ]

 [output]
--- a/.github/actions/gpu_setup/action.yml
+++ b/.github/actions/gpu_setup/action.yml
@@ -23,6 +23,8 @@ runs:
        echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
        sha256sum -c checksum
        sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
+        sudo apt-get clean
+        sudo rm -rf /var/lib/apt/lists/*
        sudo apt update
        sudo apt remove -y unattended-upgrades
        sudo apt install -y cmake-format libclang-dev
--- a/.github/workflows/aws_tfhe_backward_compat_tests.yml
+++ b/.github/workflows/aws_tfhe_backward_compat_tests.yml
@@ -66,7 +66,7 @@ jobs:
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'true' # Needed to pull lfs data
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -80,7 +80,7 @@ jobs:

      - name: Retrieve data from cache
        id: retrieve-data-cache
-        uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        with:
          path: |
            utils/tfhe-backward-compat-data/**/*.cbor
@@ -94,8 +94,8 @@ jobs:

      # Pull token was stored by action/checkout to be used by lfs, we don't need it anymore
      - name: Remove git credentials
-        run: |
-          git config --local --unset-all http.https://github.com/.extraheader
+        run: | # Starting with version 6.0, actions/checkout uses a dedicated file for git credentials
+          git config --local --unset-all http.https://github.com/.extraheader || rm "${RUNNER_TEMP}"/git-credentials-*.config

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
@@ -109,7 +109,7 @@ jobs:
      - name: Store data in cache
        if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
        continue-on-error: true
-        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        with:
          path: |
            utils/tfhe-backward-compat-data/**/*.cbor
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -63,7 +63,7 @@ jobs:
      any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -71,7 +71,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            dependencies:
@@ -171,7 +171,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -219,7 +219,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        with:
          path: |
            ~/.nvm
@@ -232,7 +232,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -50,7 +50,7 @@ jobs:
        steps.changed-files.outputs.integer_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -58,7 +58,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            integer:
@@ -112,7 +112,7 @@ jobs:
    timeout-minutes: 480 # 8 hours
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/aws_tfhe_noise_checks.yml
+++ b/.github/workflows/aws_tfhe_noise_checks.yml
@@ -60,7 +60,7 @@ jobs:
    timeout-minutes: 1440
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -51,7 +51,7 @@ jobs:
        steps.changed-files.outputs.integer_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -59,7 +59,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            integer:
@@ -112,7 +112,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -72,7 +72,7 @@ jobs:
      any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -80,7 +80,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            dependencies:
@@ -182,7 +182,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -64,7 +64,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -80,7 +80,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        with:
          path: |
            ~/.nvm
@@ -93,7 +93,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
--- a/.github/workflows/benchmark_cpu.yml
+++ b/.github/workflows/benchmark_cpu.yml
@@ -1,6 +1,8 @@
 # Run benchmarks on an AWS instance and return parsed results to Slab CI bot.
 name: benchmark_cpu

+run-name: ${{ inputs.command }}::${{ inputs.bench_type}} (${{ inputs.op_flavor }}, ${{ inputs.precisions_set }}, ${{ inputs.params_type }})
+
 on:
  workflow_dispatch:
    inputs:
--- a/.github/workflows/benchmark_cpu_common.yml
+++ b/.github/workflows/benchmark_cpu_common.yml
@@ -149,7 +149,7 @@ jobs:
        params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -223,13 +223,13 @@ jobs:
          results_type: ${{ inputs.additional_results_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
@@ -251,7 +251,7 @@ jobs:
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CPU bencmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: benchmark_cpu_common/teardown-instance
--- a/.github/workflows/benchmark_cpu_weekly.yml
+++ b/.github/workflows/benchmark_cpu_weekly.yml
@@ -52,13 +52,13 @@ jobs:
          steps.check_bench_group_2.outputs.is_weekly_bench_group_2 == 'true'
        run: |
          echo "OP_FLAVOR=default" >> "${GITHUB_ENV}"
-          echo "PRECISIONS_SET=false" >> "${GITHUB_ENV}"
+          echo "PRECISIONS_SET=fast" >> "${GITHUB_ENV}"

      - name: Quarterly benchmarks
        if: steps.check_quarterly_bench.outputs.is_quarterly_bench == 'true'
        run: |
          echo "OP_FLAVOR=\"default,unchecked\"" >> "${GITHUB_ENV}"
-          echo "PRECISIONS_SET=true" >> "${GITHUB_ENV}"
+          echo "PRECISIONS_SET=all" >> "${GITHUB_ENV}"

      - name: Set operation flavor output
        id: set_op_flavor
--- a/.github/workflows/benchmark_ct_key_sizes.yml
+++ b/.github/workflows/benchmark_ct_key_sizes.yml
@@ -49,7 +49,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -99,13 +99,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_ct_key_sizes
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/benchmark_documentation.yml
+++ b/.github/workflows/benchmark_documentation.yml
@@ -8,8 +8,13 @@ on:
        description: "Run CPU benchmarks"
        type: boolean
        default: true
-      run-gpu-benchmarks:
-        description: "Run GPU benchmarks"
+      # GPU benchmarks are split because of resource scarcity.
+      run-gpu-integer-benchmarks:
+        description: "Run GPU integer benchmarks"
+        type: boolean
+        default: true
+      run-gpu-core-crypto-benchmarks:
+        description: "Run GPU core-crypto benchmarks"
        type: boolean
        default: true
      run-hpu-benchmarks:
@@ -52,7 +57,7 @@ jobs:
  run-benchmarks-gpu-integer:
    name: benchmark_documentation/run-benchmarks-gpu-integer
    uses: ./.github/workflows/benchmark_gpu_common.yml
-    if: inputs.run-gpu-benchmarks
+    if: inputs.run-gpu-integer-benchmarks
    with:
      profile: multi-h100-sxm5
      hardware_name: n3-H100-SXM5x8
@@ -113,7 +118,7 @@ jobs:
  run-benchmarks-gpu-core-crypto:
    name: benchmark_documentation/run-benchmarks-gpu-core-crypto
    uses: ./.github/workflows/benchmark_gpu_common.yml
-    if: inputs.run-gpu-benchmarks
+    if: inputs.run-gpu-core-crypto-benchmarks
    with:
      profile: multi-h100-sxm5
      hardware_name: n3-H100-SXM5x8
@@ -133,7 +138,7 @@ jobs:
  generate-svgs-with-benchmarks-run:
    name: benchmark-documentation/generate-svgs-with-benchmarks-run
    if: ${{ always() &&
-      (inputs.run-cpu-benchmarks || inputs.run-gpu-benchmarks ||inputs.run-hpu-benchmarks) &&
+      (inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks ||inputs.run-hpu-benchmarks) &&
      inputs.generate-svgs }}
    needs: [
      run-benchmarks-cpu-integer, run-benchmarks-gpu-integer, run-benchmarks-hpu-integer,
@@ -143,7 +148,7 @@ jobs:
    with:
      time_span_days: 5
      generate-cpu-svgs: ${{ inputs.run-cpu-benchmarks }}
-      generate-gpu-svgs: ${{ inputs.run-gpu-benchmarks }}
+      generate-gpu-svgs: ${{ inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks }}
      generate-hpu-svgs: ${{ inputs.run-hpu-benchmarks }}
    secrets:
      DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
@@ -152,7 +157,7 @@ jobs:

  generate-svgs-without-benchmarks-run:
    name: benchmark-documentation/generate-svgs-without-benchmarks-run
-    if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-benchmarks || inputs.run-hpu-benchmarks) &&
+    if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-integer-benchmarks || inputs.run-gpu-core-crypto-benchmarks || inputs.run-hpu-benchmarks) &&
      inputs.generate-svgs }}
    uses: ./.github/workflows/generate_svgs.yml
    with:
@@ -175,20 +180,22 @@ jobs:
      PATH_TO_DOC_ASSETS: tfhe/docs/.gitbook/assets
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'

      - name: Download SVG tables
-        uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
        with:
          path: svg_tables
          merge-multiple: 'true'

+      # Best-effort copy of SVG tables: `|| true` keeps the step passing under `bash -e` when a copy fails or no file matches, so the PR is still created.
      - name: Copy SVG tables to documentation location
        run: |
-          cp -f svg_tables/*integer-benchmark*.svg "${PATH_TO_DOC_ASSETS}"
-          cp -f svg_tables/*pbs-benchmark-tuniform*.svg "${PATH_TO_DOC_ASSETS}"
+          cp -f svg_tables/*integer-benchmark*.svg "${PATH_TO_DOC_ASSETS}" 2>/dev/null || true
+          cp -f svg_tables/*pbs-benchmark-tuniform*.svg "${PATH_TO_DOC_ASSETS}" 2>/dev/null || true
+          cp -f svg_tables/cpu-gpu-hpu-integer-benchmark-fheuint64-tuniform-2m128-ciphertext.svg "${PATH_TO_DOC_ASSETS}" 2>/dev/null || true

      - name: Get current date
        id: get-date
@@ -196,7 +203,7 @@ jobs:
          echo "date=$(date '+%g_%m_%d_%Hh%Mm%Ss')" >> "${GITHUB_OUTPUT}"

      - name: Create pull-request
-        uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
+        uses: peter-evans/create-pull-request@98357b18bf14b5342f975ff684046ec3b2a07725 # v8.0.0
        with:
          sign-commits: true # Commit will be signed by github-actions bot
          add-paths: ${{ env.PATH_TO_DOC_ASSETS }}/*.svg
--- a/.github/workflows/benchmark_gpu.yml
+++ b/.github/workflows/benchmark_gpu.yml
@@ -1,6 +1,8 @@
 # Run CUDA benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
 name: benchmark_gpu

+run-name: ${{ inputs.command }}::${{ inputs.bench_type}} (${{ inputs.profile }}, ${{ inputs.op_flavor }}, ${{ inputs.precisions_set }}, ${{ inputs.params_type }})
+
 on:
  workflow_dispatch:
    inputs:
@@ -21,10 +23,9 @@ on:
      command:
        description: "Benchmark command to run"
        type: choice
-        default: integer_multi_bit
+        default: integer
        options:
          - integer
-          - integer_multi_bit
          - integer_compression
          - pbs
          - pbs128
--- a/.github/workflows/benchmark_gpu_4090.yml
+++ b/.github/workflows/benchmark_gpu_4090.yml
@@ -40,7 +40,7 @@ jobs:
    timeout-minutes: 1440 # 24 hours
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -63,7 +63,7 @@ jobs:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
@@ -89,7 +89,7 @@ jobs:
          REF_NAME: ${{ github.ref_name }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_integer_multi_bit_gpu_default
          path: ${{ env.RESULTS_FILENAME }}
@@ -123,7 +123,7 @@ jobs:

    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -146,7 +146,7 @@ jobs:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
@@ -173,7 +173,7 @@ jobs:
          REF_NAME: ${{ github.ref_name }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
--- a/.github/workflows/benchmark_gpu_common.yml
+++ b/.github/workflows/benchmark_gpu_common.yml
@@ -175,7 +175,7 @@ jobs:
            gcc: 11
    steps:
      - name: Checkout tfhe-rs repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -209,7 +209,7 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -281,13 +281,13 @@ jobs:
          BENCH_TYPE: ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/benchmark_gpu_coprocessor.yml
+++ b/.github/workflows/benchmark_gpu_coprocessor.yml
@@ -130,7 +130,7 @@ jobs:
          git lfs install

      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          path: tfhe-rs
          persist-credentials: false
@@ -141,7 +141,7 @@ jobs:
          ls

      - name: Checkout fhevm
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          repository: zama-ai/fhevm
          persist-credentials: 'false'
@@ -192,10 +192,10 @@ jobs:
          cargo install sqlx-cli

      - name: Install foundry
-        uses: foundry-rs/foundry-toolchain@50d5a8956f2e319df19e6b57539d7e2acb9f8c1e
+        uses: foundry-rs/foundry-toolchain@8b0419c685ef46cb79ec93fbdc131174afceb730

      - name: Cache cargo
-        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
+        uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1
        with:
          path: |
            ~/.cargo/registry
@@ -223,7 +223,7 @@ jobs:
        working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker

      - name: Use Node.js
-        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
        with:
          node-version: 20.x

@@ -262,7 +262,7 @@ jobs:
      - name: Upload profile artifact
        env:
          REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ env.REPORT_NAME }}
          path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
@@ -293,13 +293,13 @@ jobs:
        working-directory: fhevm/

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
          path: fhevm/$${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/benchmark_hpu.yml
+++ b/.github/workflows/benchmark_hpu.yml
@@ -1,6 +1,8 @@
 # Run benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
 name: benchmark_hpu

+run-name: ${{ inputs.command }}::${{ inputs.bench_type}} (${{ inputs.op_flavor }}, ${{ inputs.precisions_set }})
+
 on:
  workflow_dispatch:
    inputs:
--- a/.github/workflows/benchmark_hpu_common.yml
+++ b/.github/workflows/benchmark_hpu_common.yml
@@ -126,7 +126,7 @@ jobs:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -185,13 +185,13 @@ jobs:
          BENCH_TYPE: ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/benchmark_perf_regression.yml
+++ b/.github/workflows/benchmark_perf_regression.yml
@@ -50,7 +50,7 @@ jobs:
      pull-requests: write # Needed to write a comment in a pull-request
    steps:
      - name: Checkout tfhe-rs repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
@@ -164,7 +164,7 @@ jobs:
            gcc: 11
    steps:
      - name: Checkout tfhe-rs repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
@@ -191,7 +191,7 @@ jobs:
        command: ${{ fromJson(needs.prepare-benchmarks.outputs.commands) }}
    steps:
      - name: Checkout tfhe-rs repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0  # Needed to get commit hash
          persist-credentials: 'false'
@@ -245,7 +245,7 @@ jobs:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
@@ -280,7 +280,7 @@ jobs:
          BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_regression_${{ env.RESULTS_FILE_SHA }} # RESULT_FILE_SHA is needed to avoid collision between matrix.command runs
          path: ${{ env.RESULTS_FILENAME }}
@@ -305,19 +305,19 @@ jobs:
      REF_NAME: ${{ github.head_ref || github.ref_name }}
    steps:
      - name: Checkout tfhe-rs repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Install recent Python
-        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
+        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        with:
          python-version: '3.12'
+          pip-install: -r ci/data_extractor/requirements.txt -r ci/perf_regression/requirements.txt

      - name: Fetch data
        run: |
-          python3 -m pip install -r ci/data_extractor/requirements.txt
          python3 ci/data_extractor/src/data_extractor.py regression_data \
          --generate-regression-json \
          --regression-profiles ci/regression.toml \
@@ -330,13 +330,12 @@ jobs:
          REGRESSION_PROFILE: ${{ needs.prepare-benchmarks.outputs.selected-regression-profile }}
          TFHE_BACKEND: ${{ needs.prepare-benchmarks.outputs.tfhe-backend }}
          HARDWARE_NAME: ${{ needs.prepare-benchmarks.outputs.hardware-name }}
-          DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATABASE_HOST }}
-          DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATABASE_USER }}
-          DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATABASE_PASSWORD }}
+          DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
+          DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
+          DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}

      - name: Generate regression report
        run: |
-          python3 -m pip install -r ci/perf_regression/requirements.txt
          python3 ci/perf_regression/perf_regression.py check_regression \
          --results-file regression_data.json \
          --generate-report
--- a/.github/workflows/benchmark_tfhe_fft.yml
+++ b/.github/workflows/benchmark_tfhe_fft.yml
@@ -55,7 +55,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -96,13 +96,13 @@ jobs:
          REF_NAME: ${{ github.ref_name }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_fft
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/benchmark_tfhe_ntt.yml
+++ b/.github/workflows/benchmark_tfhe_ntt.yml
@@ -55,7 +55,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -96,13 +96,13 @@ jobs:
          REF_NAME: ${{ github.ref_name }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_ntt
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/benchmark_wasm_client.yml
+++ b/.github/workflows/benchmark_wasm_client.yml
@@ -39,7 +39,7 @@ jobs:
      wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -47,7 +47,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            wasm_bench:
@@ -91,7 +91,7 @@ jobs:
        browser: [ chrome, firefox ]
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -119,7 +119,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        with:
          path: |
            ~/.nvm
@@ -132,7 +132,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
+        uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -153,6 +153,12 @@ jobs:
        env:
          BROWSER: ${{ matrix.browser }}

+      - name: Run benchmarks (unsafe coop)
+        run: |
+          make bench_web_js_api_unsafe_coop_"${BROWSER}"_ci
+        env:
+          BROWSER: ${{ matrix.browser }}
+
      - name: Parse results
        run: |
          make parse_wasm_benchmarks
@@ -169,13 +175,13 @@ jobs:
          REF_NAME: ${{ github.ref_name }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_wasm_${{ matrix.browser }}
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/cargo_audit.yml
+++ b/.github/workflows/cargo_audit.yml
@@ -26,7 +26,7 @@ jobs:
    name: cargo_audit/audit
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -24,7 +24,7 @@ jobs:
    outputs:
      matrix_command: ${{ steps.set-pcc-commands-matrix.outputs.commands }}
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/cargo_build_common.yml
+++ b/.github/workflows/cargo_build_common.yml
@@ -60,6 +60,8 @@ env:
  # Secrets will be available only to zama-ai organization members
  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"
+  LINELINT_VERSION: 0.0.6
+  LINELINT_CHECKSUM: "16b70fb7b471d6f95cbdc0b4e5dc2b0ac9e84ba9ecdc488f7bdf13df823aca4b"

 permissions:
  contents: read
@@ -138,7 +140,7 @@ jobs:
      result: ${{ steps.set_builds_result.outputs.result }}
    steps:
      - name: Checkout tfhe-rs repo
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -148,6 +150,15 @@ jobs:
        with:
          toolchain: stable

+      - name: Install newline linter
+        if: inputs.run-build && matrix.runner == env.EXTERNAL_CONTRIBUTION_RUNNER
+        run: |
+          wget "https://github.com/fernandrone/linelint/releases/download/${LINELINT_VERSION}/linelint-linux-amd64"
+          echo "${LINELINT_CHECKSUM} linelint-linux-amd64" > checksum
+          sha256sum -c checksum
+          chmod +x linelint-linux-amd64
+          ln -s "$(pwd)/linelint-linux-amd64" /usr/local/bin/linelint
+
      - name: Run pcc checks batch
        if: inputs.run-pcc-cpu-batch
        run: |
--- a/.github/workflows/cargo_build_tfhe_fft.yml
+++ b/.github/workflows/cargo_build_tfhe_fft.yml
@@ -26,7 +26,7 @@ jobs:
      fail-fast: false

    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/cargo_build_tfhe_ntt.yml
+++ b/.github/workflows/cargo_build_tfhe_ntt.yml
@@ -24,7 +24,7 @@ jobs:
        os: [ubuntu-latest, macos-latest, windows-latest]
      fail-fast: false
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/cargo_test_fft.yml
+++ b/.github/workflows/cargo_test_fft.yml
@@ -29,7 +29,7 @@ jobs:
      fft_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.fft_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -37,7 +37,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            fft:
@@ -56,7 +56,7 @@ jobs:
        runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
      fail-fast: false
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -92,7 +92,7 @@ jobs:
    if: needs.should-run.outputs.fft_test == 'true'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/cargo_test_ntt.yml
+++ b/.github/workflows/cargo_test_ntt.yml
@@ -31,7 +31,7 @@ jobs:
      ntt_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ntt_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: "false"
@@ -39,7 +39,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            ntt:
@@ -87,7 +87,7 @@ jobs:
        os: ${{fromJson(needs.setup-instance.outputs.matrix_os)}}
      fail-fast: false
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/ci_lint.yml
+++ b/.github/workflows/ci_lint.yml
@@ -20,7 +20,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -43,14 +43,14 @@ jobs:
          echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"

      - name: Check workflows security
-        uses: zizmorcore/zizmor-action@e673c3917a1aef3c65c972347ed84ccd013ecda4 # v0.2.0
+        uses: zizmorcore/zizmor-action@e639db99335bc9038abc0e066dfcd72e23d26fb4 # v0.3.0
        with:
          advanced-security: 'false' # Print results directly in logs
          persona: pedantic
          version: ${{ steps.get_zizmor.outputs.version }}

      - name: Ensure SHA pinned actions
-        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@9e9574ef04ea69da568d6249bd69539ccc704e74 # v4.0.0
+        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6124774845927d14c601359ab8138699fa5b70c3 # v4.0.1
        with:
          allowlist: |
            slsa-framework/slsa-github-generator
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -50,7 +50,7 @@ jobs:
    timeout-minutes: 5760 # 4 days
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -62,7 +62,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            tfhe:
@@ -92,7 +92,7 @@ jobs:
          make test_shortint_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
+        uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -106,7 +106,7 @@ jobs:
          make test_integer_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
+        uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -62,7 +62,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/generate_svg_common.yml
+++ b/.github/workflows/generate_svg_common.yml
@@ -5,22 +5,20 @@ on:
    inputs:
      backend:
        type: string
-        required: true
      hardware_name:
        type: string
-        required: true
      layer:
        type: string
-        required: true
      pbs_kind: # Valid values are 'classical', 'multi_bit' or 'any'
        type: string
-        required: true
      grouping_factor: # Valid values are 2, 3, or 4
        type: string
        default: 4
      bench_type: # Valid values are 'latency', 'throughput'
        type: string
-        required: true
+      backend_comparison:
+        type: boolean
+        default: false
      time_span_days:
        type: string
        default: 60
@@ -45,11 +43,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'

      - name: Produce table from database
+        if: inputs.backend_comparison == false
        run: |
          python3 -m pip install -r ci/data_extractor/requirements.txt
          python3 ci/data_extractor/src/data_extractor.py "${OUTPUT_FILENAME}" \
@@ -77,9 +76,34 @@ jobs:
          DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}

      - name: Upload tables
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
+        if: inputs.backend_comparison == false
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
          # This will upload all the file generated
          path: ${{ inputs.output_filename }}*.svg
          retention-days: 60
+
+      - name: Produce backends comparison table from database
+        if: inputs.backend_comparison == true
+        run: |
+          python3 -m pip install -r ci/data_extractor/requirements.txt
+          python3 ci/data_extractor/src/data_extractor.py "${OUTPUT_FILENAME}" \
+          --generate-svg \
+          --backends-comparison \
+          --time-span-days "${TIME_SPAN}"
+        env:
+          OUTPUT_FILENAME: ${{ inputs.output_filename }}
+          TIME_SPAN: ${{ inputs.time_span_days }}
+          DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
+          DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
+          DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
+
+      - name: Upload comparison tables
+        if: inputs.backend_comparison == true
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
+        with:
+          name: ${{ github.sha }}_backends_comparison_tables
+          # This will upload all the generated files
+          path: ${{ inputs.output_filename }}*.svg
+          retention-days: 60
--- a/.github/workflows/generate_svgs.yml
+++ b/.github/workflows/generate_svgs.yml
@@ -137,6 +137,19 @@ jobs:
      DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
      DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}

+  backend-comparison-latency-table:
+    name: generate_documentation_svgs/backend-comparison-latency-table
+    uses: ./.github/workflows/generate_svg_common.yml
+    if: inputs.generate-cpu-svgs && inputs.generate-gpu-svgs && inputs.generate-hpu-svgs
+    with:
+      backend_comparison: true
+      time_span_days: ${{ inputs.time_span_days }}
+      output_filename: cpu-gpu-hpu-integer-benchmark-fheuint64-tuniform-2m128-ciphertext
+    secrets:
+      DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
+      DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
+      DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
+
  # -----------------------------------------------------------
  # PBS benchmarks tables
  # -----------------------------------------------------------
--- a/.github/workflows/gpu_4090_tests.yml
+++ b/.github/workflows/gpu_4090_tests.yml
@@ -41,7 +41,7 @@ jobs:

    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_code_validation_tests.yml
+++ b/.github/workflows/gpu_code_validation_tests.yml
@@ -23,8 +23,8 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
-    # every 3 months
-    - cron: "0 0 1 */3 *"
+    # every month
+    - cron: "0 0 1 * *"

 permissions:
  contents: read
@@ -50,7 +50,7 @@ jobs:
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: hyperstack
-          profile: gpu-test
+          profile: single-h100

      # This instance will be spawned especially for pull-request from forked repository
      - name: Start GitHub instance
@@ -68,7 +68,7 @@ jobs:
      group: ${{ github.workflow_ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
-    timeout-minutes: 5760
+    timeout-minutes: 14400
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
@@ -79,7 +79,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_fast_h100_tests.yml
+++ b/.github/workflows/gpu_fast_h100_tests.yml
@@ -40,7 +40,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,7 +48,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -129,7 +129,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_fast_tests.yml
+++ b/.github/workflows/gpu_fast_tests.yml
@@ -39,7 +39,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -47,7 +47,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -114,7 +114,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_full_h100_tests.yml
+++ b/.github/workflows/gpu_full_h100_tests.yml
@@ -68,7 +68,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_full_multi_gpu_tests.yml
+++ b/.github/workflows/gpu_full_multi_gpu_tests.yml
@@ -40,7 +40,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,7 +48,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -116,7 +116,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_integer_long_run_tests.yml
+++ b/.github/workflows/gpu_integer_long_run_tests.yml
@@ -65,7 +65,7 @@ jobs:
    timeout-minutes: 4320 # 72 hours
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_memory_sanitizer.yml
+++ b/.github/workflows/gpu_memory_sanitizer.yml
@@ -78,7 +78,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_memory_sanitizer_h100.yml
+++ b/.github/workflows/gpu_memory_sanitizer_h100.yml
@@ -78,7 +78,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_pcc.yml
+++ b/.github/workflows/gpu_pcc.yml
@@ -74,7 +74,7 @@ jobs:

    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_signed_integer_classic_tests.yml
+++ b/.github/workflows/gpu_signed_integer_classic_tests.yml
@@ -40,7 +40,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,7 +48,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -116,7 +116,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_signed_integer_h100_tests.yml
+++ b/.github/workflows/gpu_signed_integer_h100_tests.yml
@@ -40,7 +40,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,7 +48,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -129,7 +129,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_signed_integer_tests.yml
+++ b/.github/workflows/gpu_signed_integer_tests.yml
@@ -41,7 +41,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -49,7 +49,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -117,7 +117,7 @@ jobs:
            gcc: 11
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_unsigned_integer_classic_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_classic_tests.yml
@@ -40,7 +40,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,7 +48,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -116,7 +116,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_unsigned_integer_h100_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_h100_tests.yml
@@ -40,7 +40,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,7 +48,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -129,7 +129,7 @@ jobs:
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/gpu_unsigned_integer_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_tests.yml
@@ -41,7 +41,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -49,7 +49,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            gpu:
@@ -117,7 +117,7 @@ jobs:
            gcc: 11
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/hpu_hlapi_tests.yml
+++ b/.github/workflows/hpu_hlapi_tests.yml
@@ -32,7 +32,7 @@ jobs:
      hpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.hpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -40,7 +40,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
+        uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
        with:
          files_yaml: |
            hpu:
@@ -83,7 +83,7 @@ jobs:
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/integer_long_run_tests.yml
+++ b/.github/workflows/integer_long_run_tests.yml
@@ -53,7 +53,7 @@ jobs:
    timeout-minutes: 4320 # 72 hours
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -41,7 +41,7 @@ jobs:
    timeout-minutes: 720

    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/make_release_common.yml
+++ b/.github/workflows/make_release_common.yml
@@ -52,7 +52,7 @@ jobs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -62,7 +62,7 @@ jobs:
          PACKAGE: ${{ inputs.package-name }}
        run: |
          cargo package -p "${PACKAGE}"
-      - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
        with:
          name: crate-${{ inputs.package-name }}
          path: target/package/*.crate
@@ -93,14 +93,14 @@ jobs:
      id-token: write # Needed for OIDC token exchange on crates.io
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Download artifact
-        uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
        with:
          name: crate-${{ inputs.package-name }}
          path: target/package
--- a/.github/workflows/make_release_cuda.yml
+++ b/.github/workflows/make_release_cuda.yml
@@ -64,7 +64,7 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          fetch-depth: 0
          persist-credentials: "false"
@@ -104,7 +104,7 @@ jobs:
        run: |
          cargo package -p tfhe-cuda-backend

-      - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
        with:
          name: crate-tfhe-cuda-backend
          path: target/package/*.crate
@@ -174,7 +174,7 @@ jobs:
          GCC_VERSION: ${{ matrix.gcc }}

      - name: Download artifact
-        uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+        uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
        with:
          name: crate-tfhe-cuda-backend
          path: target/package
--- a/.github/workflows/make_release_tfhe.yml
+++ b/.github/workflows/make_release_tfhe.yml
@@ -41,6 +41,7 @@ jobs:
  make-release:
    name: make_release_tfhe/make-release
    uses: ./.github/workflows/make_release_common.yml
+    if: ${{ inputs.push_to_crates }}
    with:
      package-name: "tfhe"
      dry-run: ${{ inputs.dry_run }}
@@ -59,6 +60,7 @@ jobs:
  make-release-js:
    name: make_release_tfhe/make-release-js
    needs: make-release
+    if: ${{ !cancelled() && needs.make-release.result != 'failure' }} # still runs when make-release is skipped, never after a cancellation
    runs-on: ubuntu-latest
    # For provenance of npmjs publish
    permissions:
@@ -66,7 +68,7 @@ jobs:
      id-token: write # also needed for OIDC token exchange on crates.io and npmjs.com
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -83,9 +85,9 @@ jobs:
          make build_web_js_api_parallel

      - name: Authenticate on NPM
-        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
        with:
-          node-version: '22'
+          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Publish web package
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -5,6 +5,9 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  pull_request:
@@ -30,10 +33,11 @@ jobs:
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
      - name: Start remote instance
        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: start
@@ -43,13 +47,20 @@ jobs:
          backend: aws
          profile: cpu-small

+      # This instance is used specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
+
  params-curves-security-check:
    name: parameters_check/params-curves-security-check
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
@@ -60,7 +71,7 @@ jobs:
          toolchain: stable

      - name: Checkout lattice-estimator
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          repository: malb/lattice-estimator
          path: lattice_estimator
--- a/.github/workflows/unverified_prs.yml
+++ b/.github/workflows/unverified_prs.yml
@@ -17,7 +17,7 @@ jobs:
      issues: read # Needed to fetch all issues
      pull-requests: write # Needed to write message and close the PR
    steps:
-      - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
+      - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v10.1.1
        with:
          stale-pr-message: 'This PR is unverified and has been open for 2 days, it will now be closed. If you want to contribute please sign the CLA as indicated by the bot.'
          days-before-stale: 2
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ target/
 **/*.rmeta
 **/Cargo.lock
 **/*.bin
+**/.DS_Store

 # Some of our bench outputs
 /tfhe/benchmarks_parameters
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -11,7 +11,7 @@
 /tfhe/src/core_crypto/gpu               @agnesLeroy
 /tfhe/src/core_crypto/hpu               @zama-ai/hardware

-/tfhe/src/shortint/                     @mayeul-zama
+/tfhe/src/shortint/                     @mayeul-zama @nsarlin-zama

 /tfhe/src/integer/                      @tmontaigu
 /tfhe/src/integer/gpu                   @agnesLeroy
@@ -19,10 +19,18 @@

 /tfhe/src/high_level_api/               @tmontaigu

+/tfhe-zk-pok/                           @nsarlin-zama
+
+/tfhe-benchmark/                        @soonum
+
+/utils/                                 @nsarlin-zama
+
 /Makefile                               @IceTDrinker @soonum

 /mockups/tfhe-hpu-mockup                @zama-ai/hardware

 /.github/                               @soonum
+/ci/                                    @soonum
+/scripts/                               @soonum

 /CODEOWNERS                             @IceTDrinker
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,7 +27,7 @@ rust-version = "1.91.1"
 [workspace.dependencies]
 aligned-vec = { version = "0.6", default-features = false }
 bytemuck = "1.24"
-dyn-stack = { version = "0.11", default-features = false }
+dyn-stack = { version = "0.13", default-features = false }
 itertools = "0.14"
 num-complex = "0.4"
 pulp = { version = "0.22", default-features = false }
@@ -36,6 +36,8 @@ rayon = "1.11"
 serde = { version = "1.0", default-features = false }
 wasm-bindgen = "0.2.101"
 getrandom = "0.2.8"
+# The project maintainers consider this to be the last version of the 1.3 branch; any newer version should not be trusted
+bincode = "=1.3.3"

 [profile.bench]
 lto = "fat"
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ BENCH_TYPE?=latency
 BENCH_PARAM_TYPE?=classical
 BENCH_PARAMS_SET?=default
 BENCH_CUSTOM_COMMAND:=
-NODE_VERSION=22.6
+NODE_VERSION=24.12
 BACKWARD_COMPAT_DATA_DIR=utils/tfhe-backward-compat-data
 BACKWARD_COMPAT_DATA_GEN_VERSION:=$(TFHE_VERSION)
 TEST_VECTORS_DIR=apps/test-vectors
@@ -559,9 +559,9 @@ check_rust_bindings_did_not_change:

 .PHONY: tfhe_lints # Run custom tfhe-rs lints
 tfhe_lints: install_cargo_dylint
-	RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe --no-deps -- \
+	RUSTFLAGS="$(RUSTFLAGS) -Dwarnings" cargo dylint --all -p tfhe --no-deps -- \
 		--features=boolean,shortint,integer,strings,zk-pok
-	RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe-zk-pok --no-deps -- \
+	RUSTFLAGS="$(RUSTFLAGS) -Dwarnings" cargo dylint --all -p tfhe-zk-pok --no-deps -- \
 		--features=experimental

 .PHONY: audit_dependencies # Run cargo audit to check vulnerable dependencies
@@ -996,6 +996,15 @@ test_noise_check:
 		--features=boolean,shortint,integer -p tfhe -- noise_check \
 		--test-threads=1 --nocapture

+.PHONY: test_noise_check_gpu # Run dedicated noise and pfail check tests on gpu backend
+test_noise_check_gpu:
+	@# First run the sanity checks to make sure the atomic patterns are correct
+	RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
+		--features=boolean,shortint,integer,gpu -p tfhe -- gpu_sanity_check
+	RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
+		--features=boolean,shortint,integer,gpu -p tfhe -- gpu_noise_check \
+		--test-threads=1 --nocapture
+
 .PHONY: test_safe_serialization # Run the tests for safe serialization
 test_safe_serialization: install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
@@ -1237,6 +1246,10 @@ check_intra_md_links: install_mlc
 check_md_links: install_mlc
 	mlc --match-file-extension tfhe/docs

+.PHONY: check_main_readme_links # Check main README links
+check_main_readme_links: install_mlc
+	mlc README.md
+
 .PHONY: check_doc_paths_use_dash # Check paths use "-" instead of "_" in docs for gitbook compatibility
 check_doc_paths_use_dash:
 	python3 ./scripts/check_doc_paths_use_dash.py
@@ -1287,13 +1300,14 @@ run_web_js_api_parallel: build_web_js_api_parallel setup_venv
 	--browser-path $(browser_path) \
 	--driver-path $(driver_path) \
 	--browser-kind  $(browser_kind) \
-	--server-cmd "npm run server" \
+	--server-cmd $(server_cmd) \
 	--server-workdir "$(WEB_SERVER_DIR)" \
 	--id-pattern $(filter)

 test_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
 test_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
 test_web_js_api_parallel_chrome: browser_kind = chrome
+test_web_js_api_parallel_chrome: server_cmd = "npm run server:multithreaded"
 test_web_js_api_parallel_chrome: filter = Test

 .PHONY: test_web_js_api_parallel_chrome # Run tests for the web wasm api on Chrome
@@ -1309,6 +1323,7 @@ test_web_js_api_parallel_chrome_ci: setup_venv
 test_web_js_api_parallel_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
 test_web_js_api_parallel_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
 test_web_js_api_parallel_firefox: browser_kind = firefox
+test_web_js_api_parallel_firefox: server_cmd = "npm run server:multithreaded"
 test_web_js_api_parallel_firefox: filter = Test

 .PHONY: test_web_js_api_parallel_firefox # Run tests for the web wasm api on Firefox
@@ -1339,7 +1354,6 @@ dieharder_csprng: install_dieharder build_tfhe_csprng

 .PHONY: clippy_bench # Run clippy lints on tfhe-benchmark
 clippy_bench: install_rs_check_toolchain
-	! (grep --recursive "trivial" tfhe-benchmark && echo "trivial found in benches")
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
 		-p tfhe-benchmark -- --no-deps -D warnings
@@ -1559,6 +1573,7 @@ bench_pbs128_gpu: install_rs_check_toolchain
 bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
 bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
 bench_web_js_api_parallel_chrome: browser_kind = chrome
+bench_web_js_api_parallel_chrome: server_cmd = "npm run server:multithreaded"
 bench_web_js_api_parallel_chrome: filter = Bench

 .PHONY: bench_web_js_api_parallel_chrome # Run benchmarks for the web wasm api
@@ -1574,6 +1589,7 @@ bench_web_js_api_parallel_chrome_ci: setup_venv
 bench_web_js_api_parallel_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
 bench_web_js_api_parallel_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
 bench_web_js_api_parallel_firefox: browser_kind = firefox
+bench_web_js_api_parallel_firefox: server_cmd = "npm run server:multithreaded"
 bench_web_js_api_parallel_firefox: filter = Bench

 .PHONY: bench_web_js_api_parallel_firefox # Run benchmarks for the web wasm api
@@ -1586,15 +1602,49 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
 	nvm use $(NODE_VERSION) && \
 	$(MAKE) bench_web_js_api_parallel_firefox

+bench_web_js_api_unsafe_coop_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
+bench_web_js_api_unsafe_coop_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
+bench_web_js_api_unsafe_coop_chrome: browser_kind = chrome
+bench_web_js_api_unsafe_coop_chrome: server_cmd = "npm run server:unsafe-coop"
+bench_web_js_api_unsafe_coop_chrome: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
+
+.PHONY: bench_web_js_api_unsafe_coop_chrome # Run benchmarks for the web wasm api without cross-origin isolation
+bench_web_js_api_unsafe_coop_chrome: run_web_js_api_parallel
+
+.PHONY: bench_web_js_api_unsafe_coop_chrome_ci # Run benchmarks for the web wasm api without cross-origin isolation
+bench_web_js_api_unsafe_coop_chrome_ci: setup_venv
+	source ~/.nvm/nvm.sh && \
+	nvm install $(NODE_VERSION) && \
+	nvm use $(NODE_VERSION) && \
+	$(MAKE) bench_web_js_api_unsafe_coop_chrome
+
+bench_web_js_api_unsafe_coop_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
+bench_web_js_api_unsafe_coop_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
+bench_web_js_api_unsafe_coop_firefox: browser_kind = firefox
+bench_web_js_api_unsafe_coop_firefox: server_cmd = "npm run server:unsafe-coop"
+bench_web_js_api_unsafe_coop_firefox: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
+
+.PHONY: bench_web_js_api_unsafe_coop_firefox # Run benchmarks for the web wasm api without cross-origin isolation
+bench_web_js_api_unsafe_coop_firefox: run_web_js_api_parallel
+
+.PHONY: bench_web_js_api_unsafe_coop_firefox_ci # Run benchmarks for the web wasm api without cross-origin isolation
+bench_web_js_api_unsafe_coop_firefox_ci: setup_venv
+	source ~/.nvm/nvm.sh && \
+	nvm install $(NODE_VERSION) && \
+	nvm use $(NODE_VERSION) && \
+	$(MAKE) bench_web_js_api_unsafe_coop_firefox
+
 .PHONY: bench_hlapi # Run benchmarks for integer operations
 bench_hlapi: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi \
 	--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --

 .PHONY: bench_hlapi_gpu # Run benchmarks for integer operations on GPU
 bench_hlapi_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi \
 	--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --

@@ -1603,6 +1653,7 @@ bench_hlapi_hpu: install_rs_check_toolchain
 	source ./setup_hpu.sh --config $(HPU_CONFIG); \
 	export V80_PCIE_DEV=${V80_PCIE_DEV}; \
 	RUSTFLAGS="$(RUSTFLAGS)" \
+	__TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi \
 	--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --
@@ -1616,7 +1667,14 @@ bench_hlapi_erc20: install_rs_check_toolchain

 .PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
 bench_hlapi_erc20_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
+    cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench hlapi-erc20 \
+	--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
+
+.PHONY: bench_hlapi_erc20_gpu_classical # Run benchmarks for ERC20 operations on GPU with classical parameters
+bench_hlapi_erc20_gpu_classical: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=classical \
    cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
 	--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1630,7 +1688,14 @@ bench_hlapi_dex: install_rs_check_toolchain

 .PHONY: bench_hlapi_dex_gpu # Run benchmarks for DEX operations on GPU
 bench_hlapi_dex_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE)  __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench hlapi-dex \
+	--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
+
+.PHONY: bench_hlapi_dex_gpu_classical # Run benchmarks for DEX operations on GPU with classical parameters
+bench_hlapi_dex_gpu_classical: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_PARAM_TYPE=classical \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-dex \
 	--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1651,14 +1716,14 @@ bench_tfhe_zk_pok: install_rs_check_toolchain

 .PHONY: bench_hlapi_noise_squash # Run benchmarks for noise squash operation
 bench_hlapi_noise_squash: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-noise-squash \
 	--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --

 .PHONY: bench_hlapi_noise_squash_gpu # Run benchmarks for noise squash operation on GPU
 bench_hlapi_noise_squash_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-noise-squash \
 	--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1854,8 +1919,7 @@ fpcc:
 	$(call run_recipe_with_details,check_md_docs_are_tested)
 	$(call run_recipe_with_details,check_intra_md_links)
 	$(call run_recipe_with_details,check_doc_paths_use_dash)
-	$(call run_recipe_with_details,clippy_fast)
-	$(call run_recipe_with_details,check_compile_tests)
+	$(call run_recipe_with_details,check_main_readme_links)

 .PHONY: conformance # Automatically fix problems that can be fixed
 conformance: fix_newline fmt fmt_js
--- a/README.md
+++ b/README.md
@@ -170,9 +170,9 @@ A document containing scientific and technical details about algorithms implemen
 <br></br>

 ### Tutorials
- [[Video tutorial] Implement signed integers using TFHE-rs ](https://www.zama.ai/post/video-tutorial-implement-signed-integers-ssing-tfhe-rs)
- [Homomorphic parity bit](https://docs.zama.ai/tfhe-rs/tutorials/parity_bit)
- [Homomorphic case changing on Ascii string](https://docs.zama.ai/tfhe-rs/tutorials/ascii_fhe_string)
+- [[Video tutorial] Implement signed integers using TFHE-rs ](https://www.zama.ai/post/video-tutorial-implement-signed-integers-using-tfhe-rs)
+- [Homomorphic parity bit](https://docs.zama.ai/tfhe-rs/tutorials/parity-bit)
+- [Homomorphic case changing on Ascii string](https://docs.zama.ai/tfhe-rs/tutorials/ascii-fhe-string)
 - [Boolean SHA256 with TFHE-rs](https://www.zama.ai/post/boolean-sha256-tfhe-rs)
 - [Dark market with TFHE-rs](https://www.zama.ai/post/dark-market-tfhe-rs)
 - [Regular expression engine with TFHE-rs](https://www.zama.ai/post/regex-engine-tfhe-rs)
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -0,0 +1,14 @@
+# Security Policy
+
+We take security seriously. If you discover a vulnerability, please follow the guidelines below to report it to us responsibly.
+
+## Reporting a Vulnerability
+
+If you find a security-related bug in this project, we kindly ask you to disclose it responsibly and to give us
+appropriate time to react, analyze the issue, and develop a fix that mitigates the vulnerability.
+
+Please report any vulnerability privately using the [GitHub security advisory report](https://github.com/zama-ai/tfhe-rs/security/advisories/new).
+
+## Recognition
+
+We appreciate and acknowledge responsible reporters publicly (unless requested otherwise) in our security advisories and contributors list.
--- a/apps/test-vectors/Cargo.toml
+++ b/apps/test-vectors/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tfhe-test-vectors"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2024"
 rust-version.workspace = true

--- a/apps/test-vectors/checksums.sha256
+++ b/apps/test-vectors/checksums.sha256
@@ -1,28 +1,36 @@
-2abec9dc5d399ece68ac227f67f7cafcb9a6acac36ed734fe3d5244021eb1cda  data/toy_params/glwe_after_spec_br.cbor
+08f31a47c29cc4d72ad32c0b5411fa20b3deef5b84558dd2fb892d3cdf90528a  data/toy_params/glwe_after_id_br_karatsuba.cbor
+29b6e3e7d27700004b70dca24d225816500490e2d6ee49b9af05837fd421896b  data/valid_params_128/lwe_after_spec_pbs.cbor
 2c70d1d78cc3760733850a353ace2b9c4705e840141b75841739e90e51247e18  data/valid_params_128/small_lwe_secret_key.cbor
+2fb4bb45c259b8383da10fc8f9459c40a6972c49b1696eb107f0a75640724be5  data/toy_params/lwe_after_id_pbs_karatsuba.cbor
 36c9080b636475fcacca503ce041bbfeee800fd3e1890dee559ea18defff9fe8  data/toy_params/glwe_after_id_br.cbor
 377761beeb4216cf5aa2624a8b64b8259f5a75c32d28e850be8bced3a0cdd6f5  data/toy_params/ksk.cbor
 59dba26d457f96478eda130cab5301fce86f23c6a8807de42f2a1e78c4985ca7  data/valid_params_128/lwe_ks.cbor
+5d80dd93fefae4f4f89484dfcd65bbe99cc32e7e3b0a90c33dd0d77516c0a023  data/valid_params_128/glwe_after_id_br_karatsuba.cbor
 656f0009c7834c5bcb61621e222047516054b9bc5d0593d474ab8f1c086b67a6  data/valid_params_128/lwe_after_id_pbs.cbor
 699580ca92b9c2f9e1f57fb1e312c9e8cb29714f7acdef9d2ba05f798546751f  data/toy_params/lwe_sum.cbor
 6e54ab41056984595b077baff70236d934308cf5c0c33b4482fbfb129b3756c6  data/valid_params_128/glwe_after_id_br.cbor
 70f5e5728822de05b49071efb5ec28551b0f5cc87aa709a455d8e7f04b9c96ee  data/toy_params/lwe_after_id_pbs.cbor
+76a5c52cab7fec1dc167da676c6cd39479cda6b2bb9f4e0573cb7d99c2692faa  data/valid_params_128/lwe_after_id_pbs_karatsuba.cbor
 7cc6803f5fbc3d5a1bf597f2b979ce17eecd3d6baca12183dea21022a7b65c52  data/toy_params/bsk.cbor
 7f3c40a134623b44779a556212477fea26eaed22450f3b6faeb8721d63699972  data/valid_params_128/lwe_sum.cbor
 837b3bd3245d4d0534ed255fdef896fb4fa6998a258a14543dfdadd0bfc9b6dd  data/toy_params/lwe_prod.cbor
-8ee68ed99dd9103fb62b1e2c7c8cf483706ae2071b792d4bd16f9f93f64871f9  data/toy_params/lwe_after_spec_pbs.cbor
-99a19c5d6d5f4fd81d9164d0ff96719ef362eabda256bce6a55cba6cb69e42bf  data/valid_params_128/glwe_after_spec_br.cbor
+9ece8ca9c1436258b94e8c5e629b8722f9b18fdd415dd5209b6167a9dde8491c  data/toy_params/glwe_after_spec_br_karatsuba.cbor
 aa44aea29efd6d9e4d35a21a625d9cba155672e3f7ed3eddee1e211e62ad146b  data/valid_params_128/lwe_ms.cbor
 b7a037b9eaa88d6385167579b93e26a0cb6976d9b8967416fd1173e113bda199  data/valid_params_128/large_lwe_secret_key.cbor
+b7b8e3586128887bd682120f3e3a43156139bce5e3fe0b03284f8753a864d647  data/toy_params/lwe_after_spec_pbs_karatsuba.cbor
+bd00a8ae7494e400de5753029552ee1647efe7e17409b863a26a13b081099b8c  data/toy_params/lwe_after_spec_pbs.cbor
 c6df98676de04fe54b5ffc2eb30a82ebb706c9d7d5a4e0ed509700fec88761f7  data/toy_params/lwe_ms.cbor
 c7d5a864d5616a7d8ad50bbf40416e41e6c9b60c546dc14d4aa8fc40a418baa7  data/toy_params/large_lwe_secret_key.cbor
 c806533b325b1009db38be2f9bef5f3b2fad6b77b4c71f2855ccc9d3b4162e98  data/valid_params_128/lwe_b.cbor
 c9eb75bd2993639348a679cf48c06e3c38d1a513f48e5b0ce0047cea8cff6bbc  data/toy_params/lwe_a.cbor
+d3391969acf26dc69de0927ba279139d8d79999944069addc8ff469ad6c5ae2d  data/valid_params_128/lwe_after_spec_pbs_karatsuba.cbor
 d6da5baef0e787f6be56e218d8354e26904652602db964844156fdff08350ce6  data/toy_params/lwe_ks.cbor
-e44ffa6e5a50a03d32721180a051c8ce62f1791d4853aeaebed0200c183a57cf  data/valid_params_128/lwe_after_spec_pbs.cbor
 e591ab9af1b6a0aede273f9a3abb65a4c387feb5fa06a6959e9314058ca0f7e5  data/valid_params_128/ksk.cbor
+e59b002df3a9b01ad321ec51cf076fa35131ab9dbef141d1c54b717d61426c92  data/valid_params_128/glwe_after_spec_br_karatsuba.cbor
+e628354c81508a2d888016e8282df363dd12f1e19190b6475d4eb9d7ab8ae007  data/valid_params_128/glwe_after_spec_br.cbor
 e69d2d2c064fc8c0460b39191ca65338146990349954f5ec5ebd01d93610e7eb  data/valid_params_128/lwe_a.cbor
 e76c24b2a0c9a842ad13dda35473c2514f9e7d20983b5ea0759c4521a91626d9  data/valid_params_128/lwe_prod.cbor
 e9afe7019acba5cda926f13e06df9930571611729d2f2e8ce41956e1f5e1db6f  data/valid_params_128/bsk.cbor
 eadf2eff35133ffba075df11faecddd6e7af9ddc398011ec4568e5528812b3e2  data/toy_params/lwe_b.cbor
 ee9fcf45f1379ca3a7d7bf2b0e7a1cc920ceb496c0217e8604b0b58d2831749e  data/toy_params/small_lwe_secret_key.cbor
+f7a89ac440def1c8aa3f42ecc41813632509df38cd4544d1a507711831e629bd  data/toy_params/glwe_after_spec_br.cbor
--- a/apps/test-vectors/data/README.md
+++ b/apps/test-vectors/data/README.md
@@ -1,5 +1,5 @@
 # Test vectors for TFHE
-These test vectors are generated using [TFHE-rs](https://github.com/zama-ai/tfhe-rs), with the git tag `tfhe-test-vectors-0.1.0`.
+These test vectors are generated using [TFHE-rs](https://github.com/zama-ai/tfhe-rs), with the git tag `tfhe-test-vectors-0.2.0`.

 They are TFHE-rs objects serialized in the [cbor format](https://cbor.io/). You can deserialize them using any cbor library for the language of your choice. For example, using the [cbor2](https://pypi.org/project/cbor2/) program, run: `cbor2 --pretty toy_params/lwe_a.cbor`.

@@ -39,6 +39,9 @@ The following values are generated:
 | `glwe_after_spec_br` | The glwe returned by the application of the spec blind rotation on the mod switched ciphertexts.             | `GlweCiphertext<Vec<u64>>` | rot spec LUT |
 | `lwe_after_spec_pbs` | The lwe returned by the application of the sample extract operation on the output of the spec blind rotation | `LweCiphertext<Vec<u64>>`  | `spec(A)`    |

+Ciphertexts with the `_karatsuba` suffix are generated using the Karatsuba polynomial multiplication algorithm in the blind rotation, while default ciphertexts are generated using an FFT-based multiplication.
+The Karatsuba variants make it easier to reproduce bit-exact results.
+
 ### Encodings
 #### Non native encoding
 Warning: TFHE-rs uses a specific encoding for non native (ie: u32, u64) power of two ciphertext modulus. This encoding puts the encoded value in the high bits of the native integer.
--- a/apps/test-vectors/data/toy_params/glwe_after_id_br_karatsuba.cbor
+++ b/apps/test-vectors/data/toy_params/glwe_after_id_br_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08f31a47c29cc4d72ad32c0b5411fa20b3deef5b84558dd2fb892d3cdf90528a
+size 4679
--- a/apps/test-vectors/data/toy_params/glwe_after_spec_br.cbor
+++ b/apps/test-vectors/data/toy_params/glwe_after_spec_br.cbor
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2abec9dc5d399ece68ac227f67f7cafcb9a6acac36ed734fe3d5244021eb1cda
+oid sha256:f7a89ac440def1c8aa3f42ecc41813632509df38cd4544d1a507711831e629bd
 size 4679
--- a/apps/test-vectors/data/toy_params/glwe_after_spec_br_karatsuba.cbor
+++ b/apps/test-vectors/data/toy_params/glwe_after_spec_br_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ece8ca9c1436258b94e8c5e629b8722f9b18fdd415dd5209b6167a9dde8491c
+size 4679
--- a/apps/test-vectors/data/toy_params/lwe_after_id_pbs_karatsuba.cbor
+++ b/apps/test-vectors/data/toy_params/lwe_after_id_pbs_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fb4bb45c259b8383da10fc8f9459c40a6972c49b1696eb107f0a75640724be5
+size 2365
--- a/apps/test-vectors/data/toy_params/lwe_after_spec_pbs.cbor
+++ b/apps/test-vectors/data/toy_params/lwe_after_spec_pbs.cbor
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ee68ed99dd9103fb62b1e2c7c8cf483706ae2071b792d4bd16f9f93f64871f9
+oid sha256:bd00a8ae7494e400de5753029552ee1647efe7e17409b863a26a13b081099b8c
 size 2365
--- a/apps/test-vectors/data/toy_params/lwe_after_spec_pbs_karatsuba.cbor
+++ b/apps/test-vectors/data/toy_params/lwe_after_spec_pbs_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7b8e3586128887bd682120f3e3a43156139bce5e3fe0b03284f8753a864d647
+size 2365
--- a/apps/test-vectors/data/valid_params_128/glwe_after_id_br_karatsuba.cbor
+++ b/apps/test-vectors/data/valid_params_128/glwe_after_id_br_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d80dd93fefae4f4f89484dfcd65bbe99cc32e7e3b0a90c33dd0d77516c0a023
+size 36935
--- a/apps/test-vectors/data/valid_params_128/glwe_after_spec_br.cbor
+++ b/apps/test-vectors/data/valid_params_128/glwe_after_spec_br.cbor
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99a19c5d6d5f4fd81d9164d0ff96719ef362eabda256bce6a55cba6cb69e42bf
+oid sha256:e628354c81508a2d888016e8282df363dd12f1e19190b6475d4eb9d7ab8ae007
 size 36935
--- a/apps/test-vectors/data/valid_params_128/glwe_after_spec_br_karatsuba.cbor
+++ b/apps/test-vectors/data/valid_params_128/glwe_after_spec_br_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e59b002df3a9b01ad321ec51cf076fa35131ab9dbef141d1c54b717d61426c92
+size 36935
--- a/apps/test-vectors/data/valid_params_128/lwe_after_id_pbs_karatsuba.cbor
+++ b/apps/test-vectors/data/valid_params_128/lwe_after_id_pbs_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76a5c52cab7fec1dc167da676c6cd39479cda6b2bb9f4e0573cb7d99c2692faa
+size 18493
--- a/apps/test-vectors/data/valid_params_128/lwe_after_spec_pbs.cbor
+++ b/apps/test-vectors/data/valid_params_128/lwe_after_spec_pbs.cbor
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e44ffa6e5a50a03d32721180a051c8ce62f1791d4853aeaebed0200c183a57cf
+oid sha256:29b6e3e7d27700004b70dca24d225816500490e2d6ee49b9af05837fd421896b
 size 18493
--- a/apps/test-vectors/data/valid_params_128/lwe_after_spec_pbs_karatsuba.cbor
+++ b/apps/test-vectors/data/valid_params_128/lwe_after_spec_pbs_karatsuba.cbor
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3391969acf26dc69de0927ba279139d8d79999944069addc8ff469ad6c5ae2d
+size 18493
--- a/apps/test-vectors/src/main.rs
+++ b/apps/test-vectors/src/main.rs
@@ -1,5 +1,6 @@
 use serde::Serialize;
 use std::fs::{File, create_dir_all, read_dir, remove_dir_all, remove_file};
+use std::ops::Deref;
 use std::path::{Path, PathBuf};

 use tfhe::core_crypto::commons::generators::DeterministicSeeder;
@@ -38,7 +39,7 @@ const ENCODING: Encoding = Encoding {
    msg_bits: 4,
 };

-const SPEC_LUT: fn(u64) -> u64 = |x| (x * 2) & (1u64 << ENCODING.msg_bits);
+const SPEC_LUT: fn(u64) -> u64 = |x| (x * 2) % (ENCODING.msg_modulus() as u64);
 const ID_LUT: fn(u64) -> u64 = |x| x;

 const DATA_DIR: &str = "./data";
@@ -112,6 +113,18 @@ fn store_data<Data: Serialize, P: AsRef<Path>>(path: P, data: &Data, name: &str)
    ciborium::ser::into_writer(data, &mut file).unwrap();
 }

+/// Panics if `data` holds no non-zero scalar.
+///
+/// An empty slice also panics, since it contains no non-zero element.
+/// Used as a sanity check right before generated values are stored, so that
+/// an all-zero buffer is caught at generation time.
+fn assert_data_not_zero<Scalar: UnsignedInteger, Data: AsRef<[Scalar]>>(data: &Data) {
+    assert!(
+        data.as_ref().iter().any(|&x| x != Scalar::ZERO),
+        "generated data must contain at least one non-zero value"
+    );
+}
+
 #[allow(clippy::too_many_arguments)]
 fn generate_test_vectors<P: AsRef<Path>>(
    path: P,
@@ -139,10 +144,12 @@ fn generate_test_vectors<P: AsRef<Path>>(
    let glwe_secret_key: GlweSecretKey<Vec<u64>> =
        GlweSecretKey::generate_new_binary(glwe_dimension, polynomial_size, &mut secret_generator);
    let large_lwe_secret_key = glwe_secret_key.as_lwe_secret_key();
+    assert_data_not_zero(&large_lwe_secret_key);
    store_data(path, &large_lwe_secret_key, "large_lwe_secret_key");

    let small_lwe_secret_key: LweSecretKey<Vec<u64>> =
        LweSecretKey::generate_new_binary(lwe_dimension, &mut secret_generator);
+    assert_data_not_zero(&small_lwe_secret_key);
    store_data(path, &small_lwe_secret_key, "small_lwe_secret_key");

    let lwe_noise_distribution = Gaussian::from_standard_dev(StandardDev(lwe_noise_stddev), 0.);
@@ -156,6 +163,7 @@ fn generate_test_vectors<P: AsRef<Path>>(
        encoding.ciphertext_modulus,
        &mut encryption_generator,
    );
+    assert_data_not_zero(&lwe_a);
    store_data(path, &lwe_a, "lwe_a");

    let plaintext_b = encoding.encode(MSG_B);
@@ -166,6 +174,7 @@ fn generate_test_vectors<P: AsRef<Path>>(
        encoding.ciphertext_modulus,
        &mut encryption_generator,
    );
+    assert_data_not_zero(&lwe_b);
    store_data(path, &lwe_b, "lwe_b");

    let mut lwe_sum = LweCiphertext::new(
@@ -180,6 +189,7 @@ fn generate_test_vectors<P: AsRef<Path>>(
    let res = encoding.decode(decrypted_sum);

    assert_eq!(res, MSG_A + MSG_B);
+    assert_data_not_zero(&lwe_sum);
    store_data(path, &lwe_sum, "lwe_sum");

    let mut lwe_prod = LweCiphertext::new(
@@ -194,6 +204,7 @@ fn generate_test_vectors<P: AsRef<Path>>(
    let res = encoding.decode(decrypted_prod);

    assert_eq!(res, MSG_A * MSG_B);
+    assert_data_not_zero(&lwe_prod);
    store_data(path, &lwe_prod, "lwe_prod");

    let ksk = allocate_and_generate_new_lwe_keyswitch_key(
@@ -205,6 +216,7 @@ fn generate_test_vectors<P: AsRef<Path>>(
        encoding.ciphertext_modulus,
        &mut encryption_generator,
    );
+    assert_data_not_zero(&ksk);
    store_data(path, &ksk, "ksk");

    let mut lwe_ks = LweCiphertext::new(
@@ -218,6 +230,7 @@ fn generate_test_vectors<P: AsRef<Path>>(
    let res = encoding.decode(decrypted_ks);

    assert_eq!(res, MSG_A);
+    assert_data_not_zero(&lwe_ks);
    store_data(path, &lwe_ks, "lwe_ks");

    let bsk = par_allocate_and_generate_new_lwe_bootstrap_key(
@@ -229,6 +242,7 @@ fn generate_test_vectors<P: AsRef<Path>>(
        encoding.ciphertext_modulus,
        &mut encryption_generator,
    );
+    assert_data_not_zero(bsk.deref());
    store_data(path, &bsk, "bsk");

    let mut fourier_bsk = FourierLweBootstrapKey::new(
@@ -246,11 +260,15 @@ fn generate_test_vectors<P: AsRef<Path>>(

    let modswitched = lwe_ciphertext_modulus_switch(lwe_in_ms, log_modulus);
    let lwe_ms = modswitched_to_lwe(&modswitched);
+    assert_data_not_zero(&lwe_ms);
    store_data(path, &lwe_ms, "lwe_ms");

    let mut id_lut = encoding.encode_lut(glwe_dimension, polynomial_size, ID_LUT);
+    assert_data_not_zero(&id_lut);
+    let mut id_lut_karatsuba = id_lut.clone();

    blind_rotate_assign(&modswitched, &mut id_lut, &fourier_bsk);
+    assert_data_not_zero(&id_lut);
    store_data(path, &id_lut, "glwe_after_id_br");

    let mut lwe_pbs_id = LweCiphertext::new(
@@ -267,11 +285,38 @@ fn generate_test_vectors<P: AsRef<Path>>(
    let res = encoding.decode(decrypted_pbs_id);

    assert_eq!(res, MSG_A);
+    assert_data_not_zero(&lwe_pbs_id);
    store_data(path, &lwe_pbs_id, "lwe_after_id_pbs");

+    blind_rotate_karatsuba_assign(&modswitched, &mut id_lut_karatsuba, &bsk);
+    store_data(path, &id_lut_karatsuba, "glwe_after_id_br_karatsuba");
+
+    let mut lwe_pbs_karatsuba_id = LweCiphertext::new(
+        0u64,
+        glwe_dimension
+            .to_equivalent_lwe_dimension(polynomial_size)
+            .to_lwe_size(),
+        encoding.ciphertext_modulus,
+    );
+
+    extract_lwe_sample_from_glwe_ciphertext(
+        &id_lut_karatsuba,
+        &mut lwe_pbs_karatsuba_id,
+        MonomialDegree(0),
+    );
+
+    let decrypted_pbs_id = decrypt_lwe_ciphertext(&large_lwe_secret_key, &lwe_pbs_karatsuba_id);
+    let res = encoding.decode(decrypted_pbs_id);
+
+    assert_eq!(res, MSG_A);
+    store_data(path, &lwe_pbs_karatsuba_id, "lwe_after_id_pbs_karatsuba");
+
    let mut spec_lut = encoding.encode_lut(glwe_dimension, polynomial_size, SPEC_LUT);
+    assert_data_not_zero(&spec_lut);
+    let mut spec_lut_karatsuba = spec_lut.clone();

    blind_rotate_assign(&modswitched, &mut spec_lut, &fourier_bsk);
+    assert_data_not_zero(&spec_lut);
    store_data(path, &spec_lut, "glwe_after_spec_br");

    let mut lwe_pbs_spec = LweCiphertext::new(
@@ -288,7 +333,35 @@ fn generate_test_vectors<P: AsRef<Path>>(
    let res = encoding.decode(decrypted_pbs_spec);

    assert_eq!(res, SPEC_LUT(MSG_A));
+    assert_data_not_zero(&lwe_pbs_spec);
    store_data(path, &lwe_pbs_spec, "lwe_after_spec_pbs");
+
+    blind_rotate_karatsuba_assign(&modswitched, &mut spec_lut_karatsuba, &bsk);
+    store_data(path, &spec_lut_karatsuba, "glwe_after_spec_br_karatsuba");
+
+    let mut lwe_pbs_karatsuba_spec = LweCiphertext::new(
+        0u64,
+        glwe_dimension
+            .to_equivalent_lwe_dimension(polynomial_size)
+            .to_lwe_size(),
+        encoding.ciphertext_modulus,
+    );
+
+    extract_lwe_sample_from_glwe_ciphertext(
+        &spec_lut_karatsuba,
+        &mut lwe_pbs_karatsuba_spec,
+        MonomialDegree(0),
+    );
+
+    let decrypted_pbs_spec = decrypt_lwe_ciphertext(&large_lwe_secret_key, &lwe_pbs_karatsuba_spec);
+    let res = encoding.decode(decrypted_pbs_spec);
+
+    assert_eq!(res, SPEC_LUT(MSG_A));
+    store_data(
+        path,
+        &lwe_pbs_karatsuba_spec,
+        "lwe_after_spec_pbs_karatsuba",
+    );
 }

 fn rm_dir_except_readme<P: AsRef<Path>>(dir: P) {
--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tfhe-cuda-backend"
-version = "0.12.0"
+version = "0.13.0"
 edition = "2021"
 authors = ["Zama team"]
 license = "BSD-3-Clause-Clear"
--- a/backends/tfhe-cuda-backend/cuda/include/aes/aes_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/aes/aes_utilities.h
@@ -35,7 +35,8 @@ template <typename Torus> struct int_aes_lut_buffers {
        params.glwe_dimension, params.polynomial_size, params.message_modulus,
        params.carry_modulus, and_lambda, allocate_gpu_memory);
    auto active_streams_and_lut = streams.active_gpu_subset(
-        SBOX_MAX_AND_GATES * num_aes_inputs * sbox_parallelism);
+        SBOX_MAX_AND_GATES * num_aes_inputs * sbox_parallelism,
+        params.pbs_type);
    this->and_lut->broadcast_lut(active_streams_and_lut);
    this->and_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);

@@ -50,8 +51,8 @@ template <typename Torus> struct int_aes_lut_buffers {
        this->flush_lut->get_degree(0), this->flush_lut->get_max_degree(0),
        params.glwe_dimension, params.polynomial_size, params.message_modulus,
        params.carry_modulus, flush_lambda, allocate_gpu_memory);
-    auto active_streams_flush_lut =
-        streams.active_gpu_subset(AES_STATE_BITS * num_aes_inputs);
+    auto active_streams_flush_lut = streams.active_gpu_subset(
+        AES_STATE_BITS * num_aes_inputs, params.pbs_type);
    this->flush_lut->broadcast_lut(active_streams_flush_lut);
    this->flush_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);

@@ -65,7 +66,8 @@ template <typename Torus> struct int_aes_lut_buffers {
        this->carry_lut->get_degree(0), this->carry_lut->get_max_degree(0),
        params.glwe_dimension, params.polynomial_size, params.message_modulus,
        params.carry_modulus, carry_lambda, allocate_gpu_memory);
-    auto active_streams_carry_lut = streams.active_gpu_subset(num_aes_inputs);
+    auto active_streams_carry_lut =
+        streams.active_gpu_subset(num_aes_inputs, params.pbs_type);
    this->carry_lut->broadcast_lut(active_streams_carry_lut);
    this->carry_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
  }
--- a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
@@ -35,17 +35,9 @@ void cuda_centered_modulus_switch_64(void *stream, uint32_t gpu_index,
                                     uint32_t lwe_dimension,
                                     uint32_t log_modulus);

-void cuda_improve_noise_modulus_switch_64(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, void const *lwe_array_indexes,
-    void const *encrypted_zeros, uint32_t lwe_size, uint32_t num_lwes,
-    uint32_t num_zeros, double input_variance, double r_sigma, double bound,
-    uint32_t log_modulus);
-
 void cuda_glwe_sample_extract_128(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths,
    uint32_t lwe_per_glwe, uint32_t glwe_dimension, uint32_t polynomial_size);
 }
-
 #endif
--- a/backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
+++ b/backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
@@ -8,7 +8,8 @@

 extern std::mutex m;
 extern bool p2p_enabled;
-extern const int THRESHOLD_MULTI_GPU;
+extern const int THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS;
+extern const int THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;

 extern "C" {
 int32_t cuda_setup_multi_gpu(int device_0_id);
@@ -39,7 +40,8 @@ get_variant_element(const std::variant<std::vector<Torus>, Torus> &variant,
  }
 }

-uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count);
+uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count,
+                              PBS_TYPE pbs_type);

 int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);

@@ -73,9 +75,10 @@ public:

  // Returns a subset of this set as an active subset. An active subset is one
  // that is temporarily used to perform some computation
-  CudaStreams active_gpu_subset(int num_radix_blocks) {
-    return CudaStreams(_streams, _gpu_indexes,
-                       get_active_gpu_count(num_radix_blocks, _gpu_count));
+  CudaStreams active_gpu_subset(int num_radix_blocks, PBS_TYPE pbs_type) {
+    return CudaStreams(
+        _streams, _gpu_indexes,
+        get_active_gpu_count(num_radix_blocks, _gpu_count, pbs_type));
  }

  // Returns a CudaStreams struct containing only the ith stream
@@ -183,6 +186,259 @@ public:
  }
 };

+// Set of additional CudaStreams ("internal streams") created from a base
+// CudaStreams, plus the events used to order work between stream 0 of a
+// caller-provided "main" CudaStreams and every internal stream.
+//
+// Lifecycle: default-construct, call
+// create_internal_cuda_streams_on_same_gpus() once, then call release()
+// before destruction; the destructor panics if resources are still held.
+struct InternalCudaStreams {
+private:
+  CudaStreams *_internal_cuda_streams;
+  uint32_t _num_internal_cuda_streams;
+  uint32_t _num_gpus;
+
+  // Recorded on main stream 0; the internal streams wait on it.
+  cudaEvent_t _incoming_event;
+  // GPU index _incoming_event was created with. Kept so release() can destroy
+  // the event even when there is no internal stream to take the index from.
+  uint32_t _incoming_event_gpu_index;
+  // One event per (internal stream set, GPU) pair, stored at
+  // [s * _num_gpus + g]; main stream 0 waits on them.
+  cudaEvent_t *_outgoing_events;
+
+  InternalCudaStreams(const InternalCudaStreams &) = delete;
+  InternalCudaStreams &operator=(const InternalCudaStreams &) = delete;
+
+  // Panics unless internal streams exist and main_streams starts on the same
+  // GPU as the first internal stream set. The nullptr check comes first so
+  // that _internal_cuda_streams[0] is never dereferenced on an object that is
+  // uninitialized, released, or was created with zero internal streams.
+  void check_main_streams(const CudaStreams &main_streams) const {
+    PANIC_IF_FALSE(_internal_cuda_streams != nullptr,
+                   "InternalCudaStreams: no internal streams available.");
+    PANIC_IF_FALSE(main_streams.gpu_index(0) ==
+                       _internal_cuda_streams[0].gpu_index(0),
+                   "InternalCudaStreams: gpu_index(0) of main_streams should "
+                   "be the same as _internal_cuda_streams[0].");
+  }
+
+public:
+  InternalCudaStreams() {
+    _internal_cuda_streams = nullptr;
+    _incoming_event = nullptr;
+    _incoming_event_gpu_index = 0;
+    _outgoing_events = nullptr;
+    _num_internal_cuda_streams = 0;
+    _num_gpus = 0;
+  }
+
+  // Creates num_internal_cuda_streams stream sets from base_streams (via
+  // CudaStreams::create_on_same_gpus), one incoming event for
+  // base_streams.gpu_index(0), and one outgoing event per (stream set, GPU).
+  // Panics if this object already holds resources.
+  void create_internal_cuda_streams_on_same_gpus(
+      const CudaStreams &base_streams, uint32_t num_internal_cuda_streams) {
+
+    // Also test the events: with zero internal streams the stream array stays
+    // nullptr while _incoming_event may already be allocated.
+    PANIC_IF_FALSE(_internal_cuda_streams == nullptr &&
+                       _incoming_event == nullptr &&
+                       _outgoing_events == nullptr,
+                   "InternalCudaStreams: object already initialized.");
+
+    _num_internal_cuda_streams = num_internal_cuda_streams;
+    _num_gpus = base_streams.count();
+
+    if (num_internal_cuda_streams > 0) {
+      _internal_cuda_streams = new CudaStreams[num_internal_cuda_streams];
+      for (uint32_t i = 0; i < num_internal_cuda_streams; ++i) {
+        _internal_cuda_streams[i].create_on_same_gpus(base_streams);
+      }
+    }
+
+    if (_num_gpus > 0) {
+      _incoming_event_gpu_index = base_streams.gpu_index(0);
+      _incoming_event = cuda_create_event(_incoming_event_gpu_index);
+    }
+
+    uint32_t total_events = num_internal_cuda_streams * _num_gpus;
+
+    if (total_events > 0) {
+      _outgoing_events = new cudaEvent_t[total_events];
+      for (uint32_t s = 0; s < num_internal_cuda_streams; ++s) {
+        for (uint32_t g = 0; g < _num_gpus; ++g) {
+          _outgoing_events[s * _num_gpus + g] =
+              cuda_create_event(base_streams.gpu_index(g));
+        }
+      }
+    }
+  }
+
+  // Returns the idx-th internal stream set; panics when idx is out of bounds.
+  CudaStreams &operator[](uint32_t idx) const {
+    PANIC_IF_FALSE(idx < _num_internal_cuda_streams,
+                   "InternalCudaStreams index out of bounds");
+    return _internal_cuda_streams[idx];
+  }
+
+  uint32_t num_streams() const { return _num_internal_cuda_streams; }
+
+  // Records the incoming event on main stream 0 and makes every stream of
+  // every internal stream set wait on it.
+  void
+  internal_streams_wait_for_main_stream_0(const CudaStreams &main_streams) {
+
+    check_main_streams(main_streams);
+
+    cuda_event_record(_incoming_event, main_streams.stream(0),
+                      main_streams.gpu_index(0));
+
+    for (uint32_t s = 0; s < _num_internal_cuda_streams; ++s) {
+      for (uint32_t g = 0; g < _num_gpus; ++g) {
+        cuda_stream_wait_event(_internal_cuda_streams[s].stream(g),
+                               _incoming_event,
+                               _internal_cuda_streams[s].gpu_index(g));
+      }
+    }
+  }
+
+  // Same as internal_streams_wait_for_main_stream_0, restricted to the
+  // internal stream sets listed in stream_indices[0..num_indices). Panics on
+  // an out-of-bounds index.
+  void
+  internal_streams_slice_wait_for_main_stream_0(const CudaStreams &main_streams,
+                                                const uint32_t *stream_indices,
+                                                size_t num_indices) {
+
+    check_main_streams(main_streams);
+
+    cuda_event_record(_incoming_event, main_streams.stream(0),
+                      main_streams.gpu_index(0));
+
+    for (size_t i = 0; i < num_indices; ++i) {
+      uint32_t s_idx = stream_indices[i];
+      PANIC_IF_FALSE(s_idx < _num_internal_cuda_streams,
+                     "InternalCudaStreams: stream index out of bounds");
+
+      for (uint32_t g = 0; g < _num_gpus; ++g) {
+        cuda_stream_wait_event(_internal_cuda_streams[s_idx].stream(g),
+                               _incoming_event,
+                               _internal_cuda_streams[s_idx].gpu_index(g));
+      }
+    }
+  }
+
+  // Records one outgoing event on every stream of every internal stream set,
+  // then makes main stream 0 wait on all of them.
+  void
+  main_stream_0_wait_for_internal_streams(const CudaStreams &main_streams) {
+
+    check_main_streams(main_streams);
+
+    for (uint32_t s = 0; s < _num_internal_cuda_streams; ++s) {
+      for (uint32_t g = 0; g < _num_gpus; ++g) {
+        cuda_event_record(_outgoing_events[s * _num_gpus + g],
+                          _internal_cuda_streams[s].stream(g),
+                          _internal_cuda_streams[s].gpu_index(g));
+      }
+    }
+
+    for (uint32_t s = 0; s < _num_internal_cuda_streams; ++s) {
+      for (uint32_t g = 0; g < _num_gpus; ++g) {
+        cuda_stream_wait_event(main_streams.stream(0),
+                               _outgoing_events[s * _num_gpus + g],
+                               main_streams.gpu_index(0));
+      }
+    }
+  }
+
+  // Same as main_stream_0_wait_for_internal_streams, restricted to the
+  // internal stream sets listed in stream_indices[0..num_indices). Panics on
+  // an out-of-bounds index.
+  void
+  main_stream_0_wait_for_internal_streams_slice(const CudaStreams &main_streams,
+                                                const uint32_t *stream_indices,
+                                                size_t num_indices) {
+
+    check_main_streams(main_streams);
+
+    for (size_t i = 0; i < num_indices; ++i) {
+      uint32_t s_idx = stream_indices[i];
+      PANIC_IF_FALSE(s_idx < _num_internal_cuda_streams,
+                     "InternalCudaStreams: stream index out of bounds");
+
+      for (uint32_t g = 0; g < _num_gpus; ++g) {
+        cuda_event_record(_outgoing_events[s_idx * _num_gpus + g],
+                          _internal_cuda_streams[s_idx].stream(g),
+                          _internal_cuda_streams[s_idx].gpu_index(g));
+      }
+    }
+
+    for (size_t i = 0; i < num_indices; ++i) {
+      uint32_t s_idx = stream_indices[i];
+      for (uint32_t g = 0; g < _num_gpus; ++g) {
+        cuda_stream_wait_event(main_streams.stream(0),
+                               _outgoing_events[s_idx * _num_gpus + g],
+                               main_streams.gpu_index(0));
+      }
+    }
+  }
+
+  // Synchronizes main stream 0, then destroys every event and internal stream
+  // set this object holds and resets it to the default-constructed state.
+  // Calling it on an object that holds nothing only synchronizes.
+  void release(const CudaStreams &main_streams) {
+
+    // The GPU-match precondition can only be evaluated when there is a first
+    // internal stream set to compare against.
+    if (_internal_cuda_streams) {
+      check_main_streams(main_streams);
+    }
+
+    cuda_synchronize_stream(main_streams.stream(0), main_streams.gpu_index(0));
+
+    if (_outgoing_events && _internal_cuda_streams) {
+      for (uint32_t s = 0; s < _num_internal_cuda_streams; ++s) {
+        for (uint32_t g = 0; g < _num_gpus; ++g) {
+          cuda_event_destroy(_outgoing_events[s * _num_gpus + g],
+                             _internal_cuda_streams[s].gpu_index(g));
+        }
+      }
+      delete[] _outgoing_events;
+      _outgoing_events = nullptr;
+    }
+
+    // Not conditioned on _internal_cuda_streams: the incoming event exists
+    // whenever the base streams had at least one GPU.
+    if (_incoming_event) {
+      cuda_event_destroy(_incoming_event, _incoming_event_gpu_index);
+      _incoming_event = nullptr;
+    }
+
+    if (_internal_cuda_streams) {
+      for (uint32_t i = 0; i < _num_internal_cuda_streams; ++i) {
+        _internal_cuda_streams[i].release();
+      }
+      delete[] _internal_cuda_streams;
+      _internal_cuda_streams = nullptr;
+    }
+
+    // Make operator[] and num_streams() reflect the released state.
+    _num_internal_cuda_streams = 0;
+    _num_gpus = 0;
+  }
+
+  // Enforces that release() ran: no stream set or event may still be held.
+  ~InternalCudaStreams() {
+    PANIC_IF_FALSE(_internal_cuda_streams == nullptr &&
+                       _incoming_event == nullptr &&
+                       _outgoing_events == nullptr,
+                   "InternalCudaStreams: must call release before destruction");
+  }
+};
+
 struct CudaStreamsBarrier {
 private:
  std::vector<cudaEvent_t> _events;
--- a/backends/tfhe-cuda-backend/cuda/include/integer/bitwise_ops.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/bitwise_ops.h
@@ -20,7 +20,8 @@ template <typename Torus> struct boolean_bitop_buffer {
    gpu_memory_allocated = allocate_gpu_memory;
    this->op = op;
    this->params = params;
-    auto active_streams = streams.active_gpu_subset(lwe_ciphertext_count);
+    auto active_streams =
+        streams.active_gpu_subset(lwe_ciphertext_count, params.pbs_type);
    this->unchecked = is_unchecked;
    switch (op) {
    case BITAND:
@@ -119,7 +120,8 @@ template <typename Torus> struct int_bitop_buffer {
    gpu_memory_allocated = allocate_gpu_memory;
    this->op = op;
    this->params = params;
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    switch (op) {
    case BITAND:
    case BITOR:
@@ -216,7 +218,8 @@ template <typename Torus> struct boolean_bitnot_buffer {
          message_extract_lut->get_max_degree(0), params.glwe_dimension,
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          lut_f_message_extract, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(lwe_ciphertext_count);
+      auto active_streams =
+          streams.active_gpu_subset(lwe_ciphertext_count, params.pbs_type);
      message_extract_lut->broadcast_lut(active_streams);
    }
  }
--- a/backends/tfhe-cuda-backend/cuda/include/integer/cast.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/cast.h
@@ -39,7 +39,8 @@ template <typename Torus> struct int_extend_radix_with_sign_msb_buffer {
          },
          allocate_gpu_memory);

-      auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+      auto active_streams =
+          streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
      lut->broadcast_lut(active_streams);

      this->last_block = new CudaRadixCiphertextFFI;
@@ -127,3 +128,49 @@ template <typename Torus> struct int_cast_to_unsigned_buffer {
    cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
  }
 };
+
+// State kept for a cast to a signed radix integer: the parameters given at
+// construction plus an optional sign-extension sub-buffer.
+//
+// extend_buffer is allocated only when the input is signed and
+// target_num_blocks > num_input_blocks; otherwise it stays nullptr.
+template <typename Torus> struct int_cast_to_signed_buffer {
+  int_radix_params params;
+  bool allocate_gpu_memory;
+  uint32_t num_input_blocks;
+  uint32_t target_num_blocks;
+
+  int_extend_radix_with_sign_msb_buffer<Torus> *extend_buffer;
+
+  // streams, params, allocate_gpu_memory and size_tracker are forwarded as-is
+  // to the extension buffer's constructor when one is created.
+  int_cast_to_signed_buffer(CudaStreams streams, int_radix_params params,
+                            uint32_t num_input_blocks,
+                            uint32_t target_num_blocks, bool input_is_signed,
+                            bool allocate_gpu_memory, uint64_t &size_tracker) {
+    this->params = params;
+    this->allocate_gpu_memory = allocate_gpu_memory;
+    this->num_input_blocks = num_input_blocks;
+    this->target_num_blocks = target_num_blocks;
+    this->extend_buffer = nullptr;
+
+    if (input_is_signed && target_num_blocks > num_input_blocks) {
+      uint32_t num_additional_blocks = target_num_blocks - num_input_blocks;
+      this->extend_buffer = new int_extend_radix_with_sign_msb_buffer<Torus>(
+          streams, params, num_input_blocks, num_additional_blocks,
+          allocate_gpu_memory, size_tracker);
+    }
+  }
+
+  // Releases and deletes the extension buffer, if any, then synchronizes
+  // stream 0. extend_buffer is reset to nullptr so that calling release()
+  // again cannot release or delete the same object twice.
+  void release(CudaStreams streams) {
+    if (this->extend_buffer) {
+      this->extend_buffer->release(streams);
+      delete this->extend_buffer;
+      this->extend_buffer = nullptr;
+    }
+    cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
+  }
+};
--- a/backends/tfhe-cuda-backend/cuda/include/integer/cmux.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/cmux.h
@@ -14,7 +14,8 @@ template <typename Torus> struct int_zero_out_if_buffer {
                         uint64_t &size_tracker) {
    gpu_memory_allocated = allocate_gpu_memory;
    this->params = params;
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);

    tmp = new CudaRadixCiphertextFFI;
    create_zero_radix_ciphertext_async<Torus>(
@@ -114,9 +115,11 @@ template <typename Torus> struct int_cmux_buffer {
        predicate_lut->get_lut_indexes(0, 0), h_lut_indexes,
        2 * num_radix_blocks * sizeof(Torus), streams.stream(0),
        streams.gpu_index(0), allocate_gpu_memory);
-    auto active_streams_pred = streams.active_gpu_subset(2 * num_radix_blocks);
+    auto active_streams_pred =
+        streams.active_gpu_subset(2 * num_radix_blocks, params.pbs_type);
    predicate_lut->broadcast_lut(active_streams_pred);
-    auto active_streams_msg = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams_msg =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    message_extract_lut->broadcast_lut(active_streams_msg);
  }

--- a/backends/tfhe-cuda-backend/cuda/include/integer/comparison.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/comparison.h
@@ -52,7 +52,8 @@ template <typename Torus> struct int_are_all_block_true_buffer {
        params.glwe_dimension, params.polynomial_size, params.message_modulus,
        params.carry_modulus, is_max_value_f, gpu_memory_allocated);

-    auto active_streams = streams.active_gpu_subset(max_chunks);
+    auto active_streams =
+        streams.active_gpu_subset(max_chunks, params.pbs_type);
    is_max_value->broadcast_lut(active_streams);
  }

@@ -108,7 +109,8 @@ template <typename Torus> struct int_comparison_eq_buffer {
        params.glwe_dimension, params.polynomial_size, params.message_modulus,
        params.carry_modulus, is_non_zero_lut_f, gpu_memory_allocated);

-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    is_non_zero_lut->broadcast_lut(active_streams);

    // Scalar may have up to num_radix_blocks blocks
@@ -238,7 +240,8 @@ template <typename Torus> struct int_tree_sign_reduction_buffer {
        tree_inner_leaf_lut->get_max_degree(0), params.glwe_dimension,
        params.polynomial_size, params.message_modulus, params.carry_modulus,
        block_selector_f, gpu_memory_allocated);
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    tree_inner_leaf_lut->broadcast_lut(active_streams);
  }

@@ -390,7 +393,8 @@ template <typename Torus> struct int_comparison_buffer {
    this->op = op;
    this->is_signed = is_signed;

-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);

    identity_lut_f = [](Torus x) -> Torus { return x; };

@@ -523,7 +527,7 @@ template <typename Torus> struct int_comparison_buffer {
          signed_lut->get_degree(0), signed_lut->get_max_degree(0),
          params.glwe_dimension, params.polynomial_size, params.message_modulus,
          params.carry_modulus, signed_lut_f, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(1);
+      auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
      signed_lut->broadcast_lut(active_streams);
    }
    preallocated_h_lut = (Torus *)malloc(
--- a/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
@@ -116,7 +116,8 @@ template <typename Torus> struct int_decompression {
          effective_compression_carry_modulus,
          encryption_params.message_modulus, encryption_params.carry_modulus,
          decompression_rescale_f, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(num_blocks_to_decompress);
+      auto active_streams = streams.active_gpu_subset(
+          num_blocks_to_decompress, decompression_rescale_lut->params.pbs_type);
      decompression_rescale_lut->broadcast_lut(active_streams);
    }
  }
--- a/backends/tfhe-cuda-backend/cuda/include/integer/div_rem.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/div_rem.h
@@ -356,7 +356,8 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
          luts[j]->get_degree(0), luts[j]->get_max_degree(0),
          params.glwe_dimension, params.polynomial_size, params.message_modulus,
          params.carry_modulus, lut_f_message_extract, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(num_blocks);
+      auto active_streams =
+          streams.active_gpu_subset(num_blocks, params.pbs_type);
      luts[j]->broadcast_lut(active_streams);
    }
  }
@@ -1012,7 +1013,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
          masking_luts_1[i]->get_max_degree(0), params.glwe_dimension,
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          lut_f_masking, gpu_memory_allocated);
-      auto active_streams_1 = streams.active_gpu_subset(1);
+      auto active_streams_1 = streams.active_gpu_subset(1, params.pbs_type);
      masking_luts_1[i]->broadcast_lut(active_streams_1);

      generate_device_accumulator<Torus>(
@@ -1021,7 +1022,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
          masking_luts_2[i]->get_max_degree(0), params.glwe_dimension,
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          lut_f_masking, gpu_memory_allocated);
-      auto active_streams_2 = streams.active_gpu_subset(num_blocks);
+      auto active_streams_2 =
+          streams.active_gpu_subset(num_blocks, params.pbs_type);
      masking_luts_2[i]->broadcast_lut(active_streams_2);
    }

@@ -1040,7 +1042,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {

    int_radix_lut<Torus> *luts[2] = {message_extract_lut_1,
                                     message_extract_lut_2};
-    auto active_streams = streams.active_gpu_subset(num_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_blocks, params.pbs_type);
    for (int j = 0; j < 2; j++) {
      generate_device_accumulator<Torus>(
          streams.stream(0), streams.gpu_index(0), luts[j]->get_lut(0, 0),
@@ -1128,7 +1131,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {

    // merge_overflow_flags_luts
    merge_overflow_flags_luts = new int_radix_lut<Torus> *[num_bits_in_message];
-    auto active_gpu_count_for_bits = streams.active_gpu_subset(1);
+    auto active_gpu_count_for_bits =
+        streams.active_gpu_subset(1, params.pbs_type);
    for (int i = 0; i < num_bits_in_message; i++) {
      auto lut_f_bit = [i](Torus x, Torus y) -> Torus {
        return (x == 0 && y == 0) << i;
@@ -1152,7 +1156,8 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
                              uint32_t num_blocks, bool allocate_gpu_memory,
                              uint64_t &size_tracker) {
    gpu_memory_allocated = allocate_gpu_memory;
-    auto active_streams = streams.active_gpu_subset(2 * num_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(2 * num_blocks, params.pbs_type);
    this->params = params;

    if (params.message_modulus == 4 && params.carry_modulus == 4 &&
@@ -1473,7 +1478,8 @@ template <typename Torus> struct int_div_rem_memory {
                     bool allocate_gpu_memory, uint64_t &size_tracker) {

    gpu_memory_allocated = allocate_gpu_memory;
-    this->active_streams = streams.active_gpu_subset(num_blocks);
+    this->active_streams =
+        streams.active_gpu_subset(num_blocks, params.pbs_type);
    this->params = params;
    this->is_signed = is_signed;

@@ -1559,7 +1565,7 @@ template <typename Torus> struct int_div_rem_memory {
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          f_compare_extracted_signed_bits, gpu_memory_allocated);
      auto active_gpu_count_cmp =
-          streams.active_gpu_subset(1); // only 1 block needed
+          streams.active_gpu_subset(1, params.pbs_type); // only 1 block needed
      compare_signed_bits_lut->broadcast_lut(active_gpu_count_cmp);
    }
  }
--- a/backends/tfhe-cuda-backend/cuda/include/integer/ilog2.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/ilog2.h
@@ -20,7 +20,8 @@ template <typename Torus> struct int_prepare_count_of_consecutive_bits_buffer {
    this->allocate_gpu_memory = allocate_gpu_memory;
    this->direction = direction;
    this->bit_value = bit_value;
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    this->univ_lut_mem =
        new int_radix_lut<Torus>(streams, params, 1, num_radix_blocks,
                                 allocate_gpu_memory, size_tracker);
@@ -246,7 +247,8 @@ template <typename Torus> struct int_ilog2_buffer {
                                params.glwe_dimension, params.polynomial_size,
                                params.message_modulus, params.carry_modulus,
                                lut_message_lambda, allocate_gpu_memory);
-    auto active_streams = streams.active_gpu_subset(counter_num_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(counter_num_blocks, params.pbs_type);
    lut_message_not->broadcast_lut(active_streams);

    this->lut_carry_not =
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -131,25 +131,6 @@ void cuda_apply_univariate_lut_64(CudaStreamsFFI streams,
 void cleanup_cuda_apply_univariate_lut_64(CudaStreamsFFI streams,
                                          int8_t **mem_ptr_void);

-uint64_t scratch_cuda_apply_bivariate_lut_64(
-    CudaStreamsFFI streams, int8_t **mem_ptr, void const *input_lut,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
-    uint32_t pbs_base_log, uint32_t grouping_factor,
-    uint32_t input_lwe_ciphertext_count, uint32_t message_modulus,
-    uint32_t carry_modulus, PBS_TYPE pbs_type, uint64_t lut_degree,
-    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
-
-void cuda_apply_bivariate_lut_64(
-    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI const *input_radix_lwe_1,
-    CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
-    uint32_t shift);
-
-void cleanup_cuda_apply_bivariate_lut_64(CudaStreamsFFI streams,
-                                         int8_t **mem_ptr_void);
-
 void cuda_apply_many_univariate_lut_64(
    CudaStreamsFFI streams, CudaRadixCiphertextFFI *output_radix_lwe,
    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
@@ -610,25 +591,6 @@ void cuda_integer_unsigned_scalar_div_radix_64(
 void cleanup_cuda_integer_unsigned_scalar_div_radix_64(CudaStreamsFFI streams,
                                                       int8_t **mem_ptr_void);

-uint64_t scratch_cuda_extend_radix_with_sign_msb_64(
-    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
-    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
-    uint32_t grouping_factor, uint32_t num_blocks,
-    uint32_t num_additional_blocks, uint32_t message_modulus,
-    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
-    PBS_MS_REDUCTION_T noise_reduction_type);
-
-void cuda_extend_radix_with_sign_msb_64(CudaStreamsFFI streams,
-                                        CudaRadixCiphertextFFI *output,
-                                        CudaRadixCiphertextFFI const *input,
-                                        int8_t *mem_ptr,
-                                        uint32_t num_additional_blocks,
-                                        void *const *bsks, void *const *ksks);
-
-void cleanup_cuda_extend_radix_with_sign_msb_64(CudaStreamsFFI streams,
-                                                int8_t **mem_ptr_void);
-
 uint64_t scratch_cuda_integer_signed_scalar_div_radix_64(
    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
    uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
@@ -921,6 +883,10 @@ void cuda_unchecked_first_index_in_clears_64(
    uint32_t num_unique, uint32_t num_blocks, uint32_t num_blocks_index,
    int8_t *mem, void *const *bsks, void *const *ksks);

+void cuda_small_scalar_multiplication_integer_64_inplace(
+    CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint64_t scalar,
+    const uint32_t message_modulus, const uint32_t carry_modulus);
+
 void cleanup_cuda_unchecked_first_index_in_clears_64(CudaStreamsFFI streams,
                                                     int8_t **mem_ptr_void);

@@ -1004,6 +970,64 @@ void cuda_unchecked_index_of_clear_64(

 void cleanup_cuda_unchecked_index_of_clear_64(CudaStreamsFFI streams,
                                              int8_t **mem_ptr_void);
+
+uint64_t scratch_cuda_unchecked_all_eq_slices_64(
+    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t num_inputs, uint32_t num_blocks, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);
+
+void cuda_unchecked_all_eq_slices_64(CudaStreamsFFI streams,
+                                     CudaRadixCiphertextFFI *match_ct,
+                                     CudaRadixCiphertextFFI const *lhs,
+                                     CudaRadixCiphertextFFI const *rhs,
+                                     uint32_t num_inputs, uint32_t num_blocks,
+                                     int8_t *mem, void *const *bsks,
+                                     void *const *ksks);
+
+void cleanup_cuda_unchecked_all_eq_slices_64(CudaStreamsFFI streams,
+                                             int8_t **mem_ptr_void);
+
+uint64_t scratch_cuda_unchecked_contains_sub_slice_64(
+    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t num_lhs, uint32_t num_rhs, uint32_t num_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
+
+void cuda_unchecked_contains_sub_slice_64(CudaStreamsFFI streams,
+                                          CudaRadixCiphertextFFI *match_ct,
+                                          CudaRadixCiphertextFFI const *lhs,
+                                          CudaRadixCiphertextFFI const *rhs,
+                                          uint32_t num_rhs, uint32_t num_blocks,
+                                          int8_t *mem, void *const *bsks,
+                                          void *const *ksks);
+
+void cleanup_cuda_unchecked_contains_sub_slice_64(CudaStreamsFFI streams,
+                                                  int8_t **mem_ptr_void);
+
+uint64_t scratch_cuda_cast_to_signed_64(
+    CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
+    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
+    uint32_t grouping_factor, uint32_t num_input_blocks,
+    uint32_t target_num_blocks, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, bool input_is_signed,
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
+
+void cuda_cast_to_signed_64(CudaStreamsFFI streams,
+                            CudaRadixCiphertextFFI *output,
+                            CudaRadixCiphertextFFI const *input, int8_t *mem,
+                            bool input_is_signed, void *const *bsks,
+                            void *const *ksks);
+
+void cleanup_cuda_cast_to_signed_64(CudaStreamsFFI streams,
+                                    int8_t **mem_ptr_void);
 } // extern C

 #endif // CUDA_INTEGER_H
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
@@ -43,6 +43,8 @@ public:
              "parameters");                                                   \
    } else if ((msg_mod) == 0 && (carry_mod) == 0) {                           \
      break;                                                                   \
+    } else if ((msg_mod) == 4 && (carry_mod) == 32) {                          \
+      break;                                                                   \
    } else {                                                                   \
      PANIC("Invalid message modulus or carry modulus")                        \
    }                                                                          \
@@ -369,7 +371,8 @@ struct int_radix_lut_custom_input_output {
    this->num_input_blocks = num_input_blocks;
    this->gpu_memory_allocated = allocate_gpu_memory;

-    this->active_streams = streams.active_gpu_subset(num_radix_blocks);
+    this->active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
  }

  void setup_degrees() {
@@ -380,14 +383,18 @@ struct int_radix_lut_custom_input_output {

  void allocate_pbs_buffers(int_radix_params params, uint32_t num_radix_blocks,
                            bool allocate_gpu_memory, uint64_t &size_tracker) {
+
+    int threshold = (params.pbs_type == PBS_TYPE::MULTI_BIT)
+                        ? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
+                        : THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
+
    for (uint i = 0; i < active_streams.count(); i++) {
      cuda_set_device(active_streams.gpu_index(i));
      int8_t *gpu_pbs_buffer;
-      auto num_blocks_on_gpu =
-          std::min((int)num_radix_blocks,
-                   std::max(THRESHOLD_MULTI_GPU,
-                            get_num_inputs_on_gpu(num_radix_blocks, i,
-                                                  active_streams.count())));
+      auto num_blocks_on_gpu = std::min(
+          (int)num_radix_blocks,
+          std::max(threshold, get_num_inputs_on_gpu(num_radix_blocks, i,
+                                                    active_streams.count())));

      uint64_t size = 0;
      execute_scratch_pbs<OutputTorus>(
@@ -422,18 +429,22 @@ struct int_radix_lut_custom_input_output {
    /// back to the original indexing
    multi_gpu_alloc_lwe_async(active_streams, lwe_array_in_vec,
                              num_radix_blocks, params.big_lwe_dimension + 1,
-                              size_tracker, allocate_gpu_memory);
+                              size_tracker, params.pbs_type,
+                              allocate_gpu_memory);
    multi_gpu_alloc_lwe_async(active_streams, lwe_after_ks_vec,
                              num_radix_blocks, params.small_lwe_dimension + 1,
-                              size_tracker, allocate_gpu_memory);
+                              size_tracker, params.pbs_type,
+                              allocate_gpu_memory);
    if (num_many_lut > 1) {
      multi_gpu_alloc_lwe_many_lut_output_async(
          active_streams, lwe_after_pbs_vec, num_radix_blocks, num_many_lut,
-          params.big_lwe_dimension + 1, size_tracker, allocate_gpu_memory);
+          params.big_lwe_dimension + 1, size_tracker, params.pbs_type,
+          allocate_gpu_memory);
    } else {
      multi_gpu_alloc_lwe_async(active_streams, lwe_after_pbs_vec,
                                num_radix_blocks, params.big_lwe_dimension + 1,
-                                size_tracker, allocate_gpu_memory);
+                                size_tracker, params.pbs_type,
+                                allocate_gpu_memory);
    }
    multi_gpu_alloc_array_async(active_streams, lwe_trivial_indexes_vec,
                                num_radix_blocks, size_tracker,
@@ -449,12 +460,14 @@ struct int_radix_lut_custom_input_output {
  }

  void setup_gemm_batch_ks_temp_buffers(uint64_t &size_tracker) {
+    int threshold = (params.pbs_type == PBS_TYPE::MULTI_BIT)
+                        ? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
+                        : THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;

-    auto inputs_on_gpu =
-        std::min((int)num_input_blocks,
-                 std::max(THRESHOLD_MULTI_GPU,
-                          get_num_inputs_on_gpu(num_input_blocks, 0,
-                                                active_streams.count())));
+    auto inputs_on_gpu = std::min(
+        (int)num_input_blocks,
+        std::max(threshold, get_num_inputs_on_gpu(num_input_blocks, 0,
+                                                  active_streams.count())));

    if (inputs_on_gpu >= get_threshold_ks_gemm()) {
      for (auto i = 0; i < active_streams.count(); ++i) {
@@ -796,16 +809,20 @@ struct int_radix_lut_custom_input_output {
  void allocate_lwe_vector_for_non_trivial_indexes(
      CudaStreams streams, uint64_t max_num_radix_blocks,
      uint64_t &size_tracker, bool allocate_gpu_memory) {
+
+    int threshold = (params.pbs_type == PBS_TYPE::MULTI_BIT)
+                        ? THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS
+                        : THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
+
    // We need to create the auxiliary array only in GPU 0
    if (active_streams.count() > 1) {
      lwe_aligned_vec.resize(active_streams.count());
      for (uint i = 0; i < active_streams.count(); i++) {
        uint64_t size_tracker_on_array_i = 0;
-        auto inputs_on_gpu =
-            std::min((int)max_num_radix_blocks,
-                     std::max(THRESHOLD_MULTI_GPU,
-                              get_num_inputs_on_gpu(max_num_radix_blocks, i,
-                                                    active_streams.count())));
+        auto inputs_on_gpu = std::min(
+            (int)max_num_radix_blocks,
+            std::max(threshold, get_num_inputs_on_gpu(max_num_radix_blocks, i,
+                                                      active_streams.count())));
        InputTorus *d_array =
            (InputTorus *)cuda_malloc_with_size_tracking_async(
                inputs_on_gpu * (params.big_lwe_dimension + 1) *
@@ -996,8 +1013,8 @@ template <typename Torus> struct int_bit_extract_luts_buffer {
        num_radix_blocks * bits_per_block * sizeof(Torus), streams.stream(0),
        streams.gpu_index(0), allocate_gpu_memory);

-    auto active_streams =
-        streams.active_gpu_subset(bits_per_block * num_radix_blocks);
+    auto active_streams = streams.active_gpu_subset(
+        bits_per_block * num_radix_blocks, params.pbs_type);
    lut->broadcast_lut(active_streams);

    /**
@@ -1101,8 +1118,9 @@ template <typename Torus> struct int_fullprop_buffer {
    cuda_memcpy_with_size_tracking_async_to_gpu(
        lwe_indexes, h_lwe_indexes, lwe_indexes_size, streams.stream(0),
        streams.gpu_index(0), allocate_gpu_memory);
-    auto active_streams = streams.active_gpu_subset(2);
-    lut->broadcast_lut(active_streams);
+    //
+    // No broadcast is needed because full propagation is done on a single GPU.
+    //

    tmp_small_lwe_vector = new CudaRadixCiphertextFFI;
    create_zero_radix_ciphertext_async<Torus>(
@@ -1263,7 +1281,8 @@ template <typename Torus> struct int_sum_ciphertexts_vec_memory {
          luts_message_carry->get_max_degree(1), params.glwe_dimension,
          params.polynomial_size, message_modulus, params.carry_modulus,
          lut_f_carry, gpu_memory_allocated);
-      auto active_gpu_count_mc = streams.active_gpu_subset(pbs_count);
+      auto active_gpu_count_mc =
+          streams.active_gpu_subset(pbs_count, params.pbs_type);
      luts_message_carry->broadcast_lut(active_gpu_count_mc);
    }
  }
@@ -1433,7 +1452,8 @@ template <typename Torus> struct int_seq_group_prop_memory {
    cuda_memcpy_with_size_tracking_async_to_gpu(
        seq_lut_indexes, h_seq_lut_indexes, num_seq_luts * sizeof(Torus),
        streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
-    auto active_streams = streams.active_gpu_subset(num_seq_luts);
+    auto active_streams =
+        streams.active_gpu_subset(num_seq_luts, params.pbs_type);
    lut_sequential_algorithm->broadcast_lut(active_streams);
    free(h_seq_lut_indexes);
  };
@@ -1487,7 +1507,8 @@ template <typename Torus> struct int_hs_group_prop_memory {
        lut_hillis_steele->get_max_degree(0), glwe_dimension, polynomial_size,
        message_modulus, carry_modulus, f_lut_hillis_steele,
        gpu_memory_allocated);
-    auto active_streams = streams.active_gpu_subset(num_groups);
+    auto active_streams =
+        streams.active_gpu_subset(num_groups, params.pbs_type);
    lut_hillis_steele->broadcast_lut(active_streams);
  };
  void release(CudaStreams streams) {
@@ -1664,7 +1685,8 @@ template <typename Torus> struct int_shifted_blocks_and_states_memory {
        lut_indexes, h_lut_indexes, lut_indexes_size, streams.stream(0),
        streams.gpu_index(0), allocate_gpu_memory);
    // Do I need to do something else for the multi-gpu?
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    luts_array_first_step->broadcast_lut(active_streams);
  };
  void release(CudaStreams streams) {
@@ -1929,7 +1951,8 @@ template <typename Torus> struct int_prop_simu_group_carries_memory {
        scalar_array_cum_sum, h_scalar_array_cum_sum,
        num_radix_blocks * sizeof(Torus), streams.stream(0),
        streams.gpu_index(0), allocate_gpu_memory);
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    luts_array_second_step->broadcast_lut(active_streams);

    if (use_sequential_algorithm_to_resolve_group_carries) {
@@ -1954,7 +1977,8 @@ template <typename Torus> struct int_prop_simu_group_carries_memory {
    cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
        lut_indexes, new_lut_indexes, new_num_blocks * sizeof(Torus),
        streams.stream(0), streams.gpu_index(0), gpu_memory_allocated);
-    auto new_active_streams = streams.active_gpu_subset(new_num_blocks);
+    auto new_active_streams = streams.active_gpu_subset(
+        new_num_blocks, luts_array_second_step->params.pbs_type);
    // We just need to update the lut indexes so we use false here
    luts_array_second_step->broadcast_lut(new_active_streams, false);

@@ -2121,7 +2145,7 @@ template <typename Torus> struct int_sc_prop_memory {
          polynomial_size, message_modulus, carry_modulus, f_overflow_fp,
          gpu_memory_allocated);

-      auto active_streams = streams.active_gpu_subset(1);
+      auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
      lut_overflow_flag_prep->broadcast_lut(active_streams);
    }

@@ -2193,7 +2217,8 @@ template <typename Torus> struct int_sc_prop_memory {
          (num_radix_blocks + 1) * sizeof(Torus), streams.stream(0),
          streams.gpu_index(0), allocate_gpu_memory);
    }
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks + 1);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks + 1, params.pbs_type);
    lut_message_extract->broadcast_lut(active_streams);
  };

@@ -2390,7 +2415,8 @@ template <typename Torus> struct int_shifted_blocks_and_borrow_states_memory {
        lut_indexes, h_lut_indexes, lut_indexes_size, streams.stream(0),
        streams.gpu_index(0), allocate_gpu_memory);
    // Do I need to do something else for the multi-gpu?
-    auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+    auto active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    luts_array_first_step->broadcast_lut(active_streams);
  };

@@ -2401,7 +2427,8 @@ template <typename Torus> struct int_shifted_blocks_and_borrow_states_memory {
    cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
        lut_indexes, new_lut_indexes, new_num_blocks * sizeof(Torus),
        streams.stream(0), streams.gpu_index(0), gpu_memory_allocated);
-    auto new_active_streams = streams.active_gpu_subset(new_num_blocks);
+    auto new_active_streams = streams.active_gpu_subset(
+        new_num_blocks, luts_array_first_step->params.pbs_type);
    // We just need to update the lut indexes so we use false here
    luts_array_first_step->broadcast_lut(new_active_streams, false);
  }
@@ -2442,12 +2469,7 @@ template <typename Torus> struct int_borrow_prop_memory {
  int_radix_params params;

  CudaStreams active_streams;
-  CudaStreams sub_streams_1;
-  CudaStreams sub_streams_2;
-
-  cudaEvent_t *incoming_events;
-  cudaEvent_t *outgoing_events1;
-  cudaEvent_t *outgoing_events2;
+  InternalCudaStreams internal_streams;

  uint32_t compute_overflow;
  bool gpu_memory_allocated;
@@ -2501,7 +2523,8 @@ template <typename Torus> struct int_borrow_prop_memory {
        lut_message_extract->get_max_degree(0), glwe_dimension, polynomial_size,
        message_modulus, carry_modulus, f_message_extract,
        gpu_memory_allocated);
-    active_streams = streams.active_gpu_subset(num_radix_blocks);
+    active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);

    lut_message_extract->broadcast_lut(active_streams);

@@ -2522,21 +2545,10 @@ template <typename Torus> struct int_borrow_prop_memory {
      lut_borrow_flag->broadcast_lut(active_streams);
    }

-    active_streams = streams.active_gpu_subset(num_radix_blocks);
-    sub_streams_1.create_on_same_gpus(active_streams);
-    sub_streams_2.create_on_same_gpus(active_streams);
-
-    incoming_events =
-        (cudaEvent_t *)malloc(active_streams.count() * sizeof(cudaEvent_t));
-    outgoing_events1 =
-        (cudaEvent_t *)malloc(active_streams.count() * sizeof(cudaEvent_t));
-    outgoing_events2 =
-        (cudaEvent_t *)malloc(active_streams.count() * sizeof(cudaEvent_t));
-    for (uint j = 0; j < active_streams.count(); j++) {
-      incoming_events[j] = cuda_create_event(active_streams.gpu_index(j));
-      outgoing_events1[j] = cuda_create_event(active_streams.gpu_index(j));
-      outgoing_events2[j] = cuda_create_event(active_streams.gpu_index(j));
-    }
+    active_streams =
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
+    internal_streams.create_internal_cuda_streams_on_same_gpus(active_streams,
+                                                               2);
  };

  // needed for the division to update the lut indexes
@@ -2563,21 +2575,9 @@ template <typename Torus> struct int_borrow_prop_memory {
      delete lut_borrow_flag;
    }

-    // The substreams have to be synchronized before destroying events
+    internal_streams.release(streams);
+
    cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
-
-    // release events
-    for (uint j = 0; j < active_streams.count(); j++) {
-      cuda_event_destroy(incoming_events[j], active_streams.gpu_index(j));
-      cuda_event_destroy(outgoing_events1[j], active_streams.gpu_index(j));
-      cuda_event_destroy(outgoing_events2[j], active_streams.gpu_index(j));
-    }
-    free(incoming_events);
-    free(outgoing_events1);
-    free(outgoing_events2);
-
-    sub_streams_1.release();
-    sub_streams_2.release();
  };
 };
 std::pair<bool, bool> get_invert_flags(COMPARISON_TYPE compare);
--- a/backends/tfhe-cuda-backend/cuda/include/integer/multiplication.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/multiplication.h
@@ -45,7 +45,8 @@ template <typename Torus> struct int_mul_memory {
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          zero_out_predicate_lut_f, gpu_memory_allocated);

-      auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+      auto active_streams =
+          streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
      zero_out_predicate_lut->broadcast_lut(active_streams);

      zero_out_mem = new int_zero_out_if_buffer<Torus>(
@@ -122,7 +123,8 @@ template <typename Torus> struct int_mul_memory {
          streams.stream(0), streams.gpu_index(0),
          luts_array->get_lut_indexes(0, lsb_vector_block_count), 1,
          msb_vector_block_count);
-    auto active_streams = streams.active_gpu_subset(total_block_count);
+    auto active_streams =
+        streams.active_gpu_subset(total_block_count, params.pbs_type);
    luts_array->broadcast_lut(active_streams);
    // create memory object for sum ciphertexts
    sum_ciphertexts_mem = new int_sum_ciphertexts_vec_memory<Torus>(
--- a/backends/tfhe-cuda-backend/cuda/include/integer/oprf.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/oprf.h
@@ -126,9 +126,11 @@ template <typename Torus> struct int_grouped_oprf_memory {
        luts->get_lut_indexes(0, 0), this->h_lut_indexes,
        num_blocks_to_process * sizeof(Torus), streams.stream(0),
        streams.gpu_index(0), allocate_gpu_memory);
-    auto active_streams = streams.active_gpu_subset(num_blocks_to_process);
+    auto active_streams =
+        streams.active_gpu_subset(num_blocks_to_process, params.pbs_type);
    luts->broadcast_lut(active_streams);

+    cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
    free(h_corrections);
  }

--- a/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
@@ -6,6 +6,8 @@ void release_radix_ciphertext_async(cudaStream_t const stream,
                                    CudaRadixCiphertextFFI *data,
                                    const bool gpu_memory_allocated);

+void release_cpu_radix_ciphertext_async(CudaRadixCiphertextFFI *data);
+
 void reset_radix_ciphertext_blocks(CudaRadixCiphertextFFI *data,
                                   uint32_t new_num_blocks);

--- a/backends/tfhe-cuda-backend/cuda/include/integer/scalar_shifts.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/scalar_shifts.h
@@ -91,7 +91,8 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
          cur_lut_bivariate->get_max_degree(0), params.glwe_dimension,
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          shift_lut_f, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+      auto active_streams =
+          streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
      cur_lut_bivariate->broadcast_lut(active_streams);

      lut_buffers_bivariate.push_back(cur_lut_bivariate);
@@ -177,7 +178,8 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
          cur_lut_bivariate->get_max_degree(0), params.glwe_dimension,
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          shift_lut_f, gpu_memory_allocated);
-      auto active_streams = streams.active_gpu_subset(num_radix_blocks);
+      auto active_streams =
+          streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
      cur_lut_bivariate->broadcast_lut(active_streams);

      lut_buffers_bivariate.push_back(cur_lut_bivariate);
@@ -220,7 +222,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
                                     uint64_t &size_tracker) {
    gpu_memory_allocated = allocate_gpu_memory;

-    auto active_streams = streams.active_gpu_subset(1);
+    auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
    // In the arithmetic shift, a PBS has to be applied to the last rotated
    // block twice: once to shift it, once to compute the padding block to be
    // copied onto all blocks to the left of the last rotated block
@@ -276,7 +278,8 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
          shift_last_block_lut_univariate->get_max_degree(0),
          params.glwe_dimension, params.polynomial_size, params.message_modulus,
          params.carry_modulus, last_block_lut_f, gpu_memory_allocated);
-      auto active_streams_shift_last = streams.active_gpu_subset(1);
+      auto active_streams_shift_last =
+          streams.active_gpu_subset(1, params.pbs_type);
      shift_last_block_lut_univariate->broadcast_lut(active_streams_shift_last);

      lut_buffers_univariate.push_back(shift_last_block_lut_univariate);
@@ -302,7 +305,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
        padding_block_lut_univariate->get_max_degree(0), params.glwe_dimension,
        params.polynomial_size, params.message_modulus, params.carry_modulus,
        padding_block_lut_f, gpu_memory_allocated);
-    // auto active_streams = streams.active_gpu_subset(1);
+    // auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
    padding_block_lut_univariate->broadcast_lut(active_streams);

    lut_buffers_univariate.push_back(padding_block_lut_univariate);
@@ -344,7 +347,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
          params.polynomial_size, params.message_modulus, params.carry_modulus,
          blocks_lut_f, gpu_memory_allocated);
      auto active_streams_shift_blocks =
-          streams.active_gpu_subset(num_radix_blocks);
+          streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
      shift_blocks_lut_bivariate->broadcast_lut(active_streams_shift_blocks);

      lut_buffers_bivariate.push_back(shift_blocks_lut_bivariate);
--- a/backends/tfhe-cuda-backend/cuda/include/integer/shift_and_rotate.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/shift_and_rotate.h
@@ -119,8 +119,8 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
        mux_lut->get_degree(0), mux_lut->get_max_degree(0),
        params.glwe_dimension, params.polynomial_size, params.message_modulus,
        params.carry_modulus, mux_lut_f, gpu_memory_allocated);
-    auto active_gpu_count_mux =
-        streams.active_gpu_subset(bits_per_block * num_radix_blocks);
+    auto active_gpu_count_mux = streams.active_gpu_subset(
+        bits_per_block * num_radix_blocks, params.pbs_type);
    mux_lut->broadcast_lut(active_gpu_count_mux);

    auto cleaning_lut_f = [params](Torus x) -> Torus {
@@ -132,7 +132,7 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
        params.glwe_dimension, params.polynomial_size, params.message_modulus,
        params.carry_modulus, cleaning_lut_f, gpu_memory_allocated);
    auto active_gpu_count_cleaning =
-        streams.active_gpu_subset(num_radix_blocks);
+        streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
    cleaning_lut->broadcast_lut(active_gpu_count_cleaning);
  }

--- a/Show More
+++ b/Show More