fix(gpu): fix clear shift computation in long run tests

fix(zk): add a size check for the public key
chore(ci): update lattice estimator version
2026-01-11 15:48:20 -05:00 · 2025-09-12 16:31:39 +02:00 · 2025-09-12 11:10:06 +02:00 · 2025-09-12 11:07:25 +02:00 · 2025-09-11 13:10:18 -03:00 · 2025-09-11 13:55:42 +02:00
619 changed files with 34108 additions and 10713 deletions
--- a/.github/actions/gpu_setup/action.yml
+++ b/.github/actions/gpu_setup/action.yml
@@ -23,38 +23,58 @@ runs:
        echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
        sha256sum -c checksum
        sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
+        sudo apt remove -y unattended-upgrades
        sudo apt update
        sudo apt install -y cmake-format libclang-dev
      env:
        CMAKE_VERSION: 3.29.6
        CMAKE_SCRIPT_SHA: "6e4fada5cba3472ae503a11232b6580786802f0879cead2741672bf65d97488a"

+    - name: Install GCC  # runs only when inputs.github-instance is 'true'; installs gcc/g++ at inputs.gcc-version
+      if: inputs.github-instance == 'true'
+      shell: bash
+      env:
+        GCC_VERSION: ${{ inputs.gcc-version }}  # passed through env so the script reads a plain shell variable
+      run: |
+        sudo apt-get install -y gcc-"${GCC_VERSION}" g++-"${GCC_VERSION}"
+        sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"${GCC_VERSION}" 20
+        sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"${GCC_VERSION}" 20
+
+    - name: Check GCC  # prints the path of gcc-<version>; `which` exits non-zero when it is not on PATH
+      shell: bash
+      env:
+        GCC_VERSION: ${{ inputs.gcc-version }}
+      run: |
+        which gcc-"${GCC_VERSION}"
+
    - name: Install CUDA
      if: inputs.github-instance == 'true'
      shell: bash
+      env:  # exposed to the script below as shell variables
+        CUDA_VERSION: ${{ inputs.cuda-version }}
+        CUDA_KEYRING_PACKAGE: cuda-keyring_1.1-1_all.deb
+        CUDA_KEYRING_SHA: "d93190d50b98ad4699ff40f4f7af50f16a76dac3bb8da1eaaf366d47898ff8df"  # checked with sha256sum -c before the package is installed
      run: |
        # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
        # shellcheck disable=SC2001
        TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
-        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${env.CUDA_KEYRING_PACKAGE}
+        wget "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${CUDA_KEYRING_PACKAGE}"
        echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
        sha256sum -c checksum
        sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
        sudo apt update
        sudo apt -y install cuda-toolkit-"${TOOLKIT_VERSION}"
-      env:
-        CUDA_VERSION: ${{ inputs.cuda-version }}
-        CUDA_KEYRING_PACKAGE: cuda-keyring_1.1-1_all.deb
-        CUDA_KEYRING_SHA: "d93190d50b98ad4699ff40f4f7af50f16a76dac3bb8da1eaaf366d47898ff8df"

    - name: Export CUDA variables
      shell: bash
      run: |
+        find /usr/local -executable -name "nvcc"
        CUDA_PATH=/usr/local/cuda-"${CUDA_VERSION}"
        {
          echo "CUDA_PATH=$CUDA_PATH";
          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH";
          echo "CUDA_MODULE_LOADER=EAGER";
+          echo "PATH=$PATH:$CUDA_PATH/bin"; 
        } >> "${GITHUB_ENV}"
        {
          echo "PATH=$PATH:$CUDA_PATH/bin"; 
@@ -74,6 +94,11 @@ runs:
      env:
        GCC_VERSION: ${{ inputs.gcc-version }}

+    - name: Check setup  # prints the resolved nvcc path; `which` exits non-zero when nvcc is not on PATH
+      shell: bash
+      run: |
+        which nvcc
+
    - name: Check device is detected
      shell: bash
      run: nvidia-smi
--- a/.github/workflows/approve_label.yml
+++ b/.github/workflows/approve_label.yml
@@ -1,5 +1,5 @@
 # Add labels in pull request
-name: PR label manager
+name: approve_label

 on:
  pull_request:
@@ -11,6 +11,7 @@ permissions: {}

 jobs:
  trigger-tests:
+    name: approve_label/trigger-tests
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
--- a/.github/workflows/aws_tfhe_backward_compat_tests.yml
+++ b/.github/workflows/aws_tfhe_backward_compat_tests.yml
@@ -1,5 +1,5 @@
 # Run backward compatibility tests
-name: Backward compatibility Tests on CPU
+name: aws_tfhe_backward_compat_tests

 env:
  CARGO_TERM_COLOR: always
@@ -22,13 +22,16 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
+  push:
+    branches:
+      - main

 permissions:
  contents: read

 jobs:
  setup-instance:
-    name: Setup instance (backward-compat-tests)
+    name: aws_tfhe_backward_compat_tests/setup-instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
@@ -53,21 +56,21 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  backward-compat-tests:
-    name: Backward compatibility tests
+    name: aws_tfhe_backward_compat_tests/backward-compat-tests (bpr)
    needs: [ setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}
-      cancel-in-progress: true
+      group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -80,7 +83,7 @@ jobs:

      - name: Retrieve data from cache
        id: retrieve-data-cache
-        uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        with:
          path: |
            utils/tfhe-backward-compat-data/**/*.cbor
@@ -99,7 +102,7 @@ jobs:
      - name: Store data in cache
        if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
        continue-on-error: true
-        uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        with:
          path: |
            utils/tfhe-backward-compat-data/**/*.cbor
@@ -123,7 +126,7 @@ jobs:
          SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (backward-compat-tests)
+    name: aws_tfhe_backward_compat_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, backward-compat-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -1,5 +1,5 @@
 # Run a small subset of tests to ensure quick feedback.
-name: Fast AWS Tests on CPU
+name: aws_tfhe_fast_tests

 env:
  CARGO_TERM_COLOR: always
@@ -29,6 +29,7 @@ permissions:

 jobs:
  should-run:
+    name: aws_tfhe_fast_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -60,7 +61,7 @@ jobs:
      any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -132,7 +133,7 @@ jobs:
          echo "any_changed=true" >> "$GITHUB_OUTPUT"

  setup-instance:
-    name: Setup instance (fast-tests)
+    name: aws_tfhe_fast_tests/setup-instance
    if: github.event_name == 'workflow_dispatch' ||
      (github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
    needs: should-run
@@ -168,13 +169,13 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -216,7 +217,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        with:
          path: |
            ~/.nvm
@@ -229,7 +230,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -288,7 +289,7 @@ jobs:
          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (fast-tests)
+    name: aws_tfhe_fast_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, fast-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -1,4 +1,4 @@
-name: AWS Unsigned Integer Tests on CPU
+name: aws_tfhe_integer_tests

 env:
  CARGO_TERM_COLOR: always
@@ -35,6 +35,7 @@ permissions:

 jobs:
  should-run:
+    name: aws_tfhe_integer_tests/should-run
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
@@ -47,7 +48,7 @@ jobs:
        steps.changed-files.outputs.integer_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -69,7 +70,7 @@ jobs:
              - .github/workflows/aws_tfhe_integer_tests.yml

  setup-instance:
-    name: Setup instance (unsigned-integer-tests)
+    name: aws_tfhe_integer_tests/setup-instance
    needs: should-run
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
@@ -100,7 +101,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  unsigned-integer-tests:
-    name: Unsigned integer tests
+    name: aws_tfhe_integer_tests/unsigned-integer-tests
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
@@ -108,13 +109,13 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -156,7 +157,7 @@ jobs:
          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (unsigned-integer-tests)
+    name: aws_tfhe_integer_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [setup-instance, unsigned-integer-tests]
    runs-on: ubuntu-latest
--- a/.github/workflows/aws_tfhe_noise_checks.yml
+++ b/.github/workflows/aws_tfhe_noise_checks.yml
@@ -0,0 +1,115 @@
+name: aws_tfhe_noise_checks
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}  # link to this run, embedded in the Slack messages
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"  # 8 * 1024 * 1024
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  SLACKIFY_MARKDOWN: true
+  PULL_REQUEST_MD_LINK: ""  # empty by default; the "Set pull-request URL" step overwrites it through GITHUB_ENV
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}  # falls back to GITHUB_TOKEN when REPO_CHECKOUT_TOKEN is empty
+  # Secrets are available only to zama-ai organization members; jobs below branch on this flag.
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+
+on:
+  # Manual trigger only: run this workflow from the Actions tab.
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  setup-instance:  # starts the runner that noise-checks runs on and exposes it as the runner-name output
+    name: aws_tfhe_noise_checks/setup-instance
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+    steps:
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: aws
+          # We want an hpc7a instance: it has more compute, so the checks run faster.
+          profile: bench
+
+      # No fallback runner here: without secrets this step fails the job (it never sets runner_group).
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "Cannot run this without secrets"
+          exit 1
+
+  noise-checks:  # runs `make test_noise_check` on the runner started by setup-instance
+    name: aws_tfhe_noise_checks/noise-checks
+    needs: setup-instance
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    timeout-minutes: 1440  # 24 hours
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        with:
+          toolchain: stable
+
+      - name: Run noise checks
+        timeout-minutes: 1440  # 24 hours, same as the job-level limit
+        run: |
+          make test_noise_check
+
+      - name: Set pull-request URL  # skipped outside pull_request events so Slack never gets a link with an empty PR number
+        if: ${{ !success() && github.event_name == 'pull_request' }}
+        run: |
+          echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), "  >> "${GITHUB_ENV}"
+        env:
+          PR_BASE_URL: ${{ vars.PR_BASE_URL }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+
+      - name: Slack Notification  # sent only when the job did not succeed; a notification failure does not fail the job
+        if: ${{ !success() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Noise checks tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
+
+  teardown-instance:  # stops the runner started by setup-instance once noise-checks has finished
+    name: aws_tfhe_noise_checks/teardown-instance
+    if: ${{ always() && needs.setup-instance.result == 'success' }}  # runs even if noise-checks failed, but only when setup-instance succeeded
+    needs: [ setup-instance, noise-checks ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop remote instance
+        id: stop-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}  # same value noise-checks used for runs-on
+
+      - name: Slack Notification  # sent only when the teardown did not succeed; a notification failure does not fail the job
+        if: ${{ !success() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Instance teardown (noise-checks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -1,4 +1,4 @@
-name: AWS Signed Integer Tests on CPU
+name: aws_tfhe_signed_integer_tests

 env:
  CARGO_TERM_COLOR: always
@@ -35,6 +35,7 @@ permissions:

 jobs:
  should-run:
+    name: aws_tfhe_signed_integer_tests/should-run
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
@@ -48,7 +49,7 @@ jobs:
        steps.changed-files.outputs.integer_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -70,7 +71,7 @@ jobs:
              - .github/workflows/aws_tfhe_signed_integer_tests.yml

  setup-instance:
-    name: Setup instance (unsigned-integer-tests)
+    name: aws_tfhe_signed_integer_tests/setup-instance
    needs: should-run
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
@@ -101,7 +102,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  signed-integer-tests:
-    name: Signed integer tests
+    name: aws_tfhe_signed_integer_tests/signed-integer-tests
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
@@ -109,13 +110,13 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -161,7 +162,7 @@ jobs:
          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (signed-integer-tests)
+    name: aws_tfhe_signed_integer_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [setup-instance, signed-integer-tests]
    runs-on: ubuntu-latest
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -1,4 +1,4 @@
-name: AWS Tests on CPU
+name: aws_tfhe_tests

 env:
  CARGO_TERM_COLOR: always
@@ -32,6 +32,7 @@ permissions:

 jobs:
  should-run:
+    name: aws_tfhe_tests/should-run
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -69,7 +70,7 @@ jobs:
      any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -141,7 +142,7 @@ jobs:
          echo "any_changed=true" >> "$GITHUB_OUTPUT"

  setup-instance:
-    name: Setup instance (cpu-tests)
+    name: aws_tfhe_tests/setup-instance
    if: github.event_name != 'pull_request' ||
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.any_file_changed == 'true')
    needs: should-run
@@ -169,7 +170,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cpu-tests:
-    name: CPU tests
+    name: aws_tfhe_tests/cpu-tests
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
@@ -179,13 +180,13 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -268,7 +269,7 @@ jobs:
          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cpu-tests)
+    name: aws_tfhe_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -1,4 +1,4 @@
-name: AWS WASM Tests on CPU
+name: aws_tfhe_wasm_tests

 env:
  CARGO_TERM_COLOR: always
@@ -28,7 +28,7 @@ permissions:

 jobs:
  setup-instance:
-    name: Setup instance (wasm-tests)
+    name: aws_tfhe_wasm_tests/setup-instance
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
@@ -54,7 +54,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  wasm-tests:
-    name: WASM tests
+    name: aws_tfhe_wasm_tests/wasm-tests
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}
@@ -62,13 +62,13 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -78,7 +78,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        with:
          path: |
            ~/.nvm
@@ -91,7 +91,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -137,7 +137,7 @@ jobs:
          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (wasm-tests)
+    name: aws_tfhe_wasm_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, wasm-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_boolean.yml
+++ b/.github/workflows/benchmark_boolean.yml
@@ -1,5 +1,5 @@
 # Run boolean benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: Boolean benchmarks
+name: benchmark_boolean

 on:
  workflow_dispatch:
@@ -23,7 +23,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (boolean-benchmarks)
+    name: benchmark_boolean/setup-instance
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -42,7 +42,7 @@ jobs:
          profile: bench

  boolean-benchmarks:
-    name: Execute boolean benchmarks in EC2
+    name: benchmark_boolean/boolean-benchmarks
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
@@ -50,7 +50,7 @@ jobs:
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -68,7 +68,7 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

@@ -107,7 +107,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -132,7 +132,7 @@ jobs:
          SLACK_MESSAGE: "Boolean benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (boolean-benchmarks)
+    name: benchmark_boolean/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, boolean-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_core_crypto.yml
+++ b/.github/workflows/benchmark_core_crypto.yml
@@ -1,5 +1,5 @@
 # Run core crypto benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: Core crypto benchmarks
+name: benchmark_core_crypto

 on:
  workflow_dispatch:
@@ -23,7 +23,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (core-crypto-benchmarks)
+    name: benchmark_core_crypto/setup-instance
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -42,7 +42,7 @@ jobs:
          profile: bench

  core-crypto-benchmarks:
-    name: Execute core crypto benchmarks in EC2
+    name: benchmark_core_crypto/core-crypto-benchmarks
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
@@ -50,7 +50,7 @@ jobs:
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -68,7 +68,7 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

@@ -100,7 +100,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -125,7 +125,7 @@ jobs:
          SLACK_MESSAGE: "PBS benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (core-crypto-benchmarks)
+    name: benchmark_core_crypto/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, core-crypto-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_dex.yml
+++ b/.github/workflows/benchmark_dex.yml
@@ -1,5 +1,5 @@
 # Run all DEX benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: DEX benchmarks
+name: benchmark_dex

 on:
  workflow_dispatch:
@@ -22,7 +22,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (dex-benchmarks)
+    name: benchmark_dex/setup-instance
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -41,7 +41,7 @@ jobs:
          profile: bench

  dex-benchmarks:
-    name: Execute DEX benchmarks
+    name: benchmark_dex/dex-benchmarks
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
@@ -50,7 +50,7 @@ jobs:
    timeout-minutes: 720  # 12 hours
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -68,12 +68,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -146,7 +146,7 @@ jobs:
          SLACK_MESSAGE: "DEX benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (dex-benchmarks)
+    name: benchmark_dex/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, dex-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_erc20.yml
+++ b/.github/workflows/benchmark_erc20.yml
@@ -1,5 +1,5 @@
 # Run all ERC20 benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: ERC20 benchmarks
+name: benchmark_erc20

 on:
  workflow_dispatch:
@@ -23,7 +23,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (erc20-benchmarks)
+    name: benchmark_erc20/setup-instance
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -42,7 +42,7 @@ jobs:
          profile: bench

  erc20-benchmarks:
-    name: Execute ERC20 benchmarks
+    name: benchmark_erc20/erc20-benchmarks
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
@@ -51,7 +51,7 @@ jobs:
    timeout-minutes: 720  # 12 hours
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -69,12 +69,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -129,7 +129,7 @@ jobs:
          SLACK_MESSAGE: "ERC20 benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (erc20-benchmarks)
+    name: benchmark_erc20/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, erc20-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_gpu.yml
+++ b/.github/workflows/benchmark_gpu.yml
@@ -1,5 +1,5 @@
 # Run CUDA benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
-name: Cuda benchmarks
+name: benchmark_gpu

 on:
  workflow_dispatch:
@@ -66,6 +66,7 @@ permissions: {}

 jobs:
  parse-inputs:
+    name: benchmark_gpu/parse-inputs
    runs-on: ubuntu-latest
    outputs:
      profile: ${{ steps.parse_profile.outputs.profile }}
@@ -90,7 +91,7 @@ jobs:
          echo "name=${NAME}" >> "${GITHUB_OUTPUT}"

  run-benchmarks:
-    name: Run benchmarks
+    name: benchmark_gpu/run-benchmarks
    needs: parse-inputs
    uses: ./.github/workflows/benchmark_gpu_common.yml
    with:
--- a/.github/workflows/benchmark_gpu_4090.yml
+++ b/.github/workflows/benchmark_gpu_4090.yml
@@ -1,5 +1,5 @@
 # Run benchmarks on an RTX 4090 machine and return parsed results to Slab CI bot.
-name: TFHE Cuda Backend - 4090 benchmarks
+name: benchmark_gpu_4090

 env:
  CARGO_TERM_COLOR: always
@@ -27,7 +27,7 @@ permissions:

 jobs:
  cuda-integer-benchmarks:
-    name: Cuda integer benchmarks (RTX 4090)
+    name: benchmark_gpu_4090/cuda-integer-benchmarks
    if: ${{ github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs' ||
      contains(github.event.label.name, '4090_bench') }}
@@ -38,7 +38,7 @@ jobs:
    timeout-minutes: 1440 # 24 hours
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -57,12 +57,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -111,7 +111,7 @@ jobs:
          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  cuda-core-crypto-benchmarks:
-    name: Cuda core crypto benchmarks  (RTX 4090)
+    name: benchmark_gpu_4090/cuda-core-crypto-benchmarks
    if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
    needs: cuda-integer-benchmarks
    concurrency:
@@ -122,7 +122,7 @@ jobs:

    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -140,12 +140,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -195,7 +195,7 @@ jobs:
          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  remove_github_label:
-    name: Remove 4090 bench label
+    name: benchmark_gpu_4090/remove_github_label
    if: ${{ always() && github.event_name == 'pull_request' }}
    needs: [cuda-integer-benchmarks, cuda-core-crypto-benchmarks]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_gpu_common.yml
+++ b/.github/workflows/benchmark_gpu_common.yml
@@ -1,5 +1,5 @@
 # Run benchmarks on CUDA instance and return parsed results to Slab CI bot.
-name: Cuda benchmarks - common
+name: benchmark_gpu_common

 on:
  workflow_call:
@@ -63,7 +63,7 @@ permissions: {}

 jobs:
  prepare-matrix:
-    name: Prepare operations matrix
+    name: benchmark_gpu_common/prepare-matrix
    runs-on: ubuntu-latest
    outputs:
      command: ${{ steps.set_command.outputs.command }}
@@ -141,7 +141,7 @@ jobs:
          echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}" >> "${GITHUB_OUTPUT}"

  setup-instance:
-    name: Setup instance (cuda-${{ inputs.profile }}-benchmarks)
+    name: benchmark_gpu_common/setup-instance
    needs: prepare-matrix
    runs-on: ubuntu-latest
    outputs:
@@ -185,18 +185,18 @@ jobs:

  # Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
  install-dependencies:
-    name: Install dependencies
+    name: benchmark_gpu_common/install-dependencies
    needs: [ setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
      matrix:
        # explicit include-based build matrix, of known valid options
        include:
-          - cuda: "12.2"
+          - cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -210,7 +210,7 @@ jobs:
          gcc-version: ${{ matrix.gcc }}

  cuda-benchmarks:
-    name: Cuda benchmarks (${{ inputs.profile }})
+    name: benchmark_gpu_common/cuda-benchmarks
    needs: [ prepare-matrix, setup-instance, install-dependencies ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    timeout-minutes: 1440 # 24 hours
@@ -224,13 +224,13 @@ jobs:
        params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
        # explicit include-based build matrix, of known valid options
        include:
-          - cuda: "12.2"
+          - cuda: "12.8"
            gcc: 11
    env:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -269,7 +269,7 @@ jobs:
          GCC_VERSION: ${{ matrix.gcc }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

@@ -312,7 +312,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -329,7 +329,7 @@ jobs:
          SLAB_URL: ${{ secrets.SLAB_URL }}

  slack-notify:
-    name: Slack Notification
+    name: benchmark_gpu_common/slack-notify
    needs: [ setup-instance, cuda-benchmarks ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-benchmarks.result != 'skipped' && failure() }}
@@ -342,7 +342,7 @@ jobs:
          SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
+    name: benchmark_gpu_common/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_gpu_dex.yml
+++ b/.github/workflows/benchmark_gpu_dex.yml
@@ -1,5 +1,5 @@
 # Run CUDA DEX benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
-name: Cuda DEX benchmarks
+name: benchmark_gpu_dex

 on:
  workflow_dispatch:
@@ -23,6 +23,7 @@ permissions: {}

 jobs:
  parse-inputs:
+    name: benchmark_gpu_dex/parse-inputs
    runs-on: ubuntu-latest
    outputs:
      profile: ${{ steps.parse_profile.outputs.profile }}
@@ -47,7 +48,7 @@ jobs:
          echo "name=${NAME}" >> "${GITHUB_OUTPUT}"

  run-benchmarks:
-    name: Run benchmarks
+    name: benchmark_gpu_dex/run-benchmarks
    needs: parse-inputs
    uses: ./.github/workflows/benchmark_gpu_dex_common.yml
    with:
--- a/.github/workflows/benchmark_gpu_dex_common.yml
+++ b/.github/workflows/benchmark_gpu_dex_common.yml
@@ -1,5 +1,5 @@
 # Run DEX benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
-name: Cuda DEX benchmarks - common
+name: benchmark_gpu_dex_common

 on:
  workflow_call:
@@ -47,7 +47,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (cuda-dex-benchmarks)
+    name: benchmark_gpu_dex_common/setup-instance
    runs-on: ubuntu-latest
    if:  github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -91,7 +91,7 @@ jobs:
          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"

  cuda-dex-benchmarks:
-    name: Cuda DEX benchmarks (${{ inputs.profile }})
+    name: benchmark_gpu_dex_common/cuda-dex-benchmarks
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -100,11 +100,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -129,7 +129,7 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

@@ -160,7 +160,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -177,7 +177,7 @@ jobs:
          SLAB_URL: ${{ secrets.SLAB_URL }}

  slack-notify:
-    name: Slack Notification
+    name: benchmark_gpu_dex_common/slack-notify
    needs: [ setup-instance, cuda-dex-benchmarks ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-dex-benchmarks.result != 'skipped' && failure() }}
@@ -190,7 +190,7 @@ jobs:
          SLACK_MESSAGE: "Cuda DEX benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-dex-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (cuda-dex-${{ inputs.profile }}-benchmarks)
+    name: benchmark_gpu_dex_common/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-dex-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_gpu_dex_weekly.yml
+++ b/.github/workflows/benchmark_gpu_dex_weekly.yml
@@ -1,5 +1,5 @@
 # Run CUDA DEX benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
-name: Cuda DEX weekly benchmarks
+name: benchmark_gpu_dex_weekly

 on:
  schedule:
@@ -10,7 +10,7 @@ permissions: {}

 jobs:
  run-benchmarks-1-h100:
-    name: Run benchmarks (1xH100)
+    name: benchmark_gpu_dex_weekly/run-benchmarks-1-h100
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_dex_common.yml
    with:
@@ -27,7 +27,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-2-h100:
-    name: Run benchmarks (2xH100)
+    name: benchmark_gpu_dex_weekly/run-benchmarks-2-h100
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_dex_common.yml
    with:
@@ -44,7 +44,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-8-h100:
-    name: Run benchmarks (8xH100)
+    name: benchmark_gpu_dex_weekly/run-benchmarks-8-h100
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_dex_common.yml
    with:
--- a/.github/workflows/benchmark_gpu_erc20.yml
+++ b/.github/workflows/benchmark_gpu_erc20.yml
@@ -1,5 +1,5 @@
 # Run CUDA ERC20 benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
-name: Cuda ERC20 benchmarks
+name: benchmark_gpu_erc20

 on:
  workflow_dispatch:
@@ -24,6 +24,7 @@ permissions: {}

 jobs:
  parse-inputs:
+    name: benchmark_gpu_erc20/parse-inputs
    runs-on: ubuntu-latest
    outputs:
      profile: ${{ steps.parse_profile.outputs.profile }}
@@ -48,7 +49,7 @@ jobs:
          echo "name=${NAME}" >> "${GITHUB_OUTPUT}"

  run-benchmarks:
-    name: Run benchmarks
+    name: benchmark_gpu_erc20/run-benchmarks
    needs: parse-inputs
    uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
    with:
--- a/.github/workflows/benchmark_gpu_erc20_common.yml
+++ b/.github/workflows/benchmark_gpu_erc20_common.yml
@@ -1,5 +1,5 @@
 # Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
-name: Cuda ERC20 benchmarks - common
+name: benchmark_gpu_erc20_common

 on:
  workflow_call:
@@ -48,7 +48,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (cuda-erc20-benchmarks)
+    name: benchmark_gpu_erc20_common/setup-instance
    runs-on: ubuntu-latest
    if:  github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -92,7 +92,7 @@ jobs:
          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"

  cuda-erc20-benchmarks:
-    name: Cuda ERC20 benchmarks (${{ inputs.profile }})
+    name: benchmark_gpu_erc20_common/cuda-erc20-benchmarks
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -101,11 +101,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -130,7 +130,7 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

@@ -161,7 +161,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -178,7 +178,7 @@ jobs:
          SLAB_URL: ${{ secrets.SLAB_URL }}

  slack-notify:
-    name: Slack Notification
+    name: benchmark_gpu_erc20_common/slack-notify
    needs: [ setup-instance, cuda-erc20-benchmarks ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }}
@@ -191,7 +191,7 @@ jobs:
          SLACK_MESSAGE: "Cuda ERC20 benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
+    name: benchmark_gpu_erc20_common/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_gpu_erc20_weekly.yml
+++ b/.github/workflows/benchmark_gpu_erc20_weekly.yml
@@ -1,5 +1,5 @@
 # Run CUDA ERC20 benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
-name: Cuda ERC20 weekly benchmarks
+name: benchmark_gpu_erc20_weekly

 on:
  schedule:
@@ -11,7 +11,7 @@ permissions: {}

 jobs:
  run-benchmarks-1-h100:
-    name: Run benchmarks (1xH100)
+    name: benchmark_gpu_erc20_weekly/run-benchmarks-1-h100
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
    with:
@@ -28,7 +28,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-2-h100:
-    name: Run benchmarks (2xH100)
+    name: benchmark_gpu_erc20_weekly/run-benchmarks-2-h100
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
    with:
@@ -45,7 +45,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-8-h100:
-    name: Run benchmarks (8xH100)
+    name: benchmark_gpu_erc20_weekly/run-benchmarks-8-h100
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_erc20_common.yml
    with:
--- a/.github/workflows/benchmark_gpu_weekly.yml
+++ b/.github/workflows/benchmark_gpu_weekly.yml
@@ -1,5 +1,5 @@
 # Run CUDA benchmarks on multiple Hyperstack VMs and return parsed results to Slab CI bot.
-name: Cuda weekly benchmarks
+name: benchmark_gpu_weekly

 on:
  schedule:
@@ -11,7 +11,7 @@ permissions: {}

 jobs:
  run-benchmarks-8-h100-sxm5-integer:
-    name: Run integer benchmarks (8xH100-SXM5)
+    name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_common.yml
    with:
@@ -32,7 +32,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-8-h100-sxm5-integer-compression:
-    name: Run integer compression benchmarks (8xH100-SXM5)
+    name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-compression
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_common.yml
    with:
@@ -53,7 +53,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-8-h100-sxm5-integer-zk:
-    name: Run integer zk benchmarks (8xH100-SXM5)
+    name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-zk
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_common.yml
    with:
@@ -74,7 +74,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-8-h100-sxm5-noise-squash:
-    name: Run integer zk benchmarks (8xH100-SXM5)
+    name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-noise-squash
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_common.yml
    with:
@@ -95,7 +95,7 @@ jobs:
      SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}

  run-benchmarks-1-h100-core-crypto:
-    name: Run core-crypto benchmarks (1xH100)
+    name: benchmark_gpu_weekly/run-benchmarks-1-h100-core-crypto
    if: github.repository == 'zama-ai/tfhe-rs'
    uses: ./.github/workflows/benchmark_gpu_common.yml
    with:
--- a/.github/workflows/benchmark_hpu_hlapi.yml
+++ b/.github/workflows/benchmark_hpu_hlapi.yml
@@ -0,0 +1,98 @@
+# Run all HLAPI benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
+name: benchmark_hpu_hlapi
+
+on:
+  workflow_dispatch:
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"
+
+permissions: {}
+
+jobs:
+  hlapi-benchmarks-hpu:
+    name: benchmark_hpu_hlapi/hlapi-benchmarks-hpu
+    runs-on: v80-desktop
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+    timeout-minutes: 1440  # 24 hours
+    steps:
+      # Needed as long as hw_regmap repository is private
+      - name: Configure SSH
+        uses: webfactory/ssh-agent@a6f90b1f127823b31d4d4a8d96047790581349bd # v0.9.1
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        with:
+          fetch-depth: 0
+          persist-credentials: 'false'
+          lfs: true
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Get benchmark details
+        run: |
+          COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
+          {
+            echo "BENCH_DATE=$(date --iso-8601=seconds)";
+            echo "COMMIT_DATE=${COMMIT_DATE}";
+            echo "COMMIT_HASH=$(git describe --tags --dirty)";
+          } >> "${GITHUB_ENV}"
+        env:
+          SHA: ${{ github.sha }}
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        with:
+          repository: zama-ai/slab
+          path: slab
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Run benchmarks
+        run: |
+          make pull_hpu_files
+          export V80_SERIAL_NUMBER=XFL12E4XJXWK
+          source /opt/xilinx/Vivado/2024.2/settings64.sh
+          make bench_hlapi_erc20_hpu
+          make bench_hlapi_hpu
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
+          --database tfhe_rs \
+          --hardware "hpu_x1" \
+          --backend hpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch "${REF_NAME}" \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${BENCH_DATE}" \
+          --walk-subdirs
+        env:
+          REF_NAME: ${{ github.ref_name }}
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        with:
+          name: ${{ github.sha }}_hlapi_benchmarks
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
+          --slab-url "${SLAB_URL}"
+        env:
+          JOB_SECRET: ${{ secrets.JOB_SECRET }}
+          SLAB_URL: ${{ secrets.SLAB_URL }}
--- a/.github/workflows/benchmark_hpu_integer.yml
+++ b/.github/workflows/benchmark_hpu_integer.yml
@@ -1,8 +1,20 @@
 # Run all integer benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
-name: Hpu Integer Benchmarks
+name: benchmark_hpu_integer

 on:
  workflow_dispatch:
+    inputs:
+      all_precisions:
+        description: "Run all precisions"
+        type: boolean
+      bench_type:
+        description: "Benchmarks type"
+        type: choice
+        default: both
+        options:
+          - latency
+          - throughput
+          - both

 env:
  CARGO_TERM_COLOR: always
@@ -10,17 +22,51 @@ env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
+  FAST_BENCH: "TRUE"

 permissions: {}

 jobs:
+  prepare-matrix:
+    name: benchmark_hpu_integer/prepare-matrix
+    runs-on: v80-desktop
+    outputs:
+      bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
+    steps:
+      - name: Set benchmark types
+        if: github.event_name == 'workflow_dispatch'
+        run: |
+          if [[ -z $INPUTS_BENCH_TYPE || "${INPUTS_BENCH_TYPE}" == "both" ]]; then
+            echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
+          else
+            echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
+          fi
+        env:
+          INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
+
+      - name: Default benchmark type
+        if: github.event_name != 'workflow_dispatch'
+        run: |
+          echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
+
+
+      - name: Set benchmark types output
+        id: set_bench_type
+        run: | # zizmor: ignore[template-injection] this env variable is safe
+          echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
+
  integer-benchmarks-hpu:
-    name: Execute integer & erc20 benchmarks for HPU backend
+    name: benchmark_hpu_integer/integer-benchmarks-hpu
+    needs: prepare-matrix
    runs-on: v80-desktop
    concurrency:
      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    timeout-minutes: 1440  # 24 hours
+    strategy:
+      max-parallel: 1
+      matrix:
+        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
    steps:
      # Needed as long as hw_regmap repository is private
      - name: Configure SSH
@@ -29,7 +75,7 @@ jobs:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,23 +94,31 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

+      - name: Should run benchmarks with all precisions
+        if: inputs.all_precisions
+        run: |
+          echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
+
      - name: Run benchmarks
        run: |
          make pull_hpu_files
-          make bench_integer_hpu
-          make bench_hlapi_erc20_hpu
+          export V80_SERIAL_NUMBER=XFL12E4XJXWK
+          source /opt/xilinx/Vivado/2024.2/settings64.sh
+          make BENCH_TYPE="${BENCH_TYPE}" bench_integer_hpu
+        env:
+          BENCH_TYPE: ${{ matrix.bench_type }}

      - name: Parse results
        run: |
@@ -76,14 +130,16 @@ jobs:
          --branch "${REF_NAME}" \
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${BENCH_DATE}" \
-          --walk-subdirs
+          --walk-subdirs \
+          --bench-type "${BENCH_TYPE}"
        env:
          REF_NAME: ${{ github.ref_name }}
+          BENCH_TYPE: ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
        with:
-          name: ${{ github.sha }}_integer_benchmarks
+          name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks
          path: ${{ env.RESULTS_FILENAME }}

      - name: Send data to Slab
--- a/.github/workflows/benchmark_integer.yml
+++ b/.github/workflows/benchmark_integer.yml
@@ -1,5 +1,5 @@
 # Run all integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: Integer benchmarks
+name: benchmark_integer

 on:
  workflow_dispatch:
@@ -41,7 +41,7 @@ permissions: {}

 jobs:
  prepare-matrix:
-    name: Prepare operations matrix
+    name: benchmark_integer/prepare-matrix
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -87,7 +87,7 @@ jobs:
          echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"

  setup-instance:
-    name: Setup instance (integer-benchmarks)
+    name: benchmark_integer/setup-instance
    needs: prepare-matrix
    runs-on: ubuntu-latest
    outputs:
@@ -105,7 +105,7 @@ jobs:
          profile: bench

  integer-benchmarks:
-    name: Execute integer benchmarks for all operations flavor
+    name: benchmark_integer/integer-benchmarks
    needs: [ prepare-matrix, setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
@@ -120,7 +120,7 @@ jobs:
        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -138,12 +138,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -211,7 +211,7 @@ jobs:
          SLACK_MESSAGE: "Integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (integer-benchmarks)
+    name: benchmark_integer/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, integer-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_shortint.yml
+++ b/.github/workflows/benchmark_shortint.yml
@@ -1,5 +1,5 @@
 # Run all shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: Shortint full benchmarks
+name: benchmark_shortint

 on:
  workflow_dispatch:
@@ -27,7 +27,7 @@ permissions: {}

 jobs:
  prepare-matrix:
-    name: Prepare operations matrix
+    name: benchmark_shortint/prepare-matrix
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -51,7 +51,7 @@ jobs:
          echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"

  setup-instance:
-    name: Setup instance (shortint-benchmarks)
+    name: benchmark_shortint/setup-instance
    needs: prepare-matrix
    runs-on: ubuntu-latest
    outputs:
@@ -69,7 +69,7 @@ jobs:
          profile: bench

  shortint-benchmarks:
-    name: Execute shortint benchmarks for all operations flavor
+    name: benchmark_shortint/shortint-benchmarks
    needs: [ prepare-matrix, setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
@@ -81,7 +81,7 @@ jobs:
        op_flavor: ${{ fromJson(needs.prepare-matrix.outputs.op_flavor) }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -99,12 +99,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -168,7 +168,7 @@ jobs:
          SLACK_MESSAGE: "Shortint full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (shortint-benchmarks)
+    name: benchmark_shortint/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, shortint-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_signed_integer.yml
+++ b/.github/workflows/benchmark_signed_integer.yml
@@ -1,5 +1,5 @@
 # Run all signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: Signed Integer full benchmarks
+name: benchmark_signed_integer

 on:
  workflow_dispatch:
@@ -41,7 +41,7 @@ permissions: {}

 jobs:
  prepare-matrix:
-    name: Prepare operations matrix
+    name: benchmark_signed_integer/prepare-matrix
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -87,7 +87,7 @@ jobs:
          echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"

  setup-instance:
-    name: Setup instance (signed-integer-benchmarks)
+    name: benchmark_signed_integer/setup-instance
    needs: prepare-matrix
    runs-on: ubuntu-latest
    outputs:
@@ -105,7 +105,7 @@ jobs:
          profile: bench

  signed-integer-benchmarks:
-    name: Execute signed integer benchmarks for all operations flavor
+    name: benchmark_signed_integer/signed-integer-benchmarks
    needs: [ prepare-matrix, setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
@@ -120,7 +120,7 @@ jobs:
        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -138,12 +138,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -203,7 +203,7 @@ jobs:
          SLACK_MESSAGE: "Signed integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (integer-benchmarks)
+    name: benchmark_signed_integer/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, signed-integer-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_tfhe_fft.yml
+++ b/.github/workflows/benchmark_tfhe_fft.yml
@@ -1,5 +1,5 @@
 # Run FFT benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: FFT benchmarks
+name: benchmark_tfhe_fft

 env:
  CARGO_TERM_COLOR: always
@@ -27,8 +27,8 @@ on:
 permissions: {}

 jobs:
-  setup-ec2:
-    name: Setup EC2 instance (fft-benchmarks)
+  setup-instance:
+    name: benchmark_tfhe_fft/setup-instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
@@ -45,15 +45,15 @@ jobs:
          profile: bench

  fft-benchmarks:
-    name: Execute FFT benchmarks in EC2
-    needs: setup-ec2
+    name: benchmark_tfhe_fft/fft-benchmarks
+    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -100,7 +100,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -124,10 +124,10 @@ jobs:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "tfhe-fft benchmarks failed. (${{ env.ACTION_RUN_URL }})"

-  teardown-ec2:
-    name: Teardown EC2 instance (fft-benchmarks)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, fft-benchmarks ]
+  teardown-instance:
+    name: benchmark_tfhe_fft/teardown-instance
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    needs: [ setup-instance, fft-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
@@ -138,7 +138,7 @@ jobs:
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}

      - name: Slack Notification
        if: ${{ failure() }}
@@ -146,4 +146,4 @@ jobs:
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (fft-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (fft-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/benchmark_tfhe_ntt.yml
+++ b/.github/workflows/benchmark_tfhe_ntt.yml
@@ -1,5 +1,5 @@
 # Run NTT benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: NTT benchmarks
+name: benchmark_tfhe_ntt

 env:
  CARGO_TERM_COLOR: always
@@ -27,8 +27,8 @@ on:
 permissions: {}

 jobs:
-  setup-ec2:
-    name: Setup EC2 instance (ntt-benchmarks)
+  setup-instance:
+    name: benchmark_tfhe_ntt/setup-instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
@@ -45,15 +45,15 @@ jobs:
          profile: bench

  ntt-benchmarks:
-    name: Execute NTT benchmarks in EC2
-    needs: setup-ec2
+    name: benchmark_tfhe_ntt/ntt-benchmarks
+    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -100,7 +100,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -124,10 +124,10 @@ jobs:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "tfhe-ntt benchmarks failed. (${{ env.ACTION_RUN_URL }})"

-  teardown-ec2:
-    name: Teardown EC2 instance (ntt-benchmarks)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [setup-ec2, ntt-benchmarks]
+  teardown-instance:
+    name: benchmark_tfhe_ntt/teardown-instance
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    needs: [setup-instance, ntt-benchmarks]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
@@ -138,7 +138,7 @@ jobs:
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}

      - name: Slack Notification
        if: ${{ failure() }}
@@ -146,4 +146,4 @@ jobs:
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (ntt-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (ntt-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/benchmark_tfhe_zk_pok.yml
+++ b/.github/workflows/benchmark_tfhe_zk_pok.yml
@@ -1,5 +1,5 @@
 # Run benchmarks of the tfhe-zk-pok crate on an instance and return parsed results to Slab CI bot.
-name: tfhe-zk-pok benchmarks
+name: benchmark_tfhe_zk_pok

 on:
  workflow_dispatch:
@@ -35,6 +35,7 @@ permissions: {}

 jobs:
  should-run:
+    name: benchmark_tfhe_zk_pok/should-run
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' ||
      ((github.event_name == 'push' || github.event_name == 'schedule') && github.repository == 'zama-ai/tfhe-rs')
@@ -42,7 +43,7 @@ jobs:
      zk_pok_changed: ${{ steps.changed-files.outputs.zk_pok_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -58,7 +59,7 @@ jobs:
              - .github/workflows/benchmark_tfhe_zk_pok.yml

  setup-instance:
-    name: Setup instance (tfhe-zk-pok-benchmarks)
+    name: benchmark_tfhe_zk_pok/setup-instance
    runs-on: ubuntu-latest
    needs: should-run
    if: github.event_name == 'workflow_dispatch' ||
@@ -81,7 +82,7 @@ jobs:
          profile: bench

  tfhe-zk-pok-benchmarks:
-    name: Execute tfhe-zk-pok benchmarks
+    name: benchmark_tfhe_zk_pok/tfhe-zk-pok-benchmarks
    if: needs.setup-instance.result != 'skipped'
    needs: setup-instance
    concurrency:
@@ -90,7 +91,7 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -108,12 +109,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -148,7 +149,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -173,7 +174,7 @@ jobs:
          SLACK_MESSAGE: "tfhe-zk-pok benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (tfhe-zk-pok-benchmarks)
+    name: benchmark_tfhe_zk_pok/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_wasm_client.yml
+++ b/.github/workflows/benchmark_wasm_client.yml
@@ -1,5 +1,5 @@
 # Run WASM client benchmarks on an instance and return parsed results to Slab CI bot.
-name: WASM client benchmarks
+name: benchmark_wasm_client

 on:
  workflow_dispatch:
@@ -26,6 +26,7 @@ permissions: {}

 jobs:
  should-run:
+    name: benchmark_wasm_client/should-run
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
@@ -36,7 +37,7 @@ jobs:
      wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -57,7 +58,7 @@ jobs:
              - .github/workflows/wasm_client_benchmark.yml

  setup-instance:
-    name: Setup instance (wasm-client-benchmarks)
+    name: benchmark_wasm_client/setup-instance
    if: github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.wasm_bench)
@@ -78,7 +79,7 @@ jobs:
          profile: cpu-small

  wasm-client-benchmarks:
-    name: Execute WASM client benchmarks
+    name: benchmark_wasm_client/wasm-client-benchmarks
    needs: setup-instance
    if: needs.setup-instance.result != 'skipped'
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
@@ -88,7 +89,7 @@ jobs:
        browser: [ chrome, firefox ]
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -106,7 +107,7 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

@@ -116,7 +117,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        with:
          path: |
            ~/.nvm
@@ -129,7 +130,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
+        uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -185,7 +186,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -210,7 +211,7 @@ jobs:
          SLACK_MESSAGE: "WASM benchmarks (${{ matrix.browser }}) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (wasm-client-benchmarks)
+    name: benchmark_wasm_client/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, wasm-client-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/benchmark_zk_pke.yml
+++ b/.github/workflows/benchmark_zk_pke.yml
@@ -1,5 +1,5 @@
 # Run PKE Zero-Knowledge benchmarks on an instance and return parsed results to Slab CI bot.
-name: PKE ZK benchmarks
+name: benchmark_zk_pke

 on:
  workflow_dispatch:
@@ -36,6 +36,7 @@ permissions: {}

 jobs:
  should-run:
+    name: benchmark_zk_pke/should-run
    runs-on: ubuntu-latest
    if: github.event_name == 'workflow_dispatch' ||
      ((github.event_name == 'push' || github.event_name == 'schedule') && github.repository == 'zama-ai/tfhe-rs')
@@ -43,7 +44,7 @@ jobs:
      zk_pok_changed: ${{ steps.changed-files.outputs.zk_pok_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -67,7 +68,7 @@ jobs:
              - .github/workflows/zk_pke_benchmark.yml

  prepare-matrix:
-    name: Prepare operations matrix
+    name: benchmark_zk_pke/prepare-matrix
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
@@ -96,7 +97,7 @@ jobs:
          echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"

  setup-instance:
-    name: Setup instance (pke-zk-benchmarks)
+    name: benchmark_zk_pke/setup-instance
    runs-on: ubuntu-latest
    needs: [ should-run, prepare-matrix ]
    if: github.event_name == 'workflow_dispatch' ||
@@ -119,7 +120,7 @@ jobs:
          profile: bench

  pke-zk-benchmarks:
-    name: Execute PKE ZK benchmarks
+    name: benchmark_zk_pke/pke-zk-benchmarks
    if: needs.setup-instance.result != 'skipped'
    needs: [ prepare-matrix, setup-instance ]
    concurrency:
@@ -132,7 +133,7 @@ jobs:
        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
    steps:
      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -150,12 +151,12 @@ jobs:
          SHA: ${{ github.sha }}

      - name: Install rust
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -198,7 +199,7 @@ jobs:
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
@@ -223,7 +224,7 @@ jobs:
          SLACK_MESSAGE: "PKE ZK benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (pke-zk-benchmarks)
+    name: benchmark_zk_pke/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, pke-zk-benchmarks ]
    runs-on: ubuntu-latest
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -1,4 +1,4 @@
-name: Cargo Build TFHE-rs
+name: cargo_build

 on:
  pull_request:
@@ -19,6 +19,7 @@ permissions:

 jobs:
  cargo-builds:
+    name: cargo_build/cargo-builds (bpr)
    runs-on: ${{ matrix.os }}

    strategy:
@@ -29,13 +30,13 @@ jobs:
      fail-fast: false

    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -49,14 +50,6 @@ jobs:
          mv linelint-linux-amd64 /usr/local/bin/linelint
          make check_newline

-      # This is needed for the ws tests clippy checks
-      - name: Use specific data branch
-        if: ${{ contains(github.event.pull_request.labels.*.name, 'data_PR') }}
-        env:
-          PR_BRANCH: ${{ github.head_ref || github.ref_name }}
-        run: |
-          echo "BACKWARD_COMPAT_DATA_BRANCH=${PR_BRANCH}" >> "${GITHUB_ENV}"
-
      - name: Run pcc checks
        if: ${{ contains(matrix.os, 'ubuntu') }}
        run: |
@@ -67,6 +60,11 @@ jobs:
        run: |
          make build_tfhe_csprng

+      - name: Build with MSRV
+        if: ${{ contains(matrix.os, 'ubuntu') }}
+        run: |
+          make build_tfhe_msrv
+
      - name: Build Release core
        if: ${{ contains(matrix.os, 'ubuntu') }}
        run: |
--- a/.github/workflows/cargo_build_tfhe_fft.yml
+++ b/.github/workflows/cargo_build_tfhe_fft.yml
@@ -1,5 +1,5 @@
 # Build tfhe-fft
-name: Cargo Build tfhe-fft
+name: cargo_build_tfhe_fft

 on:
  pull_request:
@@ -17,6 +17,7 @@ permissions:

 jobs:
  cargo-builds-fft:
+    name: cargo_build_tfhe_fft/cargo-builds-fft (bpr)
    runs-on: ${{ matrix.runner_type }}

    strategy:
@@ -25,7 +26,7 @@ jobs:
      fail-fast: false

    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/cargo_build_tfhe_ntt.yml
+++ b/.github/workflows/cargo_build_tfhe_ntt.yml
@@ -1,5 +1,5 @@
 # Build tfhe-ntt
-name: Cargo Build tfhe-ntt
+name: cargo_build_tfhe_ntt

 on:
  pull_request:
@@ -17,13 +17,14 @@ permissions:

 jobs:
  cargo-builds-ntt:
+    name: cargo_build_tfhe_ntt/cargo-builds-ntt (bpr)
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
      fail-fast: false
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/cargo_test_fft.yml
+++ b/.github/workflows/cargo_test_fft.yml
@@ -1,5 +1,5 @@
 # Test tfhe-fft
-name: Cargo Test tfhe-fft
+name: cargo_test_fft

 on:
  pull_request:
@@ -21,6 +21,7 @@ permissions:

 jobs:
  should-run:
+    name: cargo_test_fft/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -28,7 +29,7 @@ jobs:
      fft_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.fft_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -46,6 +47,7 @@ jobs:
              - '.github/workflows/cargo_test_fft.yml'

  cargo-tests-fft:
+    name: cargo_test_fft/cargo-tests-fft
    needs: should-run
    if: needs.should-run.outputs.fft_test == 'true'
    runs-on: ${{ matrix.runner_type }}
@@ -54,7 +56,7 @@ jobs:
        runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
      fail-fast: false
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -77,6 +79,7 @@ jobs:
          make test_fft_no_std

  cargo-tests-fft-nightly:
+    name: cargo_test_fft/cargo-tests-fft-nightly
    needs: should-run
    if: needs.should-run.outputs.fft_test == 'true'
    runs-on: ${{ matrix.runner_type }}
@@ -84,7 +87,7 @@ jobs:
      matrix:
        runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -104,11 +107,12 @@ jobs:
          make test_fft_no_std_nightly

  cargo-tests-fft-node-js:
+    name: cargo_test_fft/cargo-tests-fft-node-js
    needs: should-run
    if: needs.should-run.outputs.fft_test == 'true'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -119,6 +123,7 @@ jobs:
          make test_fft_node_js_ci

  cargo-tests-fft-successful:
+    name: cargo_test_fft/cargo-tests-fft-successful (bpr)
    needs: [ should-run, cargo-tests-fft, cargo-tests-fft-nightly, cargo-tests-fft-node-js ]
    if: ${{ always() }}
    runs-on: ubuntu-latest
--- a/.github/workflows/cargo_test_ntt.yml
+++ b/.github/workflows/cargo_test_ntt.yml
@@ -1,5 +1,5 @@
 # Test tfhe-ntt
-name: Cargo Test tfhe-ntt
+name: cargo_test_ntt

 on:
  pull_request:
@@ -11,6 +11,7 @@ env:
  CARGO_TERM_COLOR: always
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
@@ -21,6 +22,7 @@ permissions:

 jobs:
  should-run:
+    name: cargo_test_ntt/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -28,10 +30,10 @@ jobs:
      ntt_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ntt_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
+          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
@@ -45,18 +47,48 @@ jobs:
              - tfhe-ntt/**
              - '.github/workflows/cargo_test_ntt.yml'

-  cargo-tests-ntt:
+  setup-instance:  # Starts a Slab runner when secrets are available and builds the OS matrix for the test jobs
+    name: cargo_test_ntt/setup-instance
    needs: should-run
    if: needs.should-run.outputs.ntt_test == 'true'
+    runs-on: ubuntu-latest
+    outputs:
+      matrix_os: ${{ steps.set-os-matrix.outputs.matrix_os }}  # JSON array string, decoded with fromJson by the test jobs
+      runner-name: ${{ steps.start-remote-instance.outputs.label }}  # empty when the start step is skipped
+    steps:
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'  # skipped unless SECRETS_AVAILABLE is 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: aws
+          profile: cpu-small
+
+      - name: Set os matrix  # first matrix entry is the Slab label, or ubuntu-latest when that label is empty
+        id: set-os-matrix
+        env:
+          SLAB_INSTANCE: ${{ steps.start-remote-instance.outputs.label }}  # empty if no remote instance was started
+        run: |
+          INSTANCE_TO_USE="${SLAB_INSTANCE:-ubuntu-latest}"
+          echo "matrix_os=[\"${INSTANCE_TO_USE}\", \"macos-latest\", \"windows-latest\"]" >> "$GITHUB_OUTPUT"
+
+  cargo-tests-ntt:
+    name: cargo_test_ntt/cargo-tests-ntt
+    needs: [should-run, setup-instance]
+    if: needs.should-run.outputs.ntt_test == 'true'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [ ubuntu-latest, macos-latest, windows-latest ]
+        os: ${{fromJson(needs.setup-instance.outputs.matrix_os)}}
      fail-fast: false
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
-          persist-credentials: 'false'
+          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install Rust
@@ -72,16 +104,17 @@ jobs:
        run: make test_ntt_no_std

  cargo-tests-ntt-nightly:
-    needs: should-run
+    name: cargo_test_ntt/cargo-tests-ntt-nightly
+    needs: [should-run, setup-instance]
    if: needs.should-run.outputs.ntt_test == 'true'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [ ubuntu-latest, macos-latest, windows-latest ]
+        os: ${{fromJson(needs.setup-instance.outputs.matrix_os)}}
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
-          persist-credentials: 'false'
+          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install Rust
@@ -97,7 +130,8 @@ jobs:
        run: make test_ntt_no_std_nightly

  cargo-tests-ntt-successful:
-    needs: [ should-run, cargo-tests-ntt, cargo-tests-ntt-nightly ]
+    name: cargo_test_ntt/cargo-tests-ntt-successful (bpr)
+    needs: [should-run, cargo-tests-ntt, cargo-tests-ntt-nightly]
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
@@ -120,3 +154,28 @@ jobs:
        run: |
          echo "Some tfhe-ntt tests failed"
          exit 1
+
+  teardown-instance:  # Stops the Slab runner started by setup-instance
+    name: cargo_test_ntt/teardown-instance
+    if: ${{ always() && needs.setup-instance.result == 'success' }}  # runs even when the jobs it depends on failed
+    needs: [setup-instance, cargo-tests-ntt-successful]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop remote instance
+        id: stop-instance
+        if: env.SECRETS_AVAILABLE == 'true'  # same guard as the start step: nothing to stop otherwise
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}  # only when an earlier step of this job failed
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:  # NOTE(review): ACTION_RUN_URL and the SLACK_* settings are not visible in this workflow's env hunk - confirm they are defined
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Instance teardown (cargo-tests-ntt) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/check_commit.yml
+++ b/.github/workflows/check_commit.yml
@@ -1,5 +1,5 @@
 # Check commit and PR compliance
-name: Check commit and PR compliance
+name: check_commit
 on:
  pull_request:

@@ -7,7 +7,7 @@ permissions: {}

 jobs:
  check-commit-pr:
-    name: Check commit and PR
+    name: check_commit/check-commit-pr (bpr)
    runs-on: ubuntu-latest
    permissions:
      contents: read
--- a/.github/workflows/ci_lint.yml
+++ b/.github/workflows/ci_lint.yml
@@ -1,5 +1,5 @@
 # Lint and check CI
-name: CI Lint and Checks
+name: ci_lint

 on:
  pull_request:
@@ -14,11 +14,11 @@ permissions:

 jobs:
  lint-check:
-    name: Lint and checks
+    name: ci_lint/lint-check (bpr)
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -1,4 +1,4 @@
-name: Code Coverage
+name: code_coverage

 env:
  CARGO_TERM_COLOR: always
@@ -22,7 +22,7 @@ permissions:

 jobs:
  setup-instance:
-    name: Setup instance (code-coverage)
+    name: code_coverage/setup-instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
@@ -38,8 +38,8 @@ jobs:
          backend: aws
          profile: cpu-small

-  code-coverage:
-    name: Code coverage tests
+  code-coverage-tests:
+    name: code_coverage/code-coverage-tests
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}_${{ github.event_name }}
@@ -48,13 +48,13 @@ jobs:
    timeout-minutes: 5760 # 4 days
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -90,7 +90,7 @@ jobs:
          make test_shortint_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24
+        uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -104,7 +104,7 @@ jobs:
          make test_integer_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24
+        uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -121,9 +121,9 @@ jobs:
          SLACK_MESSAGE: "Code coverage finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (code-coverage)
+    name: code_coverage/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
-    needs: [ setup-instance, code-coverage ]
+    needs: [ setup-instance, code-coverage-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
@@ -142,4 +142,4 @@ jobs:
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (code-coverage) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (code-coverage-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/coprocessor-benchmark-gpu.yml
+++ b/.github/workflows/coprocessor-benchmark-gpu.yml
@@ -0,0 +1,291 @@
+# Run all fhevm coprocessor benchmarks on a GPU instance on Hyperstack and return parsed results to Slab CI bot.
+name: coprocessor-benchmark-gpu
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Weekly tests @ 1AM
+    - cron: "0 1 * * 6"
+
+permissions:
+  contents: read
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  PROFILE: "multi-h100-sxm5 (n3-H100x8-SXM5)"
+  BENCHMARK_TYPE: "ALL"
+  OPTIMIZATION_TARGET: "throughput"
+  BATCH_SIZE: "5000"
+  SCHEDULING_POLICY: "MAX_PARALLELISM"
+  BENCHMARKS: "erc20"
+  BRANCH_NAME: ${{ github.ref_name }}
+  COMMIT_SHA: ${{ github.sha }}
+  SLAB_SECRET: ${{ secrets.JOB_SECRET }}
+
+jobs:
+  parse-inputs:  # Splits the workflow-level PROFILE value, "<profile> (<hardware name>)", into two job outputs
+    name: coprocessor-benchmark-gpu/parse-inputs
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+    outputs:
+      profile: ${{ steps.parse_profile.outputs.profile }}  # text before the parenthesised part
+      hardware_name: ${{ steps.parse_hardware_name.outputs.name }}  # text inside the parentheses
+    steps:
+      - name: Parse profile  # keeps what precedes the whitespace and "(...)" suffix
+        id: parse_profile
+        run: |
+          # shellcheck disable=SC2001
+          PROFILE_VAL=$(echo "${PROFILE}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
+          echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
+
+      - name: Parse hardware name  # keeps what is enclosed in the trailing "(...)"
+        id: parse_hardware_name
+        run: |
+          # shellcheck disable=SC2001
+          PROFILE_VAL=$(echo "${PROFILE}" | sed 's|.*[[:space:]](\(.*\))|\1|')
+          echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
+
+  setup-instance:  # Starts the remote runner on the hyperstack backend through Slab
+    name: coprocessor-benchmark-gpu/setup-instance
+    needs: parse-inputs
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+    outputs:
+      runner-name: ${{ steps.start-remote-instance.outputs.label }}  # used as runs-on by benchmark-gpu and as label by teardown-instance
+    steps:
+      - name: Start remote instance
+        id: start-remote-instance
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: hyperstack
+          profile: ${{ needs.parse-inputs.outputs.profile }}  # profile part extracted from PROFILE by parse-inputs
+
+  benchmark-gpu:  # Runs the coprocessor profiling and benchmark steps on the runner started by setup-instance
+    name: coprocessor-benchmark-gpu/benchmark-gpu (bpr)
+    needs: [ parse-inputs, setup-instance ]
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    continue-on-error: true  # a failure of this job does not fail the workflow run
+    timeout-minutes: 720  # 12 hours
+    permissions:
+      contents: 'read'
+      packages: 'read'
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04  # not referenced by the steps of this job; runs-on comes from setup-instance
+            cuda: "12.8"
+            gcc: 11
+    env:
+      HW_NAME: "${{ needs.parse-inputs.outputs.hardware_name }}"  # passed to benchmark_parser.py as --hardware
+
+    steps:
+      - name: Install git LFS
+        run: |
+          sudo apt-get remove -y unattended-upgrades
+          sudo apt-get update
+          sudo apt-get install -y git-lfs protobuf-compiler
+          git lfs install
+
+      - name: Checkout tfhe-rs  # checked out into ./tfhe-rs, next to the fhevm checkout
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          path: tfhe-rs
+          persist-credentials: 'false'
+
+      - name: Check fhEVM and TFHE-rs repos
+        run: |
+          pwd
+          ls
+
+      - name: Checkout fhevm  # full history plus LFS, checked out into ./fhevm
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          repository: zama-ai/fhevm
+          persist-credentials: 'false'
+          fetch-depth: 0
+          lfs: true
+          ref: antoniu/use-tfhe-main-benches  # NOTE(review): tracks a branch, not a tag or SHA - confirm this is intended
+          path: fhevm
+
+      - name: Get benchmark details  # exports BENCH_DATE, COMMIT_DATE and COMMIT_HASH to later steps through GITHUB_ENV
+        run: |
+          COMMIT_DATE_ENV=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${COMMIT_SHA}")
+          {
+            echo "BENCH_DATE=$(date --iso-8601=seconds)";
+            echo "COMMIT_DATE=$COMMIT_DATE_ENV";
+            echo "COMMIT_HASH=$(git rev-parse HEAD)";
+          } >> "${GITHUB_ENV}"
+        working-directory: tfhe-rs/  # git metadata is read from the tfhe-rs checkout
+
+      - name: Move TFHE-rs checkout into fhEVM coprocessor  # relocates ./tfhe-rs under fhevm/coprocessor/
+        run: |
+          pwd
+          ls
+          mv tfhe-rs fhevm/coprocessor/
+
+      - name: Checkout LFS objects
+        run: git lfs checkout
+        working-directory: fhevm/
+
+      - name: Setup Hyperstack dependencies  # toolchain versions come from the job matrix
+        uses: ./fhevm/.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: 'false'  # SECRETS_AVAILABLE is not defined in this workflow; the old expression always evaluated to false
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        with:
+          toolchain: nightly
+
+      - name: Install cargo dependencies  # NOTE(review): newgrp in a non-interactive step presumably has no lasting effect; setfacl looks like what opens the docker socket - confirm
+        run: |
+          sudo apt-get install -y protobuf-compiler cmake pkg-config libssl-dev \
+                                  libclang-dev docker-compose-v2 docker.io acl
+          sudo usermod -aG docker "$USER"
+          newgrp docker
+          sudo setfacl --modify user:"$USER":rw /var/run/docker.sock
+          cargo install sqlx-cli
+
+      - name: Install foundry
+        uses: foundry-rs/foundry-toolchain@82dee4ba654bd2146511f85f0d013af94670c4de
+
+      - name: Cache cargo
+        uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            target
+          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: ${{ runner.os }}-cargo-
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Init database
+        run: make init_db
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Use Node.js
+        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
+        with:
+          node-version: 20.x
+
+      - name: Build contracts
+        env:
+          HARDHAT_NETWORK: hardhat
+        run: |
+          ls
+          pwd
+          cp ./host-contracts/.env.example ./host-contracts/.env
+          npm --prefix ./host-contracts ci --include=optional
+          cd host-contracts && npm install && npm run deploy:emptyProxies && npx hardhat compile
+        working-directory: fhevm/
+
+      - name: Profile erc20 no-cmux benchmark on GPU
+        run: |
+          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "profile_erc20_gpu"
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Get nsys profile name
+        id: nsys_profile_name
+        run: echo "profile=coprocessor_profile_$(date +"%Y-%m-%d-%Hh").nsys-rep" >> "$GITHUB_OUTPUT"
+
+      - name: Timestamp nsys profile  # renames the report to the name computed by the nsys_profile_name step
+        env:
+          REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
+        run: |
+          mv report1.nsys-rep "${REPORT_NAME}"
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Upload profile artifact
+        env:
+          REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        with:
+          name: ${{ env.REPORT_NAME }}
+          path: fhevm/coprocessor/fhevm-engine/coprocessor/${{ env.REPORT_NAME }}
+
+      - name: Run latency benchmark on GPU
+        run: |
+          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Run throughput benchmarks on GPU
+        run: |
+          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
+        working-directory: fhevm/coprocessor/fhevm-engine/coprocessor
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py coprocessor/fhevm-engine/target/criterion "${RESULTS_FILENAME}" \
+          --database coprocessor \
+          --hardware "${HW_NAME}" \
+          --backend gpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch "${BRANCH_NAME}" \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${BENCH_DATE}" \
+          --walk-subdirs \
+          --crate "coprocessor/fhevm-engine/coprocessor" \
+          --name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
+        working-directory: fhevm/
+
+      - name: Upload parsed results artifact  # uploads the JSON file written by the "Parse results" step
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        with:
+          name: ${{ env.COMMIT_SHA }}_${{ env.BENCHMARKS }}_${{ needs.parse-inputs.outputs.profile }}  # shell-style ${VAR} is not expanded in `with:`
+          path: fhevm/${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        with:
+          repository: zama-ai/slab
+          path: slab
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        env:
+          SLAB_URL: ${{ secrets.SLAB_URL }}
+        run: |
+          python3 slab/scripts/data_sender.py fhevm/"${RESULTS_FILENAME}" "${SLAB_SECRET}" \
+          --slab-url "${SLAB_URL}"
+
+  teardown-instance:  # Stops the remote runner started by setup-instance
+    name: coprocessor-benchmark-gpu/teardown-instance
+    if: ${{ always() && needs.setup-instance.result == 'success' }}  # runs even when benchmark-gpu failed
+    needs: [ setup-instance, benchmark-gpu ]
+    runs-on: ubuntu-latest
+    permissions:
+      contents: 'read'
+    steps:
+      - name: Stop remote instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}  # label returned by the start step
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -1,4 +1,4 @@
-name: CSPRNG randomness testing Workflow
+name: csprng_randomness_tests

 env:
  CARGO_TERM_COLOR: always
@@ -26,7 +26,7 @@ permissions:

 jobs:
  setup-instance:
-    name: Setup instance (csprng-randomness-tests)
+    name: csprng_randomness_tests/setup-instance
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
@@ -52,7 +52,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  csprng-randomness-tests:
-    name: CSPRNG randomness tests
+    name: csprng_randomness_tests/csprng-randomness-tests
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}
@@ -60,13 +60,13 @@ jobs:
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -83,7 +83,7 @@ jobs:
          SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (csprng-randomness-tests)
+    name: csprng_randomness_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, csprng-randomness-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_4090_tests.yml
+++ b/.github/workflows/gpu_4090_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an RTX 4090 machine
-name: Cuda - 4090 full tests
+name: gpu_4090_tests

 env:
  CARGO_TERM_COLOR: always
@@ -27,7 +27,7 @@ permissions:

 jobs:
  cuda-tests-linux:
-    name: CUDA tests (RTX 4090)
+    name: gpu_4090_tests/cuda-tests-linux
    if: github.event_name == 'workflow_dispatch' ||
      contains(github.event.label.name, '4090_test') ||
      (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs')
@@ -39,13 +39,13 @@ jobs:

    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

--- a/.github/workflows/gpu_code_validation_tests.yml
+++ b/.github/workflows/gpu_code_validation_tests.yml
@@ -0,0 +1,152 @@
+# Run GPU memory checks (valgrind) on tfhe-cuda-backend on a Hyperstack instance
+name: gpu_code_validation_tests
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  SLACKIFY_MARKDOWN: true
+  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  PULL_REQUEST_MD_LINK: ""
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+  schedule:
+    # every 3 months
+    - cron: "0 0 1 */3 *"
+
+permissions:
+  contents: read
+
+jobs:
+  setup-instance:  # Picks the runner: a Slab instance when secrets are available, otherwise a GitHub runner group
+    name: gpu_code_validation_tests/setup-instance
+    runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' ||
+      (github.event.action == 'labeled' && github.event.label.name == 'approved')
+    outputs:
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}  # Slab label, else the fallback runner group
+    steps:
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: hyperstack
+          profile: gpu-test
+
+      # This instance will be spawned especially for pull-request from forked repository
+      - name: Start GitHub instance  # only publishes the runner group name; nothing is started here
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
+
+  cuda-tests-linux:  # Runs the valgrind make target on the runner chosen by setup-instance
+    name: gpu_code_validation_tests/cuda-tests-linux
+    needs: [ setup-instance ]
+    if: github.event_name != 'pull_request' ||
+      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
+    concurrency:
+      group: ${{ github.workflow_ref }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}  # runs on main are never cancelled
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    timeout-minutes: 5760
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.8"
+            gcc: 11
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}
+
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Find tools  # installs valgrind, then prints where compute-sanitizer and valgrind are found
+        run: |
+          sudo apt update && sudo apt install -y valgrind
+          find /usr -executable -name "compute-sanitizer"
+          which valgrind
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        with:
+          toolchain: stable
+
+      - name: Run memory sanitizer
+        run: |
+          make test_high_level_api_gpu_valgrind
+
+  slack-notify:  # Posts a Slack message when the run failed and the test job was not skipped
+    name: gpu_code_validation_tests/slack-notify
+    needs: [ setup-instance, cuda-tests-linux ]
+    runs-on: ubuntu-latest
+    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
+    continue-on-error: true
+    steps:
+      - name: Set pull-request URL  # fills PULL_REQUEST_MD_LINK, which is otherwise the empty string set at workflow level
+        if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
+        run: |
+          echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), "  >> "${GITHUB_ENV}"
+        env:
+          PR_BASE_URL: ${{ vars.PR_BASE_URL }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+
+      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'  # the Slack settings come from secrets, so skip when they are unavailable
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
+          SLACK_MESSAGE: "GPU Memory Checks tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
+
+  teardown-instance:  # Stops the Slab runner started by setup-instance
+    name: gpu_code_validation_tests/teardown-instance
+    if: ${{ always() && needs.setup-instance.result == 'success' }}  # runs even when the test job failed
+    needs: [ setup-instance, cuda-tests-linux ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop remote instance
+        id: stop-instance
+        if: env.SECRETS_AVAILABLE == 'true'  # same guard as the start step: nothing to stop otherwise
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}  # only when an earlier step of this job failed
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Instance teardown (gpu-code-validation-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_fast_h100_tests.yml
+++ b/.github/workflows/gpu_fast_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: Cuda - Fast tests on H100
+name: gpu_fast_h100_tests

 env:
  CARGO_TERM_COLOR: always
@@ -30,6 +30,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_fast_h100_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -37,7 +38,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -66,7 +67,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-h100-tests)
+    name: gpu_fast_h100_tests/setup-instance
    needs: should-run
    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
@@ -108,7 +109,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA H100 tests
+    name: gpu_fast_h100_tests/cuda-tests-linux
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -122,11 +123,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -140,10 +141,12 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run core crypto and internal CUDA backend tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
@@ -163,7 +166,7 @@ jobs:
          BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu

  slack-notify:
-    name: Slack Notification
+    name: gpu_fast_h100_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
@@ -185,7 +188,7 @@ jobs:
          SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-h100-tests)
+    name: gpu_fast_h100_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_fast_tests.yml
+++ b/.github/workflows/gpu_fast_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: Cuda - Fast tests
+name: gpu_fast_tests

 env:
  CARGO_TERM_COLOR: always
@@ -29,6 +29,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_fast_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -36,7 +37,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -65,7 +66,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-tests)
+    name: gpu_fast_tests/setup-instance
    needs: should-run
    if: github.event_name == 'workflow_dispatch' ||
      needs.should-run.outputs.gpu_test == 'true'
@@ -93,7 +94,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA tests
+    name: gpu_fast_tests/cuda-tests-linux
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -107,11 +108,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -124,10 +125,14 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
+
      - name: Run core crypto and internal CUDA backend tests
        run: |
          make test_core_crypto_gpu
@@ -147,7 +152,7 @@ jobs:
          make test_high_level_api_gpu

  slack-notify:
-    name: Slack Notification
+    name: gpu_fast_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
@@ -169,7 +174,7 @@ jobs:
          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-tests)
+    name: gpu_fast_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_full_h100_tests.yml
+++ b/.github/workflows/gpu_full_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: Cuda - Full tests on H100
+name: gpu_full_h100_tests

 env:
  CARGO_TERM_COLOR: always
@@ -20,7 +20,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (cuda-h100-tests)
+    name: gpu_full_h100_tests/setup-instance
    runs-on: ubuntu-latest
    outputs:
      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
@@ -50,7 +50,7 @@ jobs:
          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA H100 tests
+    name: gpu_full_h100_tests/cuda-tests-linux
    needs: [ setup-instance ]
    concurrency:
      group: ${{ github.workflow_ref }}
@@ -62,11 +62,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
@@ -79,10 +79,12 @@ jobs:
          gcc-version: ${{ matrix.gcc }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run core crypto, integer and internal CUDA backend tests
        run: |
          make test_gpu
@@ -100,7 +102,7 @@ jobs:
          make test_high_level_api_gpu

  slack-notify:
-    name: Slack Notification
+    name: gpu_full_h100_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ failure() }}
@@ -113,7 +115,7 @@ jobs:
          SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (cuda-h100-tests)
+    name: gpu_full_h100_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_full_multi_gpu_tests.yml
+++ b/.github/workflows/gpu_full_multi_gpu_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: Cuda - Full tests multi-GPU
+name: gpu_full_multi_gpu_tests

 env:
  CARGO_TERM_COLOR: always
@@ -30,6 +30,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_full_multi_gpu_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -37,7 +38,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -66,7 +67,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-tests-multi-gpu)
+    name: gpu_full_multi_gpu_tests/setup-instance
    needs: should-run
    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
@@ -95,7 +96,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA multi-GPU tests
+    name: gpu_full_multi_gpu_tests/cuda-tests-linux
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -109,11 +110,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -126,10 +127,12 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run multi-bit CUDA integer compression tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
@@ -152,7 +155,7 @@ jobs:
          make test_high_level_api_gpu

  slack-notify:
-    name: Slack Notification
+    name: gpu_full_multi_gpu_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
@@ -174,7 +177,7 @@ jobs:
          SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-tests-multi-gpu)
+    name: gpu_full_multi_gpu_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_integer_long_run_tests.yml
+++ b/.github/workflows/gpu_integer_long_run_tests.yml
@@ -1,4 +1,4 @@
-name: Cuda - Long Run Tests on GPU
+name: gpu_integer_long_run_tests

 env:
  CARGO_TERM_COLOR: always
@@ -11,6 +11,7 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  IS_PR: ${{ github.event_name == 'pull_request' }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -18,13 +19,15 @@ on:
  schedule:
    # Nightly tests will be triggered each evening 8p.m.
    - cron: "0 20 * * *"
+  pull_request:
+

 permissions:
  contents: read

 jobs:
  setup-instance:
-    name: Setup instance (gpu-tests)
+    name: gpu_integer_long_run_tests/setup-instance
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    runs-on: ubuntu-latest
@@ -43,7 +46,7 @@ jobs:
          profile: multi-gpu-test

  cuda-tests:
-    name: Long run GPU tests
+    name: gpu_integer_long_run_tests/cuda-tests
    needs: [ setup-instance ]
    concurrency:
      group: ${{ github.workflow_ref }}_${{github.event_name}}
@@ -55,12 +58,12 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    timeout-minutes: 4320 # 72 hours
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -72,16 +75,22 @@ jobs:
          gcc-version: ${{ matrix.gcc }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run tests
        run: |
-          make test_integer_long_run_gpu
+          if [[ "${IS_PR}" == "true" ]]; then
+            make test_integer_short_run_gpu
+          else
+            make test_integer_long_run_gpu
+          fi

  slack-notify:
-    name: Slack Notification
+    name: gpu_integer_long_run_tests/slack-notify
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests.result != 'skipped' && failure() }}
@@ -94,7 +103,7 @@ jobs:
          SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (gpu-tests)
+    name: gpu_integer_long_run_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_memory_sanitizer.yml
+++ b/.github/workflows/gpu_memory_sanitizer.yml
@@ -0,0 +1,149 @@
+# Run the tfhe-cuda-backend high-level API tests under compute-sanitizer on a Hyperstack instance
+name: gpu_memory_sanitizer
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  SLACKIFY_MARKDOWN: true
+  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  PULL_REQUEST_MD_LINK: ""
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
+
+on:
+  pull_request:
+    types: [ labeled ]  # only label additions trigger this workflow on pull requests
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  setup-instance:  # Selects the runner for the test job and exposes it as the `runner-name` output
+    name: gpu_memory_sanitizer/setup-instance
+    runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' ||
+      (github.event.action == 'labeled' && github.event.label.name == 'approved')
+    outputs:  # remote instance label when it was started, GitHub runner group otherwise
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+    steps:
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'  # exactly one of the two "start" steps runs
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: hyperstack
+          profile: gpu-test
+
+      # Fallback used when secrets are unavailable (pull requests from forked repositories): no instance is started, only a runner group name is published
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
+
+  cuda-tests-linux:  # Runs the sanitizer make target on the runner chosen by setup-instance
+    name: gpu_memory_sanitizer/cuda-tests-linux
+    needs: [ setup-instance ]
+    if: github.event_name != 'pull_request' ||
+      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
+    concurrency:
+      group: ${{ github.workflow_ref }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}  # runs on main are never cancelled
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    timeout-minutes: 240  # 4 hours
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.8"
+            gcc: 11
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}
+
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Find tools  # prints where compute-sanitizer is installed, for troubleshooting
+        run: |
+          find /usr -executable -name "compute-sanitizer"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        with:
+          toolchain: stable
+
+      - name: Run memory sanitizer
+        run: |
+          make test_high_level_api_gpu_sanitizer
+
+  slack-notify:  # Posts the test job result to Slack when the workflow has a failure
+    name: gpu_memory_sanitizer/slack-notify
+    needs: [ setup-instance, cuda-tests-linux ]
+    runs-on: ubuntu-latest
+    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
+    continue-on-error: true  # a failure of this job does not fail the workflow run
+    steps:
+      - name: Set pull-request URL  # fills PULL_REQUEST_MD_LINK, which is empty by default
+        if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
+        run: |
+          echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), "  >> "${GITHUB_ENV}"
+        env:
+          PR_BASE_URL: ${{ vars.PR_BASE_URL }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+
+      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'  # skipped when the Slack secrets are not available
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
+          SLACK_MESSAGE: "GPU Memory Checks tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
+
+  teardown-instance:  # Stops the remote instance started by setup-instance
+    name: gpu_memory_sanitizer/teardown-instance
+    if: ${{ always() && needs.setup-instance.result == 'success' }}  # runs even when the tests failed or were cancelled
+    needs: [ setup-instance, cuda-tests-linux ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop remote instance
+        id: stop-instance
+        if: env.SECRETS_AVAILABLE == 'true'  # a remote instance is only started when secrets are available
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}  # only when a previous step of this job failed
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Instance teardown (gpu_memory_sanitizer) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_pcc.yml
+++ b/.github/workflows/gpu_pcc.yml
@@ -1,5 +1,5 @@
 # Perform tfhe-cuda-backend post-commit checks on an AWS instance
-name: Cuda - Post-commit Checks
+name: gpu_pcc

 env:
  CARGO_TERM_COLOR: always
@@ -28,7 +28,7 @@ permissions:

 jobs:
  setup-instance:
-    name: Setup instance (cuda-pcc)
+    name: gpu_pcc/setup-instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
@@ -53,7 +53,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-pcc:
-    name: CUDA post-commit checks
+    name: gpu_pcc/cuda-pcc (bpr)
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}
@@ -72,7 +72,7 @@ jobs:

    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -94,7 +94,7 @@ jobs:
          CUDA_VERSION: ${{ matrix.cuda }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -149,7 +149,7 @@ jobs:
          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-pcc)
+    name: gpu_pcc/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-pcc ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_signed_integer_classic_tests.yml
+++ b/.github/workflows/gpu_signed_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
-name: Cuda - Signed integer tests with classical PBS
+name: gpu_signed_integer_classic_tests

 env:
  CARGO_TERM_COLOR: always
@@ -30,6 +30,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_signed_integer_classic_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -37,7 +38,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -66,7 +67,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-signed-classic-tests)
+    name: gpu_signed_integer_classic_tests/setup-instance
    needs: should-run
    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
@@ -95,7 +96,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA signed integer tests with classical PBS
+    name: gpu_signed_integer_classic_tests/cuda-tests-linux
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -109,11 +110,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -126,16 +127,18 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run signed integer tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci

  slack-notify:
-    name: Slack Notification
+    name: gpu_signed_integer_classic_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
@@ -157,7 +160,7 @@ jobs:
          SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-signed-classic-tests)
+    name: gpu_signed_integer_classic_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_signed_integer_h100_tests.yml
+++ b/.github/workflows/gpu_signed_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an H100 VM on hyperstack
-name: Cuda - Signed integer tests on H100
+name: gpu_signed_integer_h100_tests

 env:
  CARGO_TERM_COLOR: always
@@ -30,6 +30,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_signed_integer_h100_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -37,7 +38,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -66,7 +67,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-h100-tests)
+    name: gpu_signed_integer_h100_tests/setup-instance
    needs: should-run
    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
@@ -108,7 +109,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA H100 signed integer tests
+    name: gpu_signed_integer_h100_tests/cuda-tests-linux
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -122,11 +123,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -140,16 +141,18 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run signed integer multi-bit tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci

  slack-notify:
-    name: Slack Notification
+    name: gpu_signed_integer_h100_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
@@ -171,7 +174,7 @@ jobs:
          SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-h100-tests)
+    name: gpu_signed_integer_h100_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_signed_integer_tests.yml
+++ b/.github/workflows/gpu_signed_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend signed integer on an AWS instance
-name: Cuda - Signed integer tests
+name: gpu_signed_integer_tests

 env:
  CARGO_TERM_COLOR: always
@@ -31,6 +31,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_signed_integer_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -38,7 +39,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -67,7 +68,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-signed-integer-tests)
+    name: gpu_signed_integer_tests/setup-instance
    runs-on: ubuntu-latest
    needs: should-run
    if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
@@ -96,7 +97,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-signed-integer-tests:
-    name: CUDA signed integer tests
+    name: gpu_signed_integer_tests/cuda-signed-integer-tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -110,11 +111,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -127,10 +128,12 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Should run nightly tests
        if: github.event_name == 'schedule'
        run: |
@@ -144,7 +147,7 @@ jobs:
          make test_signed_integer_multi_bit_gpu_ci

  slack-notify:
-    name: Slack Notification
+    name: gpu_signed_integer_tests/slack-notify
    needs: [ setup-instance, cuda-signed-integer-tests ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-signed-integer-tests.result != 'skipped' && failure() }}
@@ -166,7 +169,7 @@ jobs:
          SLACK_MESSAGE: "Signed GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-tests)
+    name: gpu_signed_integer_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-signed-integer-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_unsigned_integer_classic_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
-name: Cuda - Unsigned integer tests with classical PBS
+name: gpu_unsigned_integer_classic_tests

 env:
  CARGO_TERM_COLOR: always
@@ -30,6 +30,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_unsigned_integer_classic_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -37,7 +38,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -66,7 +67,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-unsigned-classic-tests)
+    name: gpu_unsigned_integer_classic_tests/setup-instance
    needs: should-run
    if: github.event_name == 'workflow_dispatch' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
@@ -95,7 +96,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA unsigned integer tests with classical PBS
+    name: gpu_unsigned_integer_classic_tests/cuda-tests-linux
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -109,11 +110,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -126,16 +127,18 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run unsigned integer tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci

  slack-notify:
-    name: Slack Notification
+    name: gpu_unsigned_integer_classic_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
@@ -157,7 +160,7 @@ jobs:
          SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-unsigned-classic-tests)
+    name: gpu_unsigned_integer_classic_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_unsigned_integer_h100_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an H100 VM on hyperstack
-name: Cuda - Unsigned integer tests on H100
+name: gpu_unsigned_integer_h100_tests

 env:
  CARGO_TERM_COLOR: always
@@ -30,6 +30,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_unsigned_integer_h100_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -37,7 +38,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -66,7 +67,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-h100-tests)
+    name: gpu_unsigned_integer_h100_tests/setup-instance
    needs: should-run
    if: github.event_name == 'workflow_dispatch' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
@@ -108,7 +109,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-tests-linux:
-    name: CUDA H100 unsigned integer tests
+    name: gpu_unsigned_integer_h100_tests/cuda-tests-linux
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -122,11 +123,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11 
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -140,16 +141,18 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Run unsigned integer multi-bit tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci

  slack-notify:
-    name: Slack Notification
+    name: gpu_unsigned_integer_h100_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
@@ -171,7 +174,7 @@ jobs:
          SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-h100-tests)
+    name: gpu_unsigned_integer_h100_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
--- a/.github/workflows/gpu_unsigned_integer_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
-name: Cuda - Unsigned integer tests
+name: gpu_unsigned_integer_tests

 env:
  CARGO_TERM_COLOR: always
@@ -31,6 +31,7 @@ permissions:

 jobs:
  should-run:
+    name: gpu_unsigned_integer_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -38,7 +39,7 @@ jobs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -67,7 +68,7 @@ jobs:
              - ci/slab.toml

  setup-instance:
-    name: Setup instance (cuda-unsigned-integer-tests)
+    name: gpu_unsigned_integer_tests/setup-instance
    runs-on: ubuntu-latest
    needs: should-run
    if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
@@ -96,7 +97,7 @@ jobs:
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-unsigned-integer-tests:
-    name: CUDA unsigned integer tests
+    name: gpu_unsigned_integer_tests/cuda-unsigned-integer-tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
@@ -110,11 +111,11 @@ jobs:
      matrix:
        include:
          - os: ubuntu-22.04
-            cuda: "12.2"
+            cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
@@ -127,10 +128,12 @@ jobs:
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
-
+      - name: Enable nvidia multi-process service
+        run: |
+          nvidia-cuda-mps-control -d
      - name: Should run nightly tests
        if: github.event_name == 'schedule'
        run: |
@@ -144,7 +147,7 @@ jobs:
          make test_unsigned_integer_multi_bit_gpu_ci

  slack-notify:
-    name: Slack Notification
+    name: gpu_unsigned_integer_tests/slack-notify
    needs: [ setup-instance, cuda-unsigned-integer-tests ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-unsigned-integer-tests.result != 'skipped' && failure() }}
@@ -166,7 +169,7 @@ jobs:
          SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
-    name: Teardown instance (cuda-tests)
+    name: gpu_unsigned_integer_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-unsigned-integer-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/hpu_hlapi_tests.yml
+++ b/.github/workflows/hpu_hlapi_tests.yml
@@ -1,5 +1,5 @@
-# Test tfhe-fft
-name: Cargo Test HLAPI HPU
+# Test HPU backend HLAPI layer
+name: hpu_hlapi_tests

 on:
  pull_request:
@@ -21,6 +21,7 @@ permissions: { }

 jobs:
  should-run:
+    name: hpu_hlapi_tests/should-run
    runs-on: ubuntu-latest
    permissions:
      pull-requests: read
@@ -28,7 +29,7 @@ jobs:
      hpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.hpu_any_changed }}
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -46,11 +47,12 @@ jobs:
              - mockups/tfhe-hpu-mockup/**

  cargo-tests-hpu:
+    name: hpu_hlapi_tests/cargo-tests-hpu (bpr)
    needs: should-run
    if: needs.should-run.outputs.hpu_test == 'true'
    runs-on: large_ubuntu_16
    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}
--- a/.github/workflows/integer_long_run_tests.yml
+++ b/.github/workflows/integer_long_run_tests.yml
@@ -1,4 +1,4 @@
-name: AWS Long Run Tests on CPU
+name: integer_long_run_tests

 env:
  CARGO_TERM_COLOR: always
@@ -23,7 +23,7 @@ permissions: {}

 jobs:
  setup-instance:
-    name: Setup instance (cpu-tests)
+    name: integer_long_run_tests/setup-instance
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    runs-on: ubuntu-latest
@@ -42,7 +42,7 @@ jobs:
          profile: cpu-big

  cpu-tests:
-    name: Long run CPU tests
+    name: integer_long_run_tests/cpu-tests
    needs: [ setup-instance ]
    concurrency:
      group: ${{ github.workflow_ref }}_${{github.event_name}}
@@ -51,13 +51,13 @@ jobs:
    timeout-minutes: 4320 # 72 hours
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -74,7 +74,7 @@ jobs:
          SLACK_MESSAGE: "CPU long run tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (cpu-tests)
+    name: integer_long_run_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -1,4 +1,4 @@
-name: Tests on M1 CPU
+name: m1_tests

 on:
  workflow_dispatch:
@@ -32,6 +32,7 @@ permissions:

 jobs:
  cargo-builds-m1:
+    name: m1_tests/cargo-builds-m1
    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') ||
      github.event_name == 'workflow_dispatch' ||
      contains(github.event.label.name, 'm1_test') }}
@@ -40,13 +41,13 @@ jobs:
    timeout-minutes: 720

    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: "false"
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -178,7 +179,7 @@ jobs:
          make test_integer_multi_bit_ci

  remove_label:
-    name: Remove m1_test label
+    name: m1_tests/remove_label
    runs-on: ubuntu-latest
    needs:
      - cargo-builds-m1
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -1,5 +1,5 @@
 # Publish new release of tfhe-rs on various platform.
-name: Publish release
+name: make_release

 on:
  workflow_dispatch:
@@ -36,20 +36,22 @@ env:
 permissions: {}

 jobs:
-  verify_tag:
+  verify-tag:
+    name: make_release/verify-tag
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

  package:
+    name: make_release/package
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: verify-tag
    outputs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -66,6 +68,7 @@ jobs:
        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

  provenance:
+    name: make_release/provenance
    if: ${{ !inputs.dry_run  }}
    needs: [package]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
@@ -81,16 +84,16 @@ jobs:
      base64-subjects: ${{ needs.package.outputs.hash }}

  publish_release:
-    name: Publish Release
+    name: make_release/publish_release
    needs: [package] # for comparing hashes
    runs-on: ubuntu-latest
    # For provenance of npmjs publish
    permissions:
      contents: read
-      id-token: write
+      id-token: write # also needed for OIDC token exchange on crates.io
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -100,20 +103,23 @@ jobs:
        run: |
          echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
      - name: Download artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
        with:
          name: crate
          path: target/package
+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
      - name: Publish crate.io package
        if: ${{ inputs.push_to_crates }}
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          # DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish 
          # would fail. This is safe since DRY_RUN is handled in the env section above.
          # shellcheck disable=SC2086
-          cargo publish -p tfhe --token "${CRATES_TOKEN}" ${DRY_RUN}
+          cargo publish -p tfhe ${DRY_RUN}

      - name: Generate hash
        id: published_hash
--- a/.github/workflows/make_release_cuda.yml
+++ b/.github/workflows/make_release_cuda.yml
@@ -1,4 +1,4 @@
-name: Publish CUDA release
+name: make_release_cuda

 on:
  workflow_dispatch:
@@ -18,15 +18,16 @@ env:
 permissions: {}

 jobs:
-  verify_tag:
+  verify-tag:
+    name: make_release_cuda/verify-tag
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

  setup-instance:
-    name: Setup instance (publish-cuda-release)
-    needs: verify_tag
+    name: make_release_cuda/setup-instance
+    needs: verify-tag
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
@@ -43,7 +44,7 @@ jobs:
          profile: gpu-build

  package:
-    name: Package CUDA Release for provenance
+    name: make_release_cuda/package
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    outputs:
@@ -60,14 +61,14 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: "false"
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -104,6 +105,7 @@ jobs:
        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

  provenance:
+    name: make_release_cuda/provenance
    if: ${{ !inputs.dry_run  }}
    needs: [package]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
@@ -119,9 +121,12 @@ jobs:
      base64-subjects: ${{ needs.package.outputs.hash }}

  publish-cuda-release:
-    name: Publish CUDA Release
+    name: make_release_cuda/publish-cuda-release
    needs: [setup-instance, package] # for comparing hashes
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
@@ -134,7 +139,7 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

@@ -163,15 +168,19 @@ jobs:
        env:
          GCC_VERSION: ${{ matrix.gcc }}

+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
+
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          # DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish 
          # would fail. This is safe since DRY_RUN is handled in the env section above.
          # shellcheck disable=SC2086
-          cargo publish -p tfhe-cuda-backend --token "${CRATES_TOKEN}" ${DRY_RUN}
+          cargo publish -p tfhe-cuda-backend ${DRY_RUN}

      - name: Generate hash
        id: published_hash
@@ -194,7 +203,7 @@ jobs:
          SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
-    name: Teardown instance (publish-release)
+    name: make_release_cuda/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [setup-instance, publish-cuda-release]
    runs-on: ubuntu-latest
--- a/.github/workflows/make_release_hpu.yml
+++ b/.github/workflows/make_release_hpu.yml
@@ -1,4 +1,4 @@
-name: Publish HPU release
+name: make_release_hpu

 on:
  workflow_dispatch:
@@ -18,20 +18,22 @@ env:
 permissions: {}

 jobs:
-  verify_tag:
+  verify-tag:
+    name: make_release_hpu/verify-tag
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

  package:
+    name: make_release_hpu/package
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: verify-tag
    outputs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,6 +50,7 @@ jobs:
        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

  provenance:
+    name: make_release_hpu/provenance
    if: ${{ !inputs.dry_run  }}
    needs: [package]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
@@ -63,26 +66,33 @@ jobs:
      base64-subjects: ${{ needs.package.outputs.hash }}

  publish_release:
-    name: Publish tfhe-hpu-backend Release
+    name: make_release_hpu/publish-release
    runs-on: ubuntu-latest
-    needs: [verify_tag, package] # for comparing hashes
+    needs: [verify-tag, package] # for comparing hashes
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
+
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          # DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish 
          # would fail. This is safe since DRY_RUN is handled in the env section above.
          # shellcheck disable=SC2086
-          cargo publish -p tfhe-hpu-backend --token "${CRATES_TOKEN}" ${DRY_RUN}
+          cargo publish -p tfhe-hpu-backend ${DRY_RUN}

      - name: Generate hash
        id: published_hash
--- a/.github/workflows/make_release_tfhe_csprng.yml
+++ b/.github/workflows/make_release_tfhe_csprng.yml
@@ -1,4 +1,4 @@
-name: Publish tfhe-csprng release
+name: make_release_tfhe_csprng

 on:
  workflow_dispatch:
@@ -18,19 +18,21 @@ env:
 permissions: {}

 jobs:
-  verify_tag:
+  verify-tag:
+    name: make_release_tfhe_csprng/verify-tag
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

  package:
+    name: make_release_tfhe_csprng/package
    runs-on: ubuntu-latest
    outputs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -48,6 +50,7 @@ jobs:


  provenance:
+    name: make_release_tfhe_csprng/provenance
    if: ${{ !inputs.dry_run  }}
    needs: [package]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
@@ -64,30 +67,36 @@ jobs:


  publish_release:
-    name: Publish tfhe-csprng Release
-    needs: [verify_tag, package]
+    name: make_release_tfhe_csprng/publish-release
+    needs: [verify-tag, package]
    runs-on: ubuntu-latest
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
      - name: Download artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
        with:
          name: crate-tfhe-csprng
          path: target/package
+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          # DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish 
          # would fail. This is safe since DRY_RUN is handled in the env section above.
          # shellcheck disable=SC2086
-          cargo publish -p tfhe-csprng --token "${CRATES_TOKEN}" ${DRY_RUN}
+          cargo publish -p tfhe-csprng ${DRY_RUN}
      - name: Generate hash
        id: published_hash
        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
--- a/.github/workflows/make_release_tfhe_fft.yml
+++ b/.github/workflows/make_release_tfhe_fft.yml
@@ -1,5 +1,5 @@
 # Publish new release of tfhe-fft
-name: Publish tfhe-fft release
+name: make_release_tfhe_fft

 on:
  workflow_dispatch:
@@ -19,20 +19,22 @@ env:
 permissions: {}

 jobs:
-  verify_tag:
+  verify-tag:
+    name: make_release_tfhe_fft/verify-tag
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

  package:
+    name: make_release_tfhe_fft/package
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: verify-tag
    outputs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -49,6 +51,7 @@ jobs:
        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

  provenance:
+    name: make_release_tfhe_fft/provenance
    if: ${{ !inputs.dry_run  }}
    needs: [package]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
@@ -64,26 +67,33 @@ jobs:
      base64-subjects: ${{ needs.package.outputs.hash }}

  publish_release:
-    name: Publish tfhe-fft Release
+    name: make_release_tfhe_fft/publish-release
    runs-on: ubuntu-latest
-    needs: [verify_tag, package] # for comparing hashes
+    needs: [verify-tag, package] # for comparing hashes
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
+
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          # DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish 
          # would fail. This is safe since DRY_RUN is handled in the env section above.
          # shellcheck disable=SC2086
-          cargo publish -p tfhe-fft --token "${CRATES_TOKEN}" ${DRY_RUN}
+          cargo publish -p tfhe-fft ${DRY_RUN}

      - name: Generate hash
        id: published_hash
--- a/.github/workflows/make_release_tfhe_ntt.yml
+++ b/.github/workflows/make_release_tfhe_ntt.yml
@@ -1,5 +1,5 @@
 # Publish new release of tfhe-ntt
-name: Publish tfhe-ntt release
+name: make_release_tfhe_ntt

 on:
  workflow_dispatch:
@@ -19,20 +19,22 @@ env:
 permissions: {}

 jobs:
-  verify_tag:
+  verify-tag:
+    name: make_release_tfhe_ntt/verify-tag
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

  package:
+    name: make_release_tfhe_ntt/package
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: verify-tag
    outputs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -49,6 +51,7 @@ jobs:
        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

  provenance:
+    name: make_release_tfhe_ntt/provenance
    if: ${{ !inputs.dry_run  }}
    needs: [package]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
@@ -64,26 +67,33 @@ jobs:
      base64-subjects: ${{ needs.package.outputs.hash }}

  publish_release:
-    name: Publish tfhe-ntt Release
+    name: make_release_tfhe_ntt/publish-release
    runs-on: ubuntu-latest
-    needs: [verify_tag, package] # for comparing hashes
+    needs: [verify-tag, package] # for comparing hashes
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
+
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          # DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish 
          # would fail. This is safe since DRY_RUN is handled in the env section above.
          # shellcheck disable=SC2086
-          cargo publish -p tfhe-ntt --token "${CRATES_TOKEN}" ${DRY_RUN}
+          cargo publish -p tfhe-ntt ${DRY_RUN}

      - name: Generate hash
        id: published_hash
--- a/.github/workflows/make_release_tfhe_versionable.yml
+++ b/.github/workflows/make_release_tfhe_versionable.yml
@@ -1,4 +1,4 @@
-name: Publish tfhe-versionable release
+name: make_release_tfhe_versionable

 on:
  workflow_dispatch:
@@ -13,20 +13,21 @@ env:
 permissions: {}

 jobs:
-  verify_tag:
+  verify-tag:
+    name: make_release_tfhe_versionable/verify-tag
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

  package-derive:
-    name: Package tfhe-versionable-derive Release
+    name: make_release_tfhe_versionable/package-derive
    runs-on: ubuntu-latest
    outputs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -43,6 +44,7 @@ jobs:
        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

  provenance-derive:
+    name: make_release_tfhe_versionable/provenance-derive
    needs: [package-derive]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
    permissions:
@@ -57,26 +59,32 @@ jobs:
      base64-subjects: ${{ needs.package-derive.outputs.hash }}

  publish_release-derive:
-    name: Publish tfhe-versionable-derive Release
-    needs: [ verify_tag, package-derive ] # for comparing hashes
+    name: make_release_tfhe_versionable/publish_release_derive
+    needs: [ verify-tag, package-derive ] # for comparing hashes
    runs-on: ubuntu-latest
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
      - name: Download artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
        with:
          name: crate-tfhe-versionable-derive
          path: target/package
+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
        run: |
-          cargo publish -p tfhe-versionable-derive --token "${CRATES_TOKEN}"
+          cargo publish -p tfhe-versionable-derive
      - name: Generate hash
        id: published_hash
        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
@@ -96,14 +104,14 @@ jobs:
          SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  package:
-    name: Package tfhe-versionable Release
+    name: make_release_tfhe_versionable/package
    needs: publish_release-derive
    runs-on: ubuntu-latest
    outputs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
@@ -120,6 +128,7 @@ jobs:
        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

  provenance:
+    name: make_release_tfhe_versionable/provenance
    needs: package
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
    permissions:
@@ -134,26 +143,32 @@ jobs:
      base64-subjects: ${{ needs.package.outputs.hash }}

  publish_release:
-    name: Publish tfhe-versionable Release
+    name: make_release_tfhe_versionable/publish-release
    needs: package # for comparing hashes
    runs-on: ubuntu-latest
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
      - name: Download artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
        with:
          name: crate-tfhe-versionable
          path: target/package
+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
        run: |
-          cargo publish -p tfhe-versionable --token "${CRATES_TOKEN}"
+          cargo publish -p tfhe-versionable
      - name: Generate hash
        id: published_hash
        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
--- a/.github/workflows/make_release_zk_pok.yml
+++ b/.github/workflows/make_release_zk_pok.yml
@@ -1,4 +1,4 @@
-name: Publish tfhe-zk-pok release
+name: make_release_zk_pok

 on:
  workflow_dispatch:
@@ -15,33 +15,43 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

-permissions: {}
+permissions: { }

 jobs:
+  verify-tag:
+    name: make_release_zk_pok/verify-tag
+    uses: ./.github/workflows/verify_tagged_commit.yml
+    secrets:
+      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
+      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
+
  package:
-      runs-on: ubuntu-latest
-      outputs:
-        hash: ${{ steps.hash.outputs.hash }}
-      steps:
-        - name: Checkout
-          uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-          with:
-            fetch-depth: 0
-            persist-credentials: 'false'
-            token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-        - name: Prepare package
-          run: |
-            cargo package -p tfhe-zk-pok
-        - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
-          with:
-            name: crate-zk-pok
-            path: target/package/*.crate
-        - name: generate hash
-          id: hash
-          run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+    name: make_release_zk_pok/package
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          fetch-depth: 0
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-zk-pok
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: crate-zk-pok
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
  provenance:
+    name: make_release_zk_pok/provenance
    if: ${{ !inputs.dry_run  }}
-    needs: [package]
+    needs: [ package ]
    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
    permissions:
      # Needed to detect the GitHub Actions environment
@@ -54,37 +64,37 @@ jobs:
      # SHA-256 hashes of the Crate package.
      base64-subjects: ${{ needs.package.outputs.hash }}

-  verify_tag:
-    uses: ./.github/workflows/verify_tagged_commit.yml
-    secrets:
-      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
-      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
-
  publish_release:
-    name: Publish tfhe-zk-pok Release
-    needs: [verify_tag, package] # for comparing hashes
+    name: make_release_zk_pok/publish-release
+    needs: [ verify-tag, package ] # for comparing hashes
    runs-on: ubuntu-latest
+    permissions:
+      # Needed for OIDC token exchange on crates.io
+      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
      - name: Download artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
        with:
          name: crate-zk-pok
          path: target/package
+      - name: Authenticate on registry
+        uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
+        id: auth
      - name: Publish crate.io package
        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          # DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish 
          # would fail. This is safe since DRY_RUN is handled in the env section above.
          # shellcheck disable=SC2086
-          cargo publish -p tfhe-zk-pok --token "${CRATES_TOKEN}" ${DRY_RUN}
+          cargo publish -p tfhe-zk-pok ${DRY_RUN}
      - name: Verify hash
        id: published_hash
        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -1,5 +1,5 @@
 # Perform a security check on all the cryptographic parameters set
-name: Parameters curves security check
+name: parameters_check

 env:
  CARGO_TERM_COLOR: always
@@ -16,20 +16,21 @@ permissions: {}

 jobs:
  params-curves-security-check:
+    name: parameters_check/params-curves-security-check
    runs-on: large_ubuntu_16-22.04
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Checkout lattice-estimator
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: malb/lattice-estimator
          path: lattice_estimator
-          ref: 'e80ec6bbbba212428b0e92d0467c18629cf9ed67'
+          ref: 'e35f45b7976a90a79c3c6625a45bbc344c1abc67'
          persist-credentials: 'false'

      - name: Install Sage
--- a/.github/workflows/placeholder_workflow.yml
+++ b/.github/workflows/placeholder_workflow.yml
@@ -1,5 +1,5 @@
 # Placeholder workflow file allowing running it without having to merge to main first
-name: Placeholder Workflow
+name: placeholder_workflow

 on:
  workflow_dispatch:
@@ -8,7 +8,7 @@ permissions: {}

 jobs:
  placeholder:
-    name: Placeholder
+    name: placeholder_workflow/placeholder
    runs-on: ubuntu-latest

    steps:
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -1,5 +1,5 @@
 # Sync repos
-name: Sync repos
+name: sync_on_push

 on:
  push:
@@ -11,11 +11,12 @@ permissions: {}

 jobs:
  sync-repo:
+    name: sync_on_push/sync-repo
    if: ${{ github.repository == 'zama-ai/tfhe-rs' }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
--- a/.github/workflows/unverified_prs.yml
+++ b/.github/workflows/unverified_prs.yml
@@ -1,4 +1,5 @@
-name: 'Close unverified PRs'
+# Close unverified PRs
+name: unverified_prs
 on:
  schedule:
    - cron: '30 1 * * *'
@@ -7,12 +8,13 @@ permissions: {}

 jobs:
  stale:
+    name: unverified_prs/stale
    runs-on: ubuntu-latest
    permissions:
      issues: read
      pull-requests: write
    steps:
-      - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
+      - uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0
        with:
          stale-pr-message: 'This PR is unverified and has been open for 2 days, it will now be closed. If you want to contribute please sign the CLA as indicated by the bot.'
          days-before-stale: 2
--- a/.github/workflows/verify_tagged_commit.yml
+++ b/.github/workflows/verify_tagged_commit.yml
@@ -1,5 +1,5 @@
 # Verify a tagged commit
-name: Verify tagged commit
+name: verify_tagged_commit

 on:
  workflow_call:
@@ -13,6 +13,7 @@ permissions: {}

 jobs:
  checks:
+    name: verify_tagged_commit/checks
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/')
    steps:
--- a/.gitignore
+++ b/.gitignore
@@ -36,9 +36,6 @@ package-lock.json
 .env
 __pycache__

-# Dir used for backward compatibility test data
-# First directive is to ignore symlinks
-tests/tfhe-backward-compat-data
 ci/

 # In case someone clones the lattice-estimator locally to verify security
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,9 +30,10 @@ itertools = "0.14"
 num-complex = "0.4"
 pulp = { version = "0.21", default-features = false }
 rand = "0.8"
-rayon = "1"
+rayon = "1.11"
 serde = { version = "1.0", default-features = false }
-wasm-bindgen = "0.2.100"
+wasm-bindgen = "0.2.101"
+getrandom = "0.2.8"

 [profile.bench]
 lto = "fat"
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,6 @@ BENCH_PARAM_TYPE?=classical
 BENCH_PARAMS_SET?=default
 NODE_VERSION=22.6
 BACKWARD_COMPAT_DATA_DIR=utils/tfhe-backward-compat-data
-TFHE_SPEC:=tfhe
 WASM_PACK_VERSION="0.13.1"
 # We are kind of hacking the cut here, the version cannot contain a quote '"'
 WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
@@ -97,6 +96,12 @@ install_rs_build_toolchain:
 	( echo "Unable to install $(RS_BUILD_TOOLCHAIN) toolchain, check your rustup installation. \
 	Rustup can be downloaded at https://rustup.rs/" && exit 1 )

+.PHONY: install_rs_msrv_toolchain # Install the msrv toolchain
+install_rs_msrv_toolchain:
+	@rustup toolchain install --profile default "$(MIN_RUST_VERSION)" || \
+	( echo "Unable to install $(MIN_RUST_VERSION) toolchain, check your rustup installation. \
+	Rustup can be downloaded at https://rustup.rs/" && exit 1 )
+
 .PHONY: install_build_wasm32_target # Install the wasm32 toolchain used for builds
 install_build_wasm32_target: install_rs_build_toolchain
 	rustup +$(RS_BUILD_TOOLCHAIN) target add wasm32-unknown-unknown || \
@@ -303,28 +308,28 @@ clippy_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types,zk-pok \
 		--all-targets \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: check_gpu # Run check on tfhe with "gpu" enabled
 check_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
 		--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
 		--all-targets \
-		-p $(TFHE_SPEC)
+		-p tfhe

 .PHONY: clippy_hpu # Run clippy lints on tfhe with "hpu" enabled
 clippy_hpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean,shortint,integer,internal-keycache,hpu,pbs-stats,extended-types \
 		--all-targets \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_gpu_hpu # Run clippy lints on tfhe with "gpu" and "hpu" enabled
 clippy_gpu_hpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean,shortint,integer,internal-keycache,gpu,hpu,pbs-stats,extended-types,zk-pok \
 		--all-targets \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
 fix_newline: check_linelint_installed
@@ -345,55 +350,55 @@ check_workflow_security: install_zizmor
 .PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
 clippy_core: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=experimental \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=nightly-avx512 \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=experimental,nightly-avx512 \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=zk-pok \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_boolean # Run clippy lints enabling the boolean features
 clippy_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_shortint # Run clippy lints enabling the shortint features
 clippy_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=shortint \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=shortint,experimental \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=zk-pok,shortint \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_integer # Run clippy lints enabling the integer features
 clippy_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=integer \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=integer,experimental \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=integer,experimental,extended-types \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
 clippy: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=boolean,shortint,integer \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
 clippy_rustdoc: install_rs_check_toolchain
@@ -404,7 +409,7 @@ clippy_rustdoc: install_rs_check_toolchain
 	CARGO_TERM_QUIET=true CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
 		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
 		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental \
-		-p $(TFHE_SPEC)
+		-p tfhe

 .PHONY: clippy_rustdoc_gpu # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
 clippy_rustdoc_gpu: install_rs_check_toolchain
@@ -415,22 +420,22 @@ clippy_rustdoc_gpu: install_rs_check_toolchain
 	CARGO_TERM_QUIET=true CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
 		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
 		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
-		-p $(TFHE_SPEC)
+		-p tfhe

 .PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
 clippy_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean-c-api,shortint-c-api,high-level-c-api,extended-types \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
 clippy_js_wasm_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,extended-types \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
 clippy_tasks: install_rs_check_toolchain
@@ -451,15 +456,17 @@ clippy_ws_tests: install_rs_check_toolchain
 clippy_all_targets: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,extended-types \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,extended-types,experimental \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
+		-p tfhe -- --no-deps -D warnings

 .PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
 clippy_tfhe_csprng: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=parallel,software-prng -p tfhe-csprng -- --no-deps -D warnings
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
+		--features=parallel -p tfhe-csprng -- --no-deps -D warnings

 .PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
 clippy_zk_pok: install_rs_check_toolchain
@@ -542,63 +549,72 @@ tfhe_lints: install_cargo_dylint
 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		-p $(TFHE_SPEC)
+		-p tfhe
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=nightly-avx512 -p $(TFHE_SPEC); \
+			--features=nightly-avx512 -p tfhe; \
 	fi

 .PHONY: build_core_experimental # Build core_crypto with experimental features
 build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=experimental -p $(TFHE_SPEC)
+		--features=experimental -p tfhe
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
+			--features=experimental,nightly-avx512 -p tfhe; \
 	fi

 .PHONY: build_boolean # Build with boolean enabled
 build_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean -p $(TFHE_SPEC) --all-targets
+		--features=boolean -p tfhe --all-targets

 .PHONY: build_shortint # Build with shortint enabled
 build_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=shortint -p $(TFHE_SPEC) --all-targets
+		--features=shortint -p tfhe --all-targets

 .PHONY: build_integer # Build with integer enabled
 build_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=integer -p $(TFHE_SPEC) --all-targets
+		--features=integer -p tfhe --all-targets

 .PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
 build_tfhe_full: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
+		--features=boolean,shortint,integer -p tfhe --all-targets

 .PHONY: build_tfhe_coverage # Build with test coverage enabled
 build_tfhe_coverage: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
+		--features=boolean,shortint,integer,internal-keycache -p tfhe --tests
+
+# As of 05/08/2025 this is the set of features that can be easily compiled without additional
+# toolkits
+.PHONY: build_tfhe_msrv # Build with msrv compiler
+build_tfhe_msrv: install_rs_msrv_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo +$(MIN_RUST_VERSION) build --profile dev \
+		--features=boolean,extended-types,hpu,hpu-debug \
+		--features=hpu-v80,integer,noise-asserts \
+		--features=pbs-stats,shortint,strings,zk-pok -p tfhe

 .PHONY: build_c_api # Build the C API for boolean, shortint and integer
 build_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
 		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types \
-		-p $(TFHE_SPEC)
+		-p tfhe

 .PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
 build_c_api_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
 		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu \
-		-p $(TFHE_SPEC)
+		-p tfhe

 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
 build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
 		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
-		-p $(TFHE_SPEC)
+		-p tfhe

 .PHONY: build_web_js_api # Build the js API targeting the web browser
 build_web_js_api: install_rs_build_toolchain install_wasm_pack
@@ -633,10 +649,10 @@ build_tfhe_csprng: install_rs_build_toolchain
 .PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
 test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
+		--features=experimental,zk-pok -p tfhe -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-			--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
+			--features=experimental,zk-pok,nightly-avx512 -p tfhe -- core_crypto::; \
 	fi

 .PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
@@ -645,13 +661,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
 		--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
 		--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
 		--features=experimental,internal-keycache \
-		-p $(TFHE_SPEC) -- core_crypto::
+		-p tfhe -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 			--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
 			--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
 			--features=experimental,internal-keycache,nightly-avx512 \
-			-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
+			-p tfhe -- -Z unstable-options --report-time core_crypto::; \
 	fi

 .PHONY: test_cuda_backend # Run the internal tests of the CUDA backend
@@ -668,23 +684,47 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
 .PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
 test_core_crypto_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+		--features=gpu -p tfhe -- core_crypto::gpu::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+		--features=gpu -p tfhe -- core_crypto::gpu::

 .PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
 test_integer_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=4
+		--features=integer,gpu -p tfhe -- integer::gpu::server_key:: --test-threads=2
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
+		--features=integer,gpu -p tfhe -- integer::gpu::server_key:: --test-threads=4

 .PHONY: test_integer_gpu_debug # Run the tests of the integer module with Debug flags for CUDA
 test_integer_gpu_debug: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile release_lto_off \
-		--features=integer,gpu-debug -vv -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=1 --nocapture
+		--features=integer,gpu-debug -vv -p tfhe -- integer::gpu::server_key:: --test-threads=1 --nocapture
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile release_lto_off \
-		--features=integer,gpu-debug -p $(TFHE_SPEC) -- integer::gpu::server_key::
+		--features=integer,gpu-debug -p tfhe -- integer::gpu::server_key::
+
+.PHONY: test_high_level_api_gpu_valgrind # Run the valgrind memory checks for the high-level API GPU tests (check_memory_errors.sh --cpu)
+test_high_level_api_gpu_valgrind: install_rs_build_toolchain install_cargo_nextest
+	export RUSTFLAGS="-C target-cpu=x86-64" && \
+	export CARGO_RS_BUILD_TOOLCHAIN="$(CARGO_RS_BUILD_TOOLCHAIN)" && \
+	export TFHE_SPEC="tfhe" && \
+	export CARGO_PROFILE="$(CARGO_PROFILE)" &&	scripts/check_memory_errors.sh --cpu
+
+.PHONY: test_high_level_api_gpu_sanitizer # Run the sanitizer memory checks for the high-level API GPU tests (check_memory_errors.sh --gpu)
+test_high_level_api_gpu_sanitizer: install_rs_build_toolchain install_cargo_nextest
+	export RUSTFLAGS="-C target-cpu=x86-64" && \
+	export CARGO_RS_BUILD_TOOLCHAIN="$(CARGO_RS_BUILD_TOOLCHAIN)" && \
+	export TFHE_SPEC="tfhe" && \
+	export CARGO_PROFILE="$(CARGO_PROFILE)" &&	scripts/check_memory_errors.sh --gpu
+
+.PHONY: test_integer_hl_test_gpu_check_warnings # Build tfhe with gpu-debug and fail if tfhe-cuda-backend emits CUDA compilation warnings (ignoring "inline qualifier" ones)
+test_integer_hl_test_gpu_check_warnings: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build \
+		--features=integer,internal-keycache,gpu-debug,zk-pok -vv -p tfhe &> /tmp/gpu_compile_output
+	WARNINGS=$$(cat /tmp/gpu_compile_output | grep ": warning #" | grep "\[tfhe-cuda-backend" | grep -v "inline qualifier" || true) && \
+	if [[ "$${WARNINGS}" != "" ]]; then \
+	    echo "FAILING BECAUSE CUDA COMPILATION WARNINGS WERE DETECTED: " && \
+		echo "$${WARNINGS}" && exit 1; \
+	fi


 .PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
@@ -693,21 +733,27 @@ test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
 	LONG_TESTS=TRUE \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)" --backend "gpu"
+		--tfhe-package "tfhe" --backend "gpu"
+
+.PHONY: test_integer_short_run_gpu # Run a minimal version of the long run integer tests on the gpu backend
+test_integer_short_run_gpu: install_rs_build_toolchain install_cargo_nextest
+	TFHE_RS_TEST_LONG_TESTS_MINIMAL=TRUE \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=integer,gpu -p tfhe -- integer::gpu::server_key::radix::tests_long_run::test_random_op_sequence integer::gpu::server_key::radix::tests_long_run::test_signed_random_op_sequence --test-threads=1 --nocapture

 .PHONY: test_integer_compression
 test_integer_compression: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
+		--features=integer -p tfhe -- integer::ciphertext::compressed_ciphertext_list::tests::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
+		--features=integer -p tfhe -- integer::ciphertext::compress

 .PHONY: test_integer_compression_gpu
 test_integer_compression_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
+		--features=integer,gpu -p tfhe -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
+		--features=integer,gpu -p tfhe -- integer::gpu::ciphertext::compress

 .PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
 test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -716,7 +762,7 @@ test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
-		--tfhe-package "$(TFHE_SPEC)"
+		--tfhe-package "tfhe"

 .PHONY: test_unsigned_integer_gpu_ci # Run the tests for unsigned integer ci on gpu backend
 test_unsigned_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -725,7 +771,7 @@ test_unsigned_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
-		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+		--unsigned-only --tfhe-package "tfhe"

 .PHONY: test_signed_integer_gpu_ci # Run the tests for signed integer ci on gpu backend
 test_signed_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -734,7 +780,7 @@ test_signed_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
-		--signed-only --tfhe-package "$(TFHE_SPEC)"
+		--signed-only --tfhe-package "tfhe"

 .PHONY: test_integer_multi_bit_gpu_ci # Run the tests for integer ci on gpu backend running only multibit tests
 test_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -743,7 +789,7 @@ test_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
-		--tfhe-package "$(TFHE_SPEC)"
+		--tfhe-package "tfhe"

 .PHONY: test_unsigned_integer_multi_bit_gpu_ci # Run the tests for unsigned integer ci on gpu backend running only multibit tests
 test_unsigned_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -752,7 +798,7 @@ test_unsigned_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
-		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+		--unsigned-only --tfhe-package "tfhe"

 .PHONY: test_signed_integer_multi_bit_gpu_ci # Run the tests for signed integer ci on gpu backend running only multibit tests
 test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -761,34 +807,34 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
-		--signed-only --tfhe-package "$(TFHE_SPEC)"
+		--signed-only --tfhe-package "tfhe"

 .PHONY: test_integer_hpu_ci # Run the tests for integer ci on hpu backend
 test_integer_hpu_ci: install_rs_check_toolchain install_cargo_nextest
-	cargo test --release -p $(TFHE_SPEC) --features hpu-v80 --test hpu
+	cargo test --release -p tfhe --features hpu-v80 --test hpu

 .PHONY: test_integer_hpu_mockup_ci # Run the tests for integer ci on hpu backend and mockup
 test_integer_hpu_mockup_ci: install_rs_check_toolchain install_cargo_nextest
 	source ./setup_hpu.sh --config sim ; \
 	cargo build --release --bin hpu_mockup; \
-    coproc target/release/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_pfail64_psi64.toml > mockup.log; \
+	coproc target/release/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_pfail64_psi64.toml > mockup.log; \
 	HPU_TEST_ITER=1 \
-	cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
+	cargo test --profile devo -p tfhe --features hpu --test hpu -- u32 && \
 	kill %1

 .PHONY: test_integer_hpu_mockup_ci_fast # Run the quick tests for integer ci on hpu backend and mockup.
 test_integer_hpu_mockup_ci_fast: install_rs_check_toolchain install_cargo_nextest
 	source ./setup_hpu.sh --config sim ; \
 	cargo build --profile devo --bin hpu_mockup; \
-    coproc target/devo/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_fast.toml > mockup.log; \
+	coproc target/devo/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_fast.toml > mockup.log; \
 	HPU_TEST_ITER=1 \
-	cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
+	cargo test --profile devo -p tfhe --features hpu --test hpu -- u32 && \
 	kill %1

 .PHONY: test_boolean # Run the tests of the boolean module
 test_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=boolean -p $(TFHE_SPEC) -- boolean::
+		--features=boolean -p tfhe -- boolean::

 .PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
 test_boolean_cov: install_rs_check_toolchain install_tarpaulin
@@ -796,13 +842,13 @@ test_boolean_cov: install_rs_check_toolchain install_tarpaulin
 		--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
 		--features=boolean,internal-keycache \
-		-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::
+		-p tfhe -- -Z unstable-options --report-time boolean::

 .PHONY: test_c_api_rs # Run the rust tests for the C API
 test_c_api_rs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--features=boolean-c-api,shortint-c-api,high-level-c-api \
-		-p $(TFHE_SPEC) \
+		-p tfhe \
 		c_api

 .PHONY: test_c_api_c # Run the C tests for the C API
@@ -821,19 +867,19 @@ test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "$(TFHE_SPEC)"
+		--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "tfhe"

 .PHONY: test_shortint_multi_bit_ci # Run the tests for shortint ci running only multibit tests
 test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "$(TFHE_SPEC)"
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "tfhe"

 .PHONY: test_shortint # Run all the tests for shortint
 test_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
+		--features=shortint,internal-keycache -p tfhe -- shortint::

 .PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
 test_shortint_cov: install_rs_check_toolchain install_tarpaulin
@@ -841,7 +887,7 @@ test_shortint_cov: install_rs_check_toolchain install_tarpaulin
 		--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
 		--features=shortint,internal-keycache \
-		-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::
+		-p tfhe -- -Z unstable-options --report-time shortint::

 .PHONY: test_integer_ci # Run the tests for integer ci
 test_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -850,7 +896,7 @@ test_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)"
+		--tfhe-package "tfhe"

 .PHONY: test_unsigned_integer_ci # Run the tests for unsigned integer ci
 test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -859,7 +905,7 @@ test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+		--unsigned-only --tfhe-package "tfhe"

 .PHONY: test_signed_integer_ci # Run the tests for signed integer ci
 test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -868,7 +914,7 @@ test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--signed-only --tfhe-package "$(TFHE_SPEC)"
+		--signed-only --tfhe-package "tfhe"

 .PHONY: test_integer_multi_bit_ci # Run the tests for integer ci running only multibit tests
 test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -877,7 +923,7 @@ test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)"
+		--tfhe-package "tfhe"

 .PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for unsigned integer ci running only multibit tests
 test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -886,7 +932,7 @@ test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nex
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
-		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+		--unsigned-only --tfhe-package "tfhe"

 .PHONY: test_signed_integer_multi_bit_ci # Run the tests for signed integer ci running only multibit tests
 test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -895,7 +941,7 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
 	NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
-		--signed-only --tfhe-package "$(TFHE_SPEC)"
+		--signed-only --tfhe-package "tfhe"

 .PHONY: test_integer_long_run # Run the long run integer tests
 test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
@@ -903,22 +949,31 @@ test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
 	LONG_TESTS=TRUE \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)"
+		--tfhe-package "tfhe"
+
+.PHONY: test_noise_check # Run dedicated noise and pfail check tests
+test_noise_check: install_rs_check_toolchain
+	@# First run the sanity checks to make sure the atomic patterns are correct
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=boolean,shortint,integer,nightly-avx512 -p tfhe -- sanity_check
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=boolean,shortint,integer,nightly-avx512 -p tfhe -- noise_check \
+		--test-threads=1 --nocapture

 .PHONY: test_safe_serialization # Run the tests for safe serialization
 test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
+		--features=boolean,shortint,integer,internal-keycache -p tfhe -- safe_serialization::

 .PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
 test_zk: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::
+		--features=shortint,zk-pok -p tfhe -- zk::

 .PHONY: test_integer # Run all the tests for integer
 test_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::
+		--features=integer,internal-keycache -p tfhe -- integer::

 .PHONY: test_integer_cov # Run the tests of the integer module with code coverage
 test_integer_cov: install_rs_check_toolchain install_tarpaulin
@@ -927,17 +982,17 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
 		--implicit-test-threads \
 		--exclude-files $(COVERAGE_EXCLUDED_FILES) \
 		--features=integer,internal-keycache \
-		-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::
+		-p tfhe -- -Z unstable-options --report-time integer::

 .PHONY: test_high_level_api # Run all the tests for high_level_api
 test_high_level_api: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p $(TFHE_SPEC) \
+		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p tfhe \
 		-- high_level_api::

 test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
-		--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
+		--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p tfhe \
 		-E "test(/high_level_api::.*gpu.*/)"

 test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
@@ -945,13 +1000,13 @@ ifeq ($(HPU_CONFIG), v80)
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
 		--build-jobs=$(CARGO_BUILD_JOBS) \
 		--test-threads=1 \
-		--features=integer,internal-keycache,hpu,hpu-v80 -p $(TFHE_SPEC) \
+		--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe \
 		-E "test(/high_level_api::.*hpu.*/)"
 else
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
 		--build-jobs=$(CARGO_BUILD_JOBS) \
 		--test-threads=1 \
-		--features=integer,internal-keycache,hpu -p $(TFHE_SPEC) \
+		--features=integer,internal-keycache,hpu -p tfhe \
 		-E "test(/high_level_api::.*hpu.*/)"
 endif

@@ -959,7 +1014,7 @@ endif
 .PHONY: test_strings # Run the tests for strings ci
 test_strings: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=shortint,integer,strings -p $(TFHE_SPEC) \
+		--features=shortint,integer,strings -p tfhe \
 		-- strings::


@@ -967,24 +1022,24 @@ test_strings: install_rs_build_toolchain
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
 		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok,strings \
-		-p $(TFHE_SPEC) \
+		-p tfhe \
 		-- test_user_docs::

 .PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
 test_user_doc_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=internal-keycache,integer,zk-pok,gpu -p $(TFHE_SPEC) \
+		--features=internal-keycache,integer,zk-pok,gpu -p tfhe \
 		-- test_user_docs::

 .PHONY: test_user_doc_hpu # Run tests for HPU from the .md documentation
 test_user_doc_hpu: install_rs_build_toolchain
 ifeq ($(HPU_CONFIG), v80)
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=internal-keycache,integer,hpu,hpu-v80 -p $(TFHE_SPEC) \
+		--features=internal-keycache,integer,hpu,hpu-v80 -p tfhe \
 		-- test_user_docs::
 else
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=internal-keycache,integer,hpu -p $(TFHE_SPEC) \
+		--features=internal-keycache,integer,hpu -p tfhe \
 		-- test_user_docs::
 endif

@@ -1069,7 +1124,7 @@ doc: install_rs_check_toolchain
 	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
+		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p tfhe

 .PHONY: docs # Build rust doc alias for doc
 docs: doc
@@ -1080,7 +1135,7 @@ lint_doc: install_rs_check_toolchain
 	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
+		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p tfhe --no-deps

 .PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
 lint_docs: lint_doc
@@ -1118,7 +1173,7 @@ check_parameter_export_ok:
 check_compile_tests: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
 		--features=experimental,boolean,shortint,integer,internal-keycache \
-		-p $(TFHE_SPEC)
+		-p tfhe

 	@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
 		"$(MAKE)" build_c_api && \
@@ -1129,7 +1184,7 @@ check_compile_tests: install_rs_build_toolchain
 check_compile_tests_benches_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
 		--features=experimental,boolean,shortint,integer,internal-keycache,gpu,zk-pok \
-		-p $(TFHE_SPEC)
+		-p tfhe
 	mkdir -p "$(TFHECUDA_BUILD)" && \
 		cd "$(TFHECUDA_BUILD)" && \
 		cmake .. -DCMAKE_BUILD_TYPE=Debug -DTFHE_CUDA_BACKEND_BUILD_TESTS=ON -DTFHE_CUDA_BACKEND_BUILD_BENCHMARKS=ON && \
@@ -1257,11 +1312,11 @@ bench_signed_integer_gpu: install_rs_check_toolchain

 .PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend
 bench_integer_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick
+	--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark --

 .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
 bench_integer_compression: install_rs_check_toolchain
@@ -1430,6 +1485,26 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
 	nvm use $(NODE_VERSION) && \
 	$(MAKE) bench_web_js_api_parallel_firefox

+.PHONY: bench_hlapi # Run benchmarks for integer operations
+bench_hlapi: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench hlapi \
+	--features=integer,internal-keycache,nightly-avx512 -p tfhe-benchmark --
+
+.PHONY: bench_hlapi_gpu # Run benchmarks for integer operations on GPU
+bench_hlapi_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench hlapi \
+	--features=integer,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --
+
+.PHONY: bench_hlapi_hpu # Run benchmarks for HLAPI operations on HPU
+bench_hlapi_hpu: install_rs_check_toolchain
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
+	RUSTFLAGS="$(RUSTFLAGS)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench hlapi \
+	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark --
+
 .PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations
 bench_hlapi_erc20: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
@@ -1456,11 +1531,11 @@ bench_hlapi_dex_gpu: install_rs_check_toolchain

 .PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ECR20 operations on HPU
 bench_hlapi_erc20_hpu: install_rs_check_toolchain
-	source ./setup_hpu.sh --config $(HPU_CONFIG) ; \
+	source ./setup_hpu.sh --config $(HPU_CONFIG) -p ; \
 	RUSTFLAGS="$(RUSTFLAGS)" \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
-	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark -- --quick
+	--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe-benchmark --

 .PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
 bench_tfhe_zk_pok: install_rs_check_toolchain
@@ -1489,13 +1564,13 @@ bench_hlapi_noise_squash_gpu: install_rs_check_toolchain
 gen_key_cache: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 		--example generates_test_keys \
-		--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
+		--features=boolean,shortint,experimental,internal-keycache -p tfhe \
 		-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)

 .PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
 gen_key_cache_core_crypto: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
-		--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
+		--features=experimental,internal-keycache -p tfhe -- --nocapture \
 		core_crypto::keycache::generate_keys

 .PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
@@ -1576,7 +1651,7 @@ tfhe_lints

 .PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
 pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
-clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu
+clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu test_integer_hl_test_gpu_check_warnings

 .PHONY: pcc_hpu # pcc stands for pre commit checks for HPU compilation
 pcc_hpu: clippy_hpu clippy_hpu_backend clippy_hpu_mockup test_integer_hpu_mockup_ci_fast
--- a/README.md
+++ b/README.md
@@ -201,11 +201,9 @@ When a new update is published in the Lattice Estimator, we update parameters ac

 ### Security model

-By default, the parameter sets used in the High-Level API with the x86 CPU backend have a failure probability $\le 2^{128}$ to securely work in the IND-CPA^D model using the algorithmic techniques provided in our code base [1].
+By default, the parameter sets used in the High-Level API have a failure probability $\le 2^{-128}$ to securely work in the IND-CPA^D model using the algorithmic techniques provided in our code base [1].
 If you want to work within the IND-CPA security model, which is less strict than the IND-CPA-D model, the parameter sets can easily be changed and would have slightly better performance. More details can be found in the [TFHE-rs documentation](https://docs.zama.ai/tfhe-rs).

-The default parameters used in the High-Level API with the GPU backend are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at $p_{error} \le 2^{-64}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [2].
-
 [1] Bernard, Olivier, et al. "Drifting Towards Better Error Probabilities in Fully Homomorphic Encryption Schemes". https://eprint.iacr.org/2024/1718.pdf

 [2] Li, Baiyu, et al. "Securing approximate homomorphic encryption using differential privacy." Annual International Cryptology Conference. Cham: Springer Nature Switzerland, 2022. https://eprint.iacr.org/2022/816.pdf
--- a/_typos.toml
+++ b/_typos.toml
@@ -12,7 +12,7 @@ extend-ignore-identifiers-re = [
    "herlo",
    # Example in trivium
    "C9217BA0D762ACA1",
-    "0x[0-9a-fA-F]+"
+    "0x[0-9a-fA-F]+",
 ]

 [files]
@@ -20,4 +20,6 @@ extend-exclude = [
    "backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu",
    "backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu",
    "backends/tfhe-hpu-backend/config_store/**/*.link_summary",
+    "*.cbor",
+    "*.bcode",
 ]
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -129,7 +129,7 @@ Other sizes than 64 bit are expected to be available in the future.

 # FHE shortint Trivium implementation

-The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
+The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
 It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
 on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
 its setup a little more intricate.
@@ -138,9 +138,9 @@ Example code:
 ```rust
 use tfhe::shortint::prelude::*;
 use tfhe::shortint::parameters::current_params::{
-    V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
 };
 use tfhe::{ConfigBuilder, generate_keys, FheUint64};
 use tfhe::prelude::*;
@@ -148,17 +148,17 @@ use tfhe_trivium::TriviumStreamShortint;

 fn test_shortint() {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

-    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -1,9 +1,9 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::shortint::parameters::current_params::{
-    V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
 };
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
@@ -11,19 +11,19 @@ use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};

 pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -64,19 +64,19 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {

 pub fn kreyvium_shortint_gen(c: &mut Criterion) {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -112,19 +112,19 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {

 pub fn kreyvium_shortint_trans(c: &mut Criterion) {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -1,9 +1,9 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::shortint::parameters::current_params::{
-    V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
 };
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
@@ -11,19 +11,19 @@ use tfhe_trivium::{TransCiphering, TriviumStreamShortint};

 pub fn trivium_shortint_warmup(c: &mut Criterion) {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -64,19 +64,19 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {

 pub fn trivium_shortint_gen(c: &mut Criterion) {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -112,19 +112,19 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {

 pub fn trivium_shortint_trans(c: &mut Criterion) {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -1,16 +1,16 @@
 use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
 use tfhe::prelude::*;
 use tfhe::shortint::parameters::current_params::{
-    V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
 };
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo renaud1239/Kreyvium,
 // commit fd6828f68711276c25f55e605935028f5e843f43

 fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
-    assert!(a.len() % 8 == 0);
+    assert!(a.len().is_multiple_of(8));
    let mut hexadecimal: String = "".to_string();
    for test in a.chunks(8) {
        // Encoding is bytes in LSB order
@@ -63,7 +63,7 @@ fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
 }

 fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
-    assert!(a.len() % 8 == 0);
+    assert!(a.len().is_multiple_of(8));
    let mut hexadecimal: String = "".to_string();
    for test in a {
        hexadecimal.push_str(&format!("{test:02X?}"));
@@ -221,19 +221,19 @@ use tfhe::shortint::prelude::*;
 #[test]
 fn kreyvium_test_shortint_long() {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -1,16 +1,16 @@
 use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
 use tfhe::prelude::*;
 use tfhe::shortint::parameters::current_params::{
-    V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
 };
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
 // file testvectors/trivium-80.80.test-vectors

 fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
-    assert!(a.len() % 8 == 0);
+    assert!(a.len().is_multiple_of(8));
    let mut hexadecimal: String = "".to_string();
    for test in a.chunks(8) {
        // Encoding is bytes in LSB order
@@ -63,7 +63,7 @@ fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
 }

 fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
-    assert!(a.len() % 8 == 0);
+    assert!(a.len().is_multiple_of(8));
    let mut hexadecimal: String = "".to_string();
    for test in a {
        hexadecimal.push_str(&format!("{test:02X?}"));
@@ -357,19 +357,19 @@ use tfhe::shortint::prelude::*;
 #[test]
 fn trivium_test_shortint_long() {
    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .use_custom_parameters(V1_4_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V1_4_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V1_4_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/backends/tfhe-cuda-backend/build.rs
+++ b/backends/tfhe-cuda-backend/build.rs
@@ -54,8 +54,7 @@ fn main() {
        }

        if cfg!(feature = "debug") {
-            cmake_config.define("CMAKE_BUILD_TYPE", "DEBUG");
-            cmake_config.define("CMAKE_CXX_FLAGS", "-Wuninitialized -O0");
+            cmake_config.define("CMAKE_BUILD_TYPE", "Debug");
        }

        // Build the CMake project
--- a/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
@@ -78,8 +78,10 @@ endif()

 add_compile_definitions(CUDA_ARCH=${CUDA_ARCH})

+string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE)
+
 # Check if the DEBUG flag is defined
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+if(CMAKE_BUILD_TYPE_LOWERCASE STREQUAL "debug")
  # Debug mode
  message("Compiling in Debug mode")
  add_definitions(-DDEBUG)
--- a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
@@ -26,6 +26,15 @@ void cuda_modulus_switch_inplace_64(void *stream, uint32_t gpu_index,
                                    void *lwe_array_out, uint32_t size,
                                    uint32_t log_modulus);

+void cuda_modulus_switch_64(void *stream, uint32_t gpu_index, void *lwe_out,
+                            const void *lwe_in, uint32_t size,
+                            uint32_t log_modulus);
+
+void cuda_centered_modulus_switch_64(void *stream, uint32_t gpu_index,
+                                     void *lwe_out, const void *lwe_in,
+                                     uint32_t lwe_dimension,
+                                     uint32_t log_modulus);
+
 void cuda_improve_noise_modulus_switch_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *lwe_array_indexes,
--- a/backends/tfhe-cuda-backend/cuda/include/device.h
+++ b/backends/tfhe-cuda-backend/cuda/include/device.h
@@ -19,6 +19,11 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
    std::abort();
  }
 }
+
+// The PANIC macro should be used to validate user-inputs to GPU functions
+// it will execute in all targets, including production settings
+// e.g., cudaMemCopy to the device should check that the destination pointer is
+// a device pointer
 #define PANIC(format, ...)                                                     \
  {                                                                            \
    std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__,         \
@@ -26,6 +31,31 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
    std::abort();                                                              \
  }

+// This is a generic assertion checking macro with user defined printf-style
+// message
+#define PANIC_IF_FALSE(cond, format, ...)                                      \
+  do {                                                                         \
+    if (!(cond)) {                                                             \
+      PANIC(format "\n\n %s\n", ##__VA_ARGS__, #cond);                         \
+    }                                                                          \
+  } while (0)
+
+#ifndef GPU_ASSERTS_DISABLE
+// GPU_ASSERT validates assumptions made by algorithms, e.g. that two
+// user-provided quantities have a certain relationship, or that a buffer is
+// large enough for an algorithm that depends on user-provided inputs (the OPRF
+// corrections buffer should not be larger than the number of blocks in the
+// generated datatype). Defining GPU_ASSERTS_DISABLE turns it into a no-op that
+// accepts the same arguments, so call sites compile in both configurations.
+#define GPU_ASSERT(cond, format, ...)                                          \
+  PANIC_IF_FALSE(cond, format, ##__VA_ARGS__)
+#else
+#define GPU_ASSERT(cond, format, ...)                                          \
+  do {                                                                         \
+  } while (0)
+#endif
+
+uint32_t cuda_get_device();
 void cuda_set_device(uint32_t gpu_index);

 cudaEvent_t cuda_create_event(uint32_t gpu_index);
@@ -89,6 +119,8 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,

 int cuda_get_number_of_gpus();

+int cuda_get_number_of_sms();
+
 void cuda_synchronize_device(uint32_t gpu_index);

 void cuda_drop(void *ptr, uint32_t gpu_index);
--- a/backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
+++ b/backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
@@ -6,6 +6,7 @@

 extern std::mutex m;
 extern bool p2p_enabled;
+extern const int THRESHOLD_MULTI_GPU;

 extern "C" {
 int32_t cuda_setup_multi_gpu(int device_0_id);
@@ -15,24 +16,27 @@ int32_t cuda_setup_multi_gpu(int device_0_id);
 template <typename Torus>
 using LweArrayVariant = std::variant<std::vector<Torus *>, Torus *>;

-// Macro to define the visitor logic using std::holds_alternative for vectors
-#define GET_VARIANT_ELEMENT(variant, index)                                    \
-  [&] {                                                                        \
-    if (std::holds_alternative<std::vector<Torus *>>(variant)) {               \
-      return std::get<std::vector<Torus *>>(variant)[index];                   \
-    } else {                                                                   \
-      return std::get<Torus *>(variant);                                       \
-    }                                                                          \
-  }()
-// Macro to define the visitor logic using std::holds_alternative for vectors
-#define GET_VARIANT_ELEMENT_64BIT(variant, index)                              \
-  [&] {                                                                        \
-    if (std::holds_alternative<std::vector<uint64_t *>>(variant)) {            \
-      return std::get<std::vector<uint64_t *>>(variant)[index];                \
-    } else {                                                                   \
-      return std::get<uint64_t *>(variant);                                    \
-    }                                                                          \
-  }()
+/// get_variant_element() resolves access when the input may be either a single
+/// pointer or a vector of pointers. If the variant holds a single pointer, the
+/// index is ignored and that pointer is returned; if it holds a vector, the
+/// element at `index` is returned.
+///
+/// This function replaces the previous macro:
+/// - Easier to debug and read than a macro
+/// - Deduces the pointer type from the variant (no need to name a Torus type
+/// explicitly)
+/// - Defined in a header, so it's eligible for inlining by the optimizer
+template <typename Torus>
+inline Torus
+get_variant_element(const std::variant<std::vector<Torus>, Torus> &variant,
+                    size_t index) {
+  if (std::holds_alternative<std::vector<Torus>>(variant)) {
+    return std::get<std::vector<Torus>>(variant)[index];
+  } else {
+    return std::get<Torus>(variant);
+  }
+}
+
 int get_active_gpu_count(int num_inputs, int gpu_count);

 int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);
--- a/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression.h
@@ -3,6 +3,26 @@

 #include "../../pbs/pbs_enums.h"

+typedef struct {
+  void *ptr;
+  uint32_t num_radix_blocks;
+  uint32_t lwe_dimension;
+} CudaLweCiphertextListFFI;
+
+typedef struct {
+  void *ptr;
+  uint32_t storage_log_modulus;
+  uint32_t lwe_per_glwe;
+  // Input LWEs are split into groups of `lwe_per_glwe` (the last group may
+  // be smaller).
+  // Each group is then packed into one GLWE with `lwe_per_glwe` bodies (one for
+  // each LWE of the group). In the end the total number of bodies is equal to
+  // the number of input LWEs
+  uint32_t total_lwe_bodies_count;
+  uint32_t glwe_dimension;
+  uint32_t polynomial_size;
+} CudaPackedGlweCiphertextListFFI;
+
 extern "C" {
 uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -10,28 +30,29 @@ uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
    uint32_t compression_polynomial_size, uint32_t lwe_dimension,
    uint32_t ks_level, uint32_t ks_base_log, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint32_t lwe_per_glwe, uint32_t storage_log_modulus,
-    bool allocate_gpu_memory);
+    uint32_t lwe_per_glwe, bool allocate_gpu_memory);

 uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, uint32_t encryption_glwe_dimension,
    uint32_t encryption_polynomial_size, uint32_t compression_glwe_dimension,
    uint32_t compression_polynomial_size, uint32_t lwe_dimension,
-    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t num_radix_blocks,
-    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint32_t storage_log_modulus, uint32_t body_count, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    uint32_t pbs_level, uint32_t pbs_base_log,
+    uint32_t num_blocks_to_decompress, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_compress_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *glwe_array_out, void const *lwe_array_in, void *const *fp_ksk,
-    uint32_t num_nths, int8_t *mem_ptr);
+    CudaPackedGlweCiphertextListFFI *glwe_array_out,
+    CudaLweCiphertextListFFI const *lwe_array_in, void *const *fp_ksk,
+    int8_t *mem_ptr);

 void cuda_integer_decompress_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *glwe_in, uint32_t const *indexes_array,
-    uint32_t indexes_array_size, void *const *bsks, int8_t *mem_ptr);
+    CudaLweCiphertextListFFI *lwe_array_out,
+    CudaPackedGlweCiphertextListFFI const *glwe_in,
+    uint32_t const *indexes_array, void *const *bsks, int8_t *mem_ptr);

 void cleanup_cuda_integer_compress_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -40,6 +61,41 @@ void cleanup_cuda_integer_compress_radix_ciphertext_64(
 void cleanup_cuda_integer_decompress_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr_void);
+
+uint64_t scratch_cuda_integer_compress_radix_ciphertext_128(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t compression_glwe_dimension,
+    uint32_t compression_polynomial_size, uint32_t lwe_dimension,
+    uint32_t ks_level, uint32_t ks_base_log, uint32_t num_radix_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    uint32_t lwe_per_glwe, bool allocate_gpu_memory);
+
+uint64_t scratch_cuda_integer_decompress_radix_ciphertext_128(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t compression_glwe_dimension,
+    uint32_t compression_polynomial_size, uint32_t lwe_dimension,
+    uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
+    bool allocate_gpu_memory);
+
+void cuda_integer_compress_radix_ciphertext_128(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    CudaPackedGlweCiphertextListFFI *glwe_array_out,
+    CudaLweCiphertextListFFI const *lwe_array_in, void *const *fp_ksk,
+    int8_t *mem_ptr);
+
+void cuda_integer_decompress_radix_ciphertext_128(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    CudaLweCiphertextListFFI *lwe_array_out,
+    CudaPackedGlweCiphertextListFFI const *glwe_in,
+    uint32_t const *indexes_array, int8_t *mem_ptr);
+
+void cleanup_cuda_integer_compress_radix_ciphertext_128(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr_void);
+
+void cleanup_cuda_integer_decompress_radix_ciphertext_128(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr_void);
 }

 #endif
--- a/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
@@ -5,40 +5,33 @@

 template <typename Torus> struct int_compression {
  int_radix_params compression_params;
-  uint32_t storage_log_modulus;
-  uint32_t lwe_per_glwe;
-
-  uint32_t body_count;
-
  // Compression
  int8_t *fp_ks_buffer;
  Torus *tmp_lwe;
  Torus *tmp_glwe_array_out;
  bool gpu_memory_allocated;
+  uint32_t lwe_per_glwe;

  int_compression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
                  uint32_t gpu_count, int_radix_params compression_params,
                  uint32_t num_radix_blocks, uint32_t lwe_per_glwe,
-                  uint32_t storage_log_modulus, bool allocate_gpu_memory,
-                  uint64_t &size_tracker) {
+                  bool allocate_gpu_memory, uint64_t &size_tracker) {
    gpu_memory_allocated = allocate_gpu_memory;
    this->compression_params = compression_params;
-    this->lwe_per_glwe = lwe_per_glwe;
-    this->storage_log_modulus = storage_log_modulus;
-    this->body_count = num_radix_blocks;

-    Torus glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
-                                  compression_params.polynomial_size;
+    uint64_t glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
+                                     compression_params.polynomial_size;

-    tmp_lwe = (Torus *)cuda_malloc_with_size_tracking_async(
+    tmp_lwe = static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
        num_radix_blocks * (compression_params.small_lwe_dimension + 1) *
            sizeof(Torus),
-        streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
-    tmp_glwe_array_out = (Torus *)cuda_malloc_with_size_tracking_async(
-        lwe_per_glwe * glwe_accumulator_size * sizeof(Torus), streams[0],
-        gpu_indexes[0], size_tracker, allocate_gpu_memory);
+        streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory));
+    tmp_glwe_array_out =
+        static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
+            lwe_per_glwe * glwe_accumulator_size * sizeof(Torus), streams[0],
+            gpu_indexes[0], size_tracker, allocate_gpu_memory));

-    size_tracker += scratch_packing_keyswitch_lwe_list_to_glwe_64(
+    size_tracker += scratch_packing_keyswitch_lwe_list_to_glwe<Torus>(
        streams[0], gpu_indexes[0], &fp_ks_buffer,
        compression_params.small_lwe_dimension,
        compression_params.glwe_dimension, compression_params.polynomial_size,
@@ -58,11 +51,7 @@ template <typename Torus> struct int_compression {
 template <typename Torus> struct int_decompression {
  int_radix_params encryption_params;
  int_radix_params compression_params;
-
-  uint32_t storage_log_modulus;
-
-  uint32_t num_radix_blocks;
-  uint32_t body_count;
+  uint32_t num_blocks_to_decompress;

  Torus *tmp_extracted_glwe;
  Torus *tmp_extracted_lwe;
@@ -74,57 +63,63 @@ template <typename Torus> struct int_decompression {
+  // Allocates the scratch buffers needed to decompress
+  // `num_blocks_to_decompress` blocks; `size_tracker` is forwarded to every
+  // allocation call. The rescale LUT is only built when Torus is uint64_t.
   int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
                     uint32_t gpu_count, int_radix_params encryption_params,
                     int_radix_params compression_params,
-                    uint32_t num_radix_blocks, uint32_t body_count,
-                    uint32_t storage_log_modulus, bool allocate_gpu_memory,
+                    uint32_t num_blocks_to_decompress, bool allocate_gpu_memory,
                     uint64_t &size_tracker) {
     gpu_memory_allocated = allocate_gpu_memory;
     this->encryption_params = encryption_params;
     this->compression_params = compression_params;
-    this->storage_log_modulus = storage_log_modulus;
-    this->num_radix_blocks = num_radix_blocks;
-    this->body_count = body_count;
+    this->num_blocks_to_decompress = num_blocks_to_decompress;

-    Torus glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
-                                  compression_params.polynomial_size;
-    Torus lwe_accumulator_size = (compression_params.glwe_dimension *
-                                      compression_params.polynomial_size +
-                                  1);
-    decompression_rescale_lut = new int_radix_lut<Torus>(
-        streams, gpu_indexes, gpu_count, encryption_params, 1, num_radix_blocks,
-        allocate_gpu_memory, size_tracker);
+    uint64_t glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
+                                     compression_params.polynomial_size;
+    uint64_t lwe_accumulator_size = (compression_params.glwe_dimension *
+                                         compression_params.polynomial_size +
+                                     1);

     tmp_extracted_glwe = (Torus *)cuda_malloc_with_size_tracking_async(
-        num_radix_blocks * glwe_accumulator_size * sizeof(Torus), streams[0],
-        gpu_indexes[0], size_tracker, allocate_gpu_memory);
+        num_blocks_to_decompress * glwe_accumulator_size * sizeof(Torus),
+        streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
     tmp_indexes_array = (uint32_t *)cuda_malloc_with_size_tracking_async(
-        num_radix_blocks * sizeof(uint32_t), streams[0], gpu_indexes[0],
+        num_blocks_to_decompress * sizeof(uint32_t), streams[0], gpu_indexes[0],
         size_tracker, allocate_gpu_memory);
     tmp_extracted_lwe = (Torus *)cuda_malloc_with_size_tracking_async(
-        num_radix_blocks * lwe_accumulator_size * sizeof(Torus), streams[0],
-        gpu_indexes[0], size_tracker, allocate_gpu_memory);
+        num_blocks_to_decompress * lwe_accumulator_size * sizeof(Torus),
+        streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);

-    // Rescale is done using an identity LUT
-    // Here we do not divide by message_modulus
-    // Example: in the 2_2 case we are mapping a 2 bits message onto a 4 bits
-    // space, we want to keep the original 2 bits value in the 4 bits space,
-    // so we apply the identity and the encoding will rescale it for us.
-    auto decompression_rescale_f = [](Torus x) -> Torus { return x; };
+    // rescale is only needed on 64-bit decompression
+    if constexpr (std::is_same_v<Torus, uint64_t>) {
+      decompression_rescale_lut = new int_radix_lut<Torus>(
+          streams, gpu_indexes, gpu_count, encryption_params, 1,
+          num_blocks_to_decompress, allocate_gpu_memory, size_tracker);

-    auto effective_compression_message_modulus =
-        encryption_params.carry_modulus;
-    auto effective_compression_carry_modulus = 1;
+      // Rescale is done using an identity LUT
+      // Here we do not divide by message_modulus
+      // Example: in the 2_2 case we are mapping a 2-bit message onto a 4-bit
+      // space, we want to keep the original 2-bit value in the 4-bit space,
+      // so we apply the identity and the encoding will rescale it for us.
+      auto decompression_rescale_f = [](Torus x) -> Torus { return x; };

-    generate_device_accumulator_with_encoding<Torus>(
-        streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
-        decompression_rescale_lut->get_degree(0),
-        decompression_rescale_lut->get_max_degree(0),
-        encryption_params.glwe_dimension, encryption_params.polynomial_size,
-        effective_compression_message_modulus,
-        effective_compression_carry_modulus, encryption_params.message_modulus,
-        encryption_params.carry_modulus, decompression_rescale_f,
-        gpu_memory_allocated);
+      auto effective_compression_message_modulus =
+          encryption_params.carry_modulus;
+      auto effective_compression_carry_modulus = 1;

-    decompression_rescale_lut->broadcast_lut(streams, gpu_indexes, 0);
+      generate_device_accumulator_with_encoding<Torus>(
+          streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
+          decompression_rescale_lut->get_degree(0),
+          decompression_rescale_lut->get_max_degree(0),
+          encryption_params.glwe_dimension, encryption_params.polynomial_size,
+          effective_compression_message_modulus,
+          effective_compression_carry_modulus,
+          encryption_params.message_modulus, encryption_params.carry_modulus,
+          decompression_rescale_f, gpu_memory_allocated);
+      auto active_gpu_count =
+          get_active_gpu_count(num_blocks_to_decompress, gpu_count);
+      decompression_rescale_lut->broadcast_lut(streams, gpu_indexes,
+                                               active_gpu_count);
+    }
   }
  void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
               uint32_t gpu_count) {
@@ -134,9 +129,11 @@ template <typename Torus> struct int_decompression {
                                       gpu_indexes[0], gpu_memory_allocated);
    cuda_drop_with_size_tracking_async(tmp_indexes_array, streams[0],
                                       gpu_indexes[0], gpu_memory_allocated);
-
-    decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
-    delete decompression_rescale_lut;
+    if constexpr (std::is_same_v<Torus, uint64_t>) {
+      decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
+      delete decompression_rescale_lut;
+      decompression_rescale_lut = nullptr;
+    }
  }
 };
 #endif
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -37,6 +37,10 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };

 enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };

+enum Direction { Trailing = 0, Leading = 1 };
+
+enum BitValue { Zero = 0, One = 1 };
+
 extern "C" {

 typedef struct {
@@ -48,6 +52,34 @@ typedef struct {
  uint32_t lwe_dimension;
 } CudaRadixCiphertextFFI;

+typedef struct {
+  uint64_t const *chosen_multiplier_has_at_least_one_set;
+  uint64_t const *decomposed_chosen_multiplier;
+
+  uint32_t const num_scalars;
+  uint32_t const active_bits;
+  uint64_t const shift_pre;
+  uint32_t const shift_post;
+  uint32_t const ilog2_chosen_multiplier;
+  uint32_t const chosen_multiplier_num_bits;
+
+  bool const is_chosen_multiplier_zero;
+  bool const is_abs_chosen_multiplier_one;
+  bool const is_chosen_multiplier_negative;
+  bool const is_chosen_multiplier_pow2;
+  bool const chosen_multiplier_has_more_bits_than_numerator;
+  // if signed: test if chosen_multiplier >= 2^{num_bits - 1}
+  bool const is_chosen_multiplier_geq_two_pow_numerator;
+
+  uint32_t const ilog2_divisor;
+
+  bool const is_divisor_zero;
+  bool const is_abs_divisor_one;
+  bool const is_divisor_negative;
+  bool const is_divisor_pow2;
+  bool const divisor_has_more_bits_than_numerator;
+} CudaScalarDivisorFFI;
+
 uint64_t scratch_cuda_apply_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -55,7 +87,8 @@ uint64_t scratch_cuda_apply_univariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory, bool allocate_ms_array);
+    uint64_t lut_degree, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);
 uint64_t scratch_cuda_apply_many_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -64,7 +97,7 @@ uint64_t scratch_cuda_apply_many_univariate_lut_kb_64(
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
    uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    PBS_MS_REDUCTION_T noise_reduction_type);
 void cuda_apply_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    CudaRadixCiphertextFFI *output_radix_lwe,
@@ -85,7 +118,8 @@ uint64_t scratch_cuda_apply_bivariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory, bool allocate_ms_array);
+    uint64_t lut_degree, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_apply_bivariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -115,7 +149,7 @@ uint64_t scratch_cuda_full_propagation_64(
    uint32_t polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_full_propagation_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -134,7 +168,7 @@ uint64_t scratch_cuda_integer_mult_radix_ciphertext_kb_64(
    uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log,
    uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level,
    uint32_t grouping_factor, uint32_t num_blocks, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_mult_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -168,7 +202,7 @@ uint64_t scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -183,7 +217,7 @@ uint64_t scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -206,7 +240,8 @@ uint64_t scratch_cuda_integer_radix_shift_and_rotate_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
-    bool is_signed, bool allocate_gpu_memory, bool allocate_ms_array);
+    bool is_signed, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -227,7 +262,7 @@ uint64_t scratch_cuda_integer_radix_comparison_kb_64(
    uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
    COMPARISON_TYPE op_type, bool is_signed, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_comparison_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -257,7 +292,8 @@ uint64_t scratch_cuda_integer_radix_bitop_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    BITOP_TYPE op_type, bool allocate_gpu_memory, bool allocate_ms_array);
+    BITOP_TYPE op_type, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -286,7 +322,7 @@ uint64_t scratch_cuda_integer_radix_cmux_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_cmux_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -308,7 +344,7 @@ uint64_t scratch_cuda_integer_radix_scalar_rotate_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_radix_scalar_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -328,7 +364,8 @@ uint64_t scratch_cuda_propagate_single_carry_kb_64_inplace(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
-    uint32_t uses_carry, bool allocate_gpu_memory, bool allocate_ms_array);
+    uint32_t uses_carry, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 uint64_t scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -337,7 +374,8 @@ uint64_t scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
-    uint32_t uses_carry, bool allocate_gpu_memory, bool allocate_ms_array);
+    uint32_t uses_carry, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -372,7 +410,7 @@ uint64_t scratch_cuda_integer_overflowing_sub_kb_64_inplace(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t compute_overflow,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_overflowing_sub_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -396,7 +434,7 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
    uint32_t num_blocks_in_radix, uint32_t max_num_radix_in_vec,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
    bool reduce_degrees_for_single_carry_propagation, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -416,7 +454,7 @@ uint64_t scratch_cuda_integer_scalar_mul_kb_64(
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
    PBS_TYPE pbs_type, uint32_t num_scalar_bits, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_scalar_multiplication_integer_radix_ciphertext_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -438,7 +476,8 @@ uint64_t scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
-    PBS_TYPE pbs_type, bool allocate_gpu_memory, bool allocate_ms_array);
+    PBS_TYPE pbs_type, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_div_rem_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -459,7 +498,8 @@ uint64_t scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory, bool allocate_ms_array);
+    uint64_t lut_degree, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_compute_prefix_sum_hillis_steele_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -485,7 +525,8 @@ uint64_t scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
-    PBS_TYPE pbs_type, bool allocate_gpu_memory, bool allocate_ms_array);
+    PBS_TYPE pbs_type, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -505,7 +546,7 @@ uint64_t scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_are_all_comparisons_block_true_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -526,7 +567,7 @@ uint64_t scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -557,7 +598,7 @@ uint64_t scratch_cuda_apply_noise_squashing_kb(
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_radix_blocks, uint32_t num_original_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_apply_noise_squashing_kb(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -579,7 +620,7 @@ uint64_t scratch_cuda_sub_and_propagate_single_carry_kb_64_inplace(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_sub_and_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -600,19 +641,15 @@ uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_kb_64(
    uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
-    PBS_TYPE pbs_type, bool allocate_gpu_memory, bool is_divisor_power_of_two,
-    bool log2_divisor_exceeds_threshold, bool multiplier_exceeds_threshold,
-    uint32_t num_scalar_bits, uint32_t ilog2_divisor, bool allocate_ms_array);
+    PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_unsigned_scalar_div_radix_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *numerator_ct, int8_t *mem_ptr, void *const *ksks,
-    uint64_t const *decomposed_scalar, uint64_t const *has_at_least_one_set,
+    CudaRadixCiphertextFFI *numerator_ct, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks,
    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
-    void *const *bsks, uint32_t num_scalars, bool multiplier_exceeds_threshold,
-    bool is_divisor_power_of_two, bool log2_divisor_exceeds_threshold,
-    uint32_t ilog2_divisor, uint64_t shift_pre, uint32_t shift_post,
-    uint64_t rhs);
+    const CudaScalarDivisorFFI *scalar_divisor_ffi);

 void cleanup_cuda_integer_unsigned_scalar_div_radix_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -625,7 +662,7 @@ uint64_t scratch_cuda_extend_radix_with_sign_msb_64(
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_blocks, uint32_t num_additional_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_extend_radix_with_sign_msb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -644,23 +681,16 @@ uint64_t scratch_cuda_integer_signed_scalar_div_radix_kb_64(
    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
-    uint32_t num_blocks, uint32_t num_scalar_bits, uint32_t message_modulus,
-    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
-    bool is_absolute_divisor_one, bool is_divisor_negative,
-    bool l_exceed_threshold, bool is_power_of_two, bool multiplier_is_small,
-    bool allocate_ms_array);
+    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
+    PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_signed_scalar_div_radix_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *numerator_ct, int8_t *mem_ptr, void *const *ksks,
-    void *const *bsks,
+    CudaRadixCiphertextFFI *numerator_ct, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks,
    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
-    bool is_absolute_divisor_one, bool is_divisor_negative,
-    bool l_exceed_threshold, bool is_power_of_two, bool multiplier_is_small,
-    uint32_t l, uint32_t shift_post, bool is_rhs_power_of_two, bool is_rhs_zero,
-    bool is_rhs_one, uint32_t rhs_shift, uint32_t numerator_bits,
-    uint32_t num_scalars, uint64_t const *decomposed_scalar,
-    uint64_t const *has_at_least_one_set);
+    const CudaScalarDivisorFFI *scalar_divisor_ffi, uint32_t numerator_bits);

 void cleanup_cuda_integer_signed_scalar_div_radix_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -672,24 +702,18 @@ uint64_t scratch_integer_unsigned_scalar_div_rem_radix_kb_64(
    uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
-    PBS_TYPE pbs_type, bool allocate_gpu_memory, bool is_divisor_power_of_two,
-    bool log2_divisor_exceeds_threshold, bool multiplier_exceeds_threshold,
-    uint32_t num_scalar_bits_for_div, uint32_t num_scalar_bits_for_mul,
-    uint32_t ilog2_divisor, uint64_t divisor, bool allocate_ms_array);
+    PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    uint32_t const active_bits_divisor, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_unsigned_scalar_div_rem_radix_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    CudaRadixCiphertextFFI *quotient_ct, CudaRadixCiphertextFFI *remainder_ct,
-    int8_t *mem_ptr, void *const *ksks, void *const *bsks,
-    uint64_t const *decomposed_scalar_for_div,
-    uint64_t const *decomposed_scalar_for_mul,
-    uint64_t const *has_at_least_one_set_for_div,
-    uint64_t const *has_at_least_one_set_for_mul,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
-    uint32_t num_scalars_for_div, uint32_t num_scalars_for_mul,
-    bool multiplier_exceeds_threshold, bool is_divisor_power_of_two,
-    bool log2_divisor_exceeds_threshold, uint32_t ilog2_divisor,
-    uint64_t divisor, uint64_t shift_pre, uint32_t shift_post, uint64_t rhs,
+    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    uint64_t const *divisor_has_at_least_one_set,
+    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
    void const *clear_blocks, void const *h_clear_blocks,
    uint32_t num_clear_blocks);

@@ -703,30 +727,87 @@ uint64_t scratch_integer_signed_scalar_div_rem_radix_kb_64(
    uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
    uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
-    PBS_TYPE pbs_type, bool allocate_gpu_memory,
-    uint32_t num_scalar_bits_for_div, uint32_t num_scalar_bits_for_mul,
-    bool is_absolute_divisor_one, bool is_divisor_negative,
-    bool l_exceed_threshold, bool is_absolute_divisor_power_of_two,
-    bool is_divisor_zero, bool multiplier_is_small, bool allocate_ms_array);
+    PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    uint32_t const active_bits_divisor, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_integer_signed_scalar_div_rem_radix_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    CudaRadixCiphertextFFI *quotient_ct, CudaRadixCiphertextFFI *remainder_ct,
-    int8_t *mem_ptr, void *const *ksks, void *const *bsks,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
-    bool is_absolute_divisor_one, bool is_divisor_negative,
-    bool is_divisor_zero, bool l_exceed_threshold,
-    bool is_absolute_divisor_power_of_two, bool multiplier_is_small, uint32_t l,
-    uint32_t shift_post, bool is_rhs_power_of_two, bool is_rhs_zero,
-    bool is_rhs_one, uint32_t rhs_shift, uint32_t divisor_shift,
-    uint32_t numerator_bits, uint32_t num_scalars_for_div,
-    uint32_t num_scalars_for_mul, uint64_t const *decomposed_scalar_for_div,
-    uint64_t const *decomposed_scalar_for_mul,
-    uint64_t const *has_at_least_one_set_for_div,
-    uint64_t const *has_at_least_one_set_for_mul);
+    const CudaScalarDivisorFFI *scalar_divisor_ffi,
+    uint64_t const *divisor_has_at_least_one_set,
+    uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
+    uint32_t numerator_bits);

 void cleanup_cuda_integer_signed_scalar_div_rem_radix_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr_void);
+
+uint64_t scratch_integer_count_of_consecutive_bits_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t num_blocks, uint32_t counter_num_blocks, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, Direction direction,
+    BitValue bit_value, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);
+
+void cuda_integer_count_of_consecutive_bits_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    CudaRadixCiphertextFFI *output_ct, CudaRadixCiphertextFFI const *input_ct,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key);
+
+void cleanup_cuda_integer_count_of_consecutive_bits_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr_void);
+
+uint64_t scratch_cuda_integer_grouped_oprf_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t num_blocks_to_process, uint32_t num_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    bool allocate_gpu_memory, uint32_t message_bits_per_block,
+    uint32_t total_random_bits, PBS_MS_REDUCTION_T noise_reduction_type);
+
+void cuda_integer_grouped_oprf_async_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    CudaRadixCiphertextFFI *radix_lwe_out, const void *seeded_lwe_input,
+    uint32_t num_blocks_to_process, int8_t *mem, void *const *bsks,
+    CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
+
+void cleanup_cuda_integer_grouped_oprf_64(void *const *streams,
+                                          uint32_t const *gpu_indexes,
+                                          uint32_t gpu_count,
+                                          int8_t **mem_ptr_void);
+
+uint64_t scratch_integer_ilog2_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    uint32_t input_num_blocks, uint32_t counter_num_blocks,
+    uint32_t num_bits_in_ciphertext, bool allocate_gpu_memory,
+    PBS_MS_REDUCTION_T noise_reduction_type);
+
+void cuda_integer_ilog2_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    CudaRadixCiphertextFFI *output_ct, CudaRadixCiphertextFFI const *input_ct,
+    CudaRadixCiphertextFFI const *trivial_ct_neg_n,
+    CudaRadixCiphertextFFI const *trivial_ct_2,
+    CudaRadixCiphertextFFI const *trivial_ct_m_minus_1_block, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks,
+    const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key);
+
+void cleanup_cuda_integer_ilog2_kb_64(void *const *streams,
+                                      uint32_t const *gpu_indexes,
+                                      uint32_t gpu_count,
+                                      int8_t **mem_ptr_void);
 } // extern C
 #endif // CUDA_INTEGER_H
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_128_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_128_utilities.h
@@ -8,6 +8,6 @@ uint64_t scratch_cuda_programmable_bootstrap_128_vector_64(
    void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t level_count, uint32_t input_lwe_ciphertext_count,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 #endif // CUDA_BOOTSTRAP_128_H
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_enums.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_enums.h
@@ -3,6 +3,7 @@
 #include <stdint.h>
 enum PBS_TYPE { MULTI_BIT = 0, CLASSICAL = 1 };
 enum PBS_VARIANT { DEFAULT = 0, CG = 1, TBC = 2 };
+// Modulus-switch noise reduction mode. Replaces the former boolean
+// `allocate_ms_array` argument of the PBS scratch functions.
+enum PBS_MS_REDUCTION_T { NO_REDUCTION = 0, DRIFT = 1, CENTERED = 2 };

 extern "C" {
 typedef struct {
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
@@ -83,23 +83,24 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
  Torus *temp_lwe_array_in;

  PBS_VARIANT pbs_variant;
-  bool uses_noise_reduction;
+  PBS_MS_REDUCTION_T noise_reduction_type;
  bool gpu_memory_allocated;

  pbs_buffer(cudaStream_t stream, uint32_t gpu_index, uint32_t lwe_dimension,
             uint32_t glwe_dimension, uint32_t polynomial_size,
             uint32_t level_count, uint32_t input_lwe_ciphertext_count,
             PBS_VARIANT pbs_variant, bool allocate_gpu_memory,
-             bool allocate_ms_array, uint64_t &size_tracker) {
+             PBS_MS_REDUCTION_T noise_reduction_type, uint64_t &size_tracker)
+      : noise_reduction_type(noise_reduction_type) {
    gpu_memory_allocated = allocate_gpu_memory;
    cuda_set_device(gpu_index);
-    this->uses_noise_reduction = allocate_ms_array;
    this->pbs_variant = pbs_variant;

    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
    this->temp_lwe_array_in = (Torus *)cuda_malloc_with_size_tracking_async(
        (lwe_dimension + 1) * input_lwe_ciphertext_count * sizeof(Torus),
-        stream, gpu_index, size_tracker, allocate_ms_array);
+        stream, gpu_index, size_tracker,
+        noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT);
    switch (pbs_variant) {
    case PBS_VARIANT::DEFAULT: {
      uint64_t full_sm_step_one =
@@ -234,7 +235,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
      cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
                                         gpu_memory_allocated);

-    if (uses_noise_reduction)
+    if (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT)
      cuda_drop_with_size_tracking_async(temp_lwe_array_in, stream, gpu_index,
                                         gpu_memory_allocated);
  }
@@ -252,26 +253,30 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> {
  uint64_t *trivial_indexes;

  PBS_VARIANT pbs_variant;
-  bool uses_noise_reduction;
+  PBS_MS_REDUCTION_T noise_reduction_type;
  bool gpu_memory_allocated;

  pbs_buffer_128(cudaStream_t stream, uint32_t gpu_index,
                 uint32_t lwe_dimension, uint32_t glwe_dimension,
                 uint32_t polynomial_size, uint32_t level_count,
                 uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
-                 bool allocate_gpu_memory, bool allocate_ms_array,
-                 uint64_t &size_tracker) {
+                 bool allocate_gpu_memory,
+                 PBS_MS_REDUCTION_T noise_reduction_type,
+                 uint64_t &size_tracker)
+      : noise_reduction_type(noise_reduction_type) {
    gpu_memory_allocated = allocate_gpu_memory;
    cuda_set_device(gpu_index);
    this->pbs_variant = pbs_variant;
-    this->uses_noise_reduction = allocate_ms_array;
-    if (allocate_ms_array) {
-      this->temp_lwe_array_in = (InputTorus *)cuda_malloc_async(
-          (lwe_dimension + 1) * input_lwe_ciphertext_count * sizeof(InputTorus),
-          stream, gpu_index);
+
+    if (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT) {
+      this->temp_lwe_array_in =
+          (InputTorus *)cuda_malloc_with_size_tracking_async(
+              (lwe_dimension + 1) * input_lwe_ciphertext_count *
+                  sizeof(InputTorus),
+              stream, gpu_index, size_tracker, allocate_gpu_memory);
      this->trivial_indexes = (uint64_t *)cuda_malloc_with_size_tracking_async(
          input_lwe_ciphertext_count * sizeof(uint64_t), stream, gpu_index,
-          size_tracker, allocate_ms_array);
+          size_tracker, allocate_gpu_memory);
      uint64_t *h_trivial_indexes = new uint64_t[input_lwe_ciphertext_count];
      for (uint32_t i = 0; i < input_lwe_ciphertext_count; i++)
        h_trivial_indexes[i] = i;
@@ -420,7 +425,7 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> {
      cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
                                         gpu_memory_allocated);

-    if (uses_noise_reduction) {
+    if (noise_reduction_type == PBS_MS_REDUCTION_T::DRIFT) {
      cuda_drop_with_size_tracking_async(temp_lwe_array_in, stream, gpu_index,
                                         gpu_memory_allocated);
      cuda_drop_with_size_tracking_async(trivial_indexes, stream, gpu_index,
@@ -500,7 +505,7 @@ uint64_t scratch_cuda_programmable_bootstrap_tbc(
    void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t level_count, uint32_t input_lwe_ciphertext_count,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
 #endif

 template <typename Torus>
@@ -508,14 +513,14 @@ uint64_t scratch_cuda_programmable_bootstrap_cg(
    void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t level_count, uint32_t input_lwe_ciphertext_count,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 template <typename Torus>
 uint64_t scratch_cuda_programmable_bootstrap(
    void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t level_count, uint32_t input_lwe_ciphertext_count,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 template <typename Torus>
 bool has_support_to_cuda_programmable_bootstrap_tbc(uint32_t num_samples,
@@ -544,6 +549,11 @@ __device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level,
                                     uint32_t polynomial_size,
                                     int glwe_dimension, uint32_t level_count);

+// Variant of get_ith_mask_kth_block (going by the name) whose polynomial size,
+// GLWE dimension, level count and level id are template arguments rather than
+// runtime ones; works on const pointers only.
+// NOTE(review): the "2_2_params" suffix presumably refers to the 2_2 parameter
+// set; confirm against the definition.
+template <typename T, uint32_t polynomial_size, uint32_t glwe_dimension,
+          uint32_t level_count, uint32_t level_id>
+__device__ const T *get_ith_mask_kth_block_2_2_params(const T *ptr,
+                                                      int iteration, int k);
+
 template <typename T>
 __device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level,
                                     uint32_t polynomial_size,
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap.h
@@ -61,19 +61,19 @@ uint64_t scratch_cuda_programmable_bootstrap_32(
    void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
    uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    PBS_MS_REDUCTION_T noise_reduction_type);

 uint64_t scratch_cuda_programmable_bootstrap_64(
    void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
    uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    PBS_MS_REDUCTION_T noise_reduction_type);

 uint64_t scratch_cuda_programmable_bootstrap_128(
    void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
    uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
-    bool allocate_ms_array);
+    PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
@@ -47,12 +47,11 @@ uint64_t scratch_cuda_multi_bit_programmable_bootstrap_128_vector_64(
 void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_128(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_output_indexes, void const *lut_vector,
-    void const *lut_vector_indexes, void const *lwe_array_in,
-    void const *lwe_input_indexes, void const *bootstrapping_key,
-    int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
-    uint32_t lut_stride);
+    void const *lwe_array_in, void const *lwe_input_indexes,
+    void const *bootstrapping_key, int8_t *mem_ptr, uint32_t lwe_dimension,
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_many_lut, uint32_t lut_stride);

 void cleanup_cuda_multi_bit_programmable_bootstrap_128(void *stream,
                                                       const uint32_t gpu_index,
--- a/backends/tfhe-cuda-backend/cuda/include/zk/zk.h
+++ b/backends/tfhe-cuda-backend/cuda/include/zk/zk.h
@@ -6,13 +6,6 @@
 #include <stdint.h>

 extern "C" {
-
-void cuda_lwe_expand_64(void *const stream, uint32_t gpu_index,
-                        void *lwe_array_out, const void *lwe_compact_array_in,
-                        uint32_t lwe_dimension, uint32_t num_lwe,
-                        const uint32_t *lwe_compact_input_indexes,
-                        const uint32_t *output_body_id_per_compact_list);
-
 uint64_t scratch_cuda_expand_without_verification_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
@@ -24,7 +17,7 @@ uint64_t scratch_cuda_expand_without_verification_64(
    const uint32_t *num_lwes_per_compact_list, const bool *is_boolean_array,
    uint32_t num_compact_lists, uint32_t message_modulus,
    uint32_t carry_modulus, PBS_TYPE pbs_type, KS_TYPE casting_key_type,
-    bool allocate_gpu_memory, bool allocate_ms_array);
+    bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);

 void cuda_expand_without_verification_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
--- a/backends/tfhe-cuda-backend/cuda/include/zk/zk_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/zk/zk_utilities.h
@@ -5,6 +5,96 @@
 #include "integer/integer.cuh"
 #include <cstdint>

+////////////////////////////////////
+// Helper structures used in expand
+// Non-owning handle to the mask of a compact LWE list (see
+// compact_lwe_list::get_mask). It only stores the pointer it is given; nothing
+// is allocated or freed here.
+template <typename Torus> struct lwe_mask {
+  Torus *mask;
+
+  lwe_mask(Torus *mask) : mask(mask) {}
+};
+
+// Non-owning handle to one body of a compact LWE list, paired with the
+// monomial degree attached to that body.
+template <typename Torus> struct compact_lwe_body {
+  Torus *body;
+  uint64_t monomial_degree;
+
+  /* body_id is the index of the body in the compact ciphertext list. It is
+   * stored unchanged as monomial_degree and is used to compute the rotation.
+   */
+  compact_lwe_body(Torus *body, const uint64_t body_id)
+      : body(body), monomial_degree(body_id) {}
+};
+
+// View over a single compact LWE list stored contiguously at `ptr`.
+// get_mask() hands out `ptr` itself and get_body() reads from offset
+// lwe_dimension onwards, i.e. the mask comes first and the bodies follow it.
+// Non-owning: the struct never allocates or frees `ptr`.
+template <typename Torus> struct compact_lwe_list {
+  Torus *ptr;
+  uint32_t lwe_dimension;
+  uint32_t total_num_lwes;
+
+  compact_lwe_list(Torus *ptr, uint32_t lwe_dimension, uint32_t total_num_lwes)
+      : ptr(ptr), lwe_dimension(lwe_dimension),
+        total_num_lwes(total_num_lwes) {}
+
+  // The mask starts at the very beginning of the list.
+  lwe_mask<Torus> get_mask() { return lwe_mask<Torus>(ptr); }
+
+  // Returns the index-th body; panics when index >= total_num_lwes.
+  compact_lwe_body<Torus> get_body(uint32_t index) {
+    const bool in_range = index < total_num_lwes;
+    if (!in_range) {
+      PANIC("index out of range in compact_lwe_list::get_body");
+    }
+
+    Torus *body_ptr = ptr + (lwe_dimension + index);
+    return compact_lwe_body<Torus>(body_ptr, uint64_t(index));
+  }
+};
+
+// Host-side index over several compact LWE lists laid out back to back in one
+// buffer starting at d_ptr. List i occupies
+// lwe_dimension + h_num_lwes_per_compact_list[i] Torus elements.
+//
+// d_ptr and h_num_lwes_per_compact_list are borrowed: they are neither copied
+// nor freed, so they must stay valid while this object is used. d_ptr is never
+// dereferenced here, only offset (presumably device memory, going by the d_
+// prefix).
+//
+// NOTE(review): d_ptr_to_compact_list is malloc'ed in the constructor and is
+// not freed anywhere in this struct. A destructor is deliberately not added
+// here because it would double-free if callers copy the struct; whoever owns
+// an instance should free(d_ptr_to_compact_list) once done.
+template <typename Torus> struct flattened_compact_lwe_lists {
+  Torus *d_ptr;
+  Torus **d_ptr_to_compact_list;
+  const uint32_t *h_num_lwes_per_compact_list;
+  uint32_t num_compact_lists;
+  uint32_t lwe_dimension;
+  uint32_t total_num_lwes;
+
+  flattened_compact_lwe_lists(Torus *d_ptr,
+                              const uint32_t *h_num_lwes_per_compact_list,
+                              uint32_t num_compact_lists,
+                              uint32_t lwe_dimension)
+      : d_ptr(d_ptr), h_num_lwes_per_compact_list(h_num_lwes_per_compact_list),
+        num_compact_lists(num_compact_lists), lwe_dimension(lwe_dimension) {
+    // One Torus * slot per compact list (the element type is Torus *, not
+    // Torus **).
+    d_ptr_to_compact_list =
+        static_cast<Torus **>(malloc(num_compact_lists * sizeof(Torus *)));
+    // malloc(0) may legitimately return NULL, so only a non-empty request is
+    // treated as an allocation failure.
+    if (num_compact_lists > 0 && d_ptr_to_compact_list == nullptr) {
+      PANIC("flattened_compact_lwe_lists: malloc failed");
+    }
+
+    // Record where each list starts and accumulate the total number of LWEs.
+    total_num_lwes = 0;
+    Torus *curr_list = d_ptr;
+    for (uint32_t i = 0; i < num_compact_lists; ++i) {
+      const uint32_t num_lwes_in_list = h_num_lwes_per_compact_list[i];
+      total_num_lwes += num_lwes_in_list;
+      d_ptr_to_compact_list[i] = curr_list;
+      curr_list += lwe_dimension + num_lwes_in_list;
+    }
+  }
+
+  // Returns a view over the compact_list_index-th list; panics when the index
+  // is >= num_compact_lists.
+  compact_lwe_list<Torus> get_device_compact_list(uint32_t compact_list_index) {
+    if (compact_list_index >= num_compact_lists) {
+      PANIC("index out of range in flattened_compact_lwe_lists::get");
+    }
+
+    return compact_lwe_list(d_ptr_to_compact_list[compact_list_index],
+                            lwe_dimension,
+                            h_num_lwes_per_compact_list[compact_list_index]);
+  }
+};
+
+/*
+ * An expand_job tells the expand kernel exactly which input mask and body to
+ * use and what rotation to apply (the rotation is computed from the body's
+ * monomial_degree).
+ */
+template <typename Torus> struct expand_job {
+  lwe_mask<Torus> mask_to_use;
+  compact_lwe_body<Torus> body_to_use;
+
+  expand_job(lwe_mask<Torus> mask_to_use, compact_lwe_body<Torus> body_to_use)
+      : mask_to_use(mask_to_use), body_to_use(body_to_use) {}
+};
+
+////////////////////////////////////
+
 template <typename Torus> struct zk_expand_mem {
  int_radix_params computing_params;
  int_radix_params casting_params;
@@ -17,11 +107,12 @@ template <typename Torus> struct zk_expand_mem {
  Torus *tmp_expanded_lwes;
  Torus *tmp_ksed_small_to_big_expanded_lwes;

-  uint32_t *d_lwe_compact_input_indexes;
-
-  uint32_t *d_body_id_per_compact_list;
  bool gpu_memory_allocated;

+  uint32_t *num_lwes_per_compact_list;
+  expand_job<Torus> *d_expand_jobs;
+  expand_job<Torus> *h_expand_jobs;
+
  zk_expand_mem(cudaStream_t const *streams, uint32_t const *gpu_indexes,
                uint32_t gpu_count, int_radix_params computing_params,
                int_radix_params casting_params, KS_TYPE casting_key_type,
@@ -33,9 +124,17 @@ template <typename Torus> struct zk_expand_mem {
        casting_key_type(casting_key_type) {

    gpu_memory_allocated = allocate_gpu_memory;
+
+    // We copy num_lwes_per_compact_list so we get protection against
+    // num_lwes_per_compact_list being freed while this buffer is still in use
+    this->num_lwes_per_compact_list =
+        (uint32_t *)malloc(num_compact_lists * sizeof(uint32_t));
+    memcpy(this->num_lwes_per_compact_list, num_lwes_per_compact_list,
+           num_compact_lists * sizeof(uint32_t));
+
    num_lwes = 0;
    for (int i = 0; i < num_compact_lists; i++) {
-      num_lwes += num_lwes_per_compact_list[i];
+      num_lwes += this->num_lwes_per_compact_list[i];
    }

    if (computing_params.carry_modulus != computing_params.message_modulus) {
@@ -121,49 +220,14 @@ template <typename Torus> struct zk_expand_mem {
        malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
    auto h_lut_indexes = static_cast<Torus *>(
        malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
-    auto h_body_id_per_compact_list =
-        static_cast<uint32_t *>(malloc(num_lwes * sizeof(uint32_t)));
-    auto h_lwe_compact_input_indexes =
-        static_cast<uint32_t *>(malloc(num_lwes * sizeof(uint32_t)));

-    d_body_id_per_compact_list =
-        static_cast<uint32_t *>(cuda_malloc_with_size_tracking_async(
-            num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
-            size_tracker, allocate_gpu_memory));
-    d_lwe_compact_input_indexes =
-        static_cast<uint32_t *>(cuda_malloc_with_size_tracking_async(
-            num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
+    d_expand_jobs =
+        static_cast<expand_job<Torus> *>(cuda_malloc_with_size_tracking_async(
+            num_lwes * sizeof(expand_job<Torus>), streams[0], gpu_indexes[0],
            size_tracker, allocate_gpu_memory));

-    auto compact_list_id = 0;
-    auto idx = 0;
-    auto count = 0;
-    // During flatenning, all num_lwes LWEs from all compact lists are stored
-    // sequentially on a Torus array. h_lwe_compact_input_indexes stores the
-    // index of the first LWE related to the compact list that contains the i-th
-    // LWE
-    for (int i = 0; i < num_lwes; i++) {
-      h_lwe_compact_input_indexes[i] = idx;
-      count++;
-      if (count == num_lwes_per_compact_list[compact_list_id]) {
-        compact_list_id++;
-        idx += casting_params.big_lwe_dimension + count;
-        count = 0;
-      }
-    }
-
-    // Stores the index of the i-th LWE (within each compact list) related to
-    // the k-th compact list.
-    auto offset = 0;
-    for (int k = 0; k < num_compact_lists; k++) {
-      auto num_lwes_in_kth_compact_list = num_lwes_per_compact_list[k];
-      uint32_t body_count = 0;
-      for (int i = 0; i < num_lwes_in_kth_compact_list; i++) {
-        h_body_id_per_compact_list[i + offset] = body_count;
-        body_count++;
-      }
-      offset += num_lwes_in_kth_compact_list;
-    }
+    h_expand_jobs = static_cast<expand_job<Torus> *>(
+        malloc(num_lwes * sizeof(expand_job<Torus>)));

    /*
     * Each LWE contains encrypted data in both carry and message spaces
@@ -198,9 +262,9 @@ template <typename Torus> struct zk_expand_mem {
     * num_packed_msgs to use the sanitization LUT (which ensures output is
     * exactly 0 or 1).
     */
-    offset = 0;
+    auto offset = 0;
    for (int k = 0; k < num_compact_lists; k++) {
-      auto num_lwes_in_kth = num_lwes_per_compact_list[k];
+      auto num_lwes_in_kth = this->num_lwes_per_compact_list[k];
      for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
        auto lwe_index = i + num_packed_msgs * offset;
        auto lwe_index_in_list = i % num_lwes_in_kth;
@@ -220,20 +284,17 @@ template <typename Torus> struct zk_expand_mem {
        streams[0], gpu_indexes[0], h_indexes_in, h_indexes_out);
    auto lut_indexes = message_and_carry_extract_luts->get_lut_indexes(0, 0);

-    cuda_memcpy_with_size_tracking_async_to_gpu(
-        d_lwe_compact_input_indexes, h_lwe_compact_input_indexes,
-        num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
-        allocate_gpu_memory);
    cuda_memcpy_with_size_tracking_async_to_gpu(
        lut_indexes, h_lut_indexes, num_packed_msgs * num_lwes * sizeof(Torus),
        streams[0], gpu_indexes[0], allocate_gpu_memory);
-    cuda_memcpy_with_size_tracking_async_to_gpu(
-        d_body_id_per_compact_list, h_body_id_per_compact_list,
-        num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
+
+    auto active_gpu_count = get_active_gpu_count(2 * num_lwes, gpu_count);
+    message_and_carry_extract_luts->broadcast_lut(streams, gpu_indexes,
+                                                  active_gpu_count);
+
+    message_and_carry_extract_luts->allocate_lwe_vector_for_non_trivial_indexes(
+        streams, gpu_indexes, active_gpu_count, 2 * num_lwes, size_tracker,
        allocate_gpu_memory);
-
-    message_and_carry_extract_luts->broadcast_lut(streams, gpu_indexes, 0);
-
    // The expanded LWEs will always be on the casting key format
    tmp_expanded_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
        num_lwes * (casting_params.big_lwe_dimension + 1) * sizeof(Torus),
@@ -248,8 +309,6 @@ template <typename Torus> struct zk_expand_mem {
    free(h_indexes_in);
    free(h_indexes_out);
    free(h_lut_indexes);
-    free(h_body_id_per_compact_list);
-    free(h_lwe_compact_input_indexes);
  }

  void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -258,15 +317,16 @@ template <typename Torus> struct zk_expand_mem {
    message_and_carry_extract_luts->release(streams, gpu_indexes, gpu_count);
    delete message_and_carry_extract_luts;

-    cuda_drop_with_size_tracking_async(d_body_id_per_compact_list, streams[0],
-                                       gpu_indexes[0], gpu_memory_allocated);
-    cuda_drop_with_size_tracking_async(d_lwe_compact_input_indexes, streams[0],
-                                       gpu_indexes[0], gpu_memory_allocated);
    cuda_drop_with_size_tracking_async(tmp_expanded_lwes, streams[0],
                                       gpu_indexes[0], gpu_memory_allocated);
    cuda_drop_with_size_tracking_async(tmp_ksed_small_to_big_expanded_lwes,
                                       streams[0], gpu_indexes[0],
                                       gpu_memory_allocated);
+    cuda_drop_with_size_tracking_async(d_expand_jobs, streams[0],
+                                       gpu_indexes[0], gpu_memory_allocated);
+    cuda_synchronize_stream(streams[0], gpu_indexes[0]);
+    free(num_lwes_per_compact_list);
+    free(h_expand_jobs);
  }
 };

--- a/Show More
+++ b/Show More