feat(hpu,u55c) Update U55C bitstream

This bitstream adds support for Multi-width IOp and Flush configuration
feat(hpu): Update register map
2026-01-11 15:48:20 -05:00 · 2025-04-01 10:19:19 +02:00 · 2025-04-01 10:17:23 +02:00 · 2025-04-01 10:17:23 +02:00 · 2025-04-01 10:17:23 +02:00 · 2025-03-31 21:40:56 +02:00
1155 changed files with 133056 additions and 20657 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,8 +8,14 @@ root = true
 end_of_line = lf
 insert_final_newline = true

-# 4 space indentation
-[*.rs]
+# 4 space indentation for rust and toml
+[*.{rs,toml}]
 charset = utf-8
 indent_style = space
 indent_size = 4
+
+# 2 space indentation for c and js
+[*.{js,json,c,h}]
+charset = utf-8
+indent_style = space
+indent_size = 2
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -5,6 +5,8 @@ self-hosted-runner:
    - 4090-desktop
    - large_windows_16_latest
    - large_ubuntu_16
+    - large_ubuntu_16-22.04
+    - v80-desktop
 # Configuration variables in array of strings defined in your repository or
 # organization. `null` means disabling configuration variables check.
 # Empty array means no configuration variable is allowed.
--- a/.github/actions/gpu_setup/action.yml
+++ b/.github/actions/gpu_setup/action.yml
@@ -0,0 +1,63 @@
+name: Setup Cuda
+description: Setup Cuda on Hyperstack or GitHub instance
+
+inputs:
+  cuda-version:
+    description: Version of Cuda to use
+    required: true
+  gcc-version:
+    description: Version of GCC to use
+    required: true
+  cmake-version:
+    description: Version of cmake to use
+    default: 3.29.6
+  github-instance:
+    description: Instance is hosted on GitHub
+    default: 'false'
+
+runs:
+  using: "composite"
+  steps:
+    # Mandatory on hyperstack since a bootable volume is not re-usable yet.
+    - name: Install dependencies
+      shell: bash
+      run: |
+        sudo apt update
+        curl -fsSL https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/kitware.gpg
+        sudo chmod 644 /etc/apt/trusted.gpg.d/kitware.gpg
+        echo 'deb [signed-by=/etc/apt/trusted.gpg.d/kitware.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null
+        sudo apt update
+        sudo apt install -y cmake cmake-format libclang-dev
+
+    - name: Install CUDA
+      if: inputs.github-instance == 'true'
+      shell: bash
+      run: |
+        TOOLKIT_VERSION="$(echo ${{ inputs.cuda-version }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
+        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+        sudo dpkg -i cuda-keyring_1.1-1_all.deb
+        sudo apt update
+        sudo apt -y install cuda-toolkit-${TOOLKIT_VERSION}
+
+    - name: Export CUDA variables
+      shell: bash
+      run: |
+        CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
+        echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+        echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+        echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+        echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"
+
+    # Specify the correct host compilers
+    - name: Export gcc and g++ variables
+      shell: bash
+      run: |
+        {
+          echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
+          echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
+        } >> "${GITHUB_ENV}"
+
+    - name: Check device is detected
+      shell: bash
+      run: nvidia-smi
--- a/.github/workflows/aws_tfhe_backward_compat_tests.yml
+++ b/.github/workflows/aws_tfhe_backward_compat_tests.yml
@@ -11,6 +11,10 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -22,11 +26,12 @@ jobs:
    name: Setup instance (backward-compat-tests)
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -35,11 +40,18 @@ jobs:
          backend: aws
          profile: cpu-small

+      # This instance is spawned specifically for pull requests from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  backward-compat-tests:
    name: Backward compatibility tests
    needs: [ setup-instance ]
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -47,10 +59,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -76,7 +88,7 @@ jobs:
        with:
          persist-credentials: 'false'
          repository: zama-ai/tfhe-backward-compat-data
-          path: tfhe/tfhe-backward-compat-data
+          path: tests/tfhe-backward-compat-data
          lfs: 'true'
          ref: ${{ steps.backward_compat_branch.outputs.branch }}

@@ -90,17 +102,18 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (backward-compat-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, backward-compat-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -114,4 +127,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -12,6 +12,10 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -22,7 +26,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
      zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -54,13 +58,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            dependencies:
              - tfhe/Cargo.toml
@@ -103,7 +107,7 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - README.md

      - name: Aggregate file changes
@@ -124,16 +128,17 @@ jobs:

  setup-instance:
    name: Setup instance (fast-tests)
-    if: github.event_name != 'pull_request' ||
-      needs.should-run.outputs.any_file_changed == 'true'
+    if: github.event_name == 'workflow_dispatch' ||
+      (github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
    needs: should-run
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -142,13 +147,18 @@ jobs:
          backend: aws
          profile: cpu-big

+      # This instance is spawned specifically for pull requests from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  fast-tests:
    name: Fast CPU tests
-    if: github.event_name != 'pull_request' ||
-      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -156,10 +166,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -255,22 +265,23 @@ jobs:
          make test_zk

      - name: Slack Notification
-        if: ${{ failure() }}
+        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (fast-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, fast-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -284,4 +295,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -14,12 +14,16 @@ env:
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
  NO_BIG_PARAMS: FALSE
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [labeled]
+    types: [ labeled ]
  push:
    branches:
      - main
@@ -28,12 +32,11 @@ jobs:
  should-run:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
-      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      integer_test: ${{ github.event_name == 'workflow_dispatch' ||
        steps.changed-files.outputs.integer_any_changed }}
@@ -42,14 +45,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-          persist-credentials: "false"
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            integer:
              - tfhe/Cargo.toml
@@ -67,15 +69,16 @@ jobs:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
-      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
+      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,11 +87,18 @@ jobs:
          backend: aws
          profile: cpu-big

+      # This instance is spawned specifically for pull requests from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  unsigned-integer-tests:
    name: Unsigned integer tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -96,10 +106,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -130,17 +140,18 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (unsigned-integer-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [setup-instance, unsigned-integer-tests]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -154,4 +165,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -14,12 +14,16 @@ env:
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
  NO_BIG_PARAMS: FALSE
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [labeled]
+    types: [ labeled ]
  push:
    branches:
      - main
@@ -33,7 +37,7 @@ jobs:
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      integer_test: ${{ github.event_name == 'workflow_dispatch' ||
        steps.changed-files.outputs.integer_any_changed }}
@@ -42,14 +46,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-          persist-credentials: "false"
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            integer:
              - tfhe/Cargo.toml
@@ -67,15 +70,16 @@ jobs:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
-      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
+      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,11 +88,18 @@ jobs:
          backend: aws
          profile: cpu-big

+      # This instance is spawned specifically for pull requests from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  signed-integer-tests:
    name: Signed integer tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -96,10 +107,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -134,17 +145,18 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (signed-integer-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [setup-instance, signed-integer-tests]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -158,4 +170,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -11,6 +11,10 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,7 +31,7 @@ jobs:
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
      zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -63,13 +67,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            dependencies:
              - tfhe/Cargo.toml
@@ -111,7 +115,7 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - README.md

      - name: Aggregate file changes
@@ -138,11 +142,12 @@ jobs:
    needs: should-run
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -151,13 +156,20 @@ jobs:
          backend: aws
          profile: cpu-big

+      # This instance is spawned specifically for pull requests from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cpu-tests:
    name: CPU tests
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
    concurrency:
-      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -165,10 +177,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -240,17 +252,18 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -264,4 +277,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -10,6 +10,10 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -23,11 +27,12 @@ jobs:
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -36,11 +41,18 @@ jobs:
          backend: aws
          profile: cpu-small

+      # This instance will be spawned specifically for pull requests from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  wasm-tests:
    name: WASM tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -48,10 +60,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -109,17 +121,18 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (wasm-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, wasm-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -133,4 +146,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/benchmark_boolean.yml
+++ b/.github/workflows/benchmark_boolean.yml
@@ -29,7 +29,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -51,7 +51,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -62,7 +63,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -93,7 +94,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}
@@ -103,7 +104,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -121,13 +123,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (boolean-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, boolean-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_core_crypto.yml
+++ b/.github/workflows/benchmark_core_crypto.yml
@@ -3,6 +3,9 @@ name: Core crypto benchmarks

 on:
  workflow_dispatch:
+  schedule:
+    # Weekly benchmarks will be triggered each Saturday at 5 a.m. (UTC).
+    - cron: '0 5 * * 6'

 env:
  CARGO_TERM_COLOR: always
@@ -26,7 +29,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -47,7 +50,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -58,7 +62,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -81,7 +85,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -91,7 +95,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -109,13 +114,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (core-crypto-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, core-crypto-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_erc20.yml
+++ b/.github/workflows/benchmark_erc20.yml
@@ -29,7 +29,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -52,7 +52,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -63,7 +64,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -72,7 +73,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Run benchmarks
        run: |
@@ -97,7 +99,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_erc20
          path: ${{ env.RESULTS_FILENAME }}
@@ -118,13 +120,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (erc20-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, erc20-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_gpu_4090.yml
+++ b/.github/workflows/benchmark_gpu_4090.yml
@@ -17,7 +17,7 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [labeled]
+    types: [ labeled ]
  schedule:
    # Weekly benchmarks will be triggered each Friday at 9p.m.
    - cron: "0 21 * * 5"
@@ -33,16 +33,13 @@ jobs:
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ["self-hosted", "4090-desktop"]
    timeout-minutes: 1440 # 24 hours
-    strategy:
-      fail-fast: false
-      max-parallel: 1
-
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -54,7 +51,7 @@ jobs:
          echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -63,7 +60,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Run integer benchmarks
        run: |
@@ -82,7 +80,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_integer_multi_bit_gpu_default
          path: ${{ env.RESULTS_FILENAME }}
@@ -99,7 +97,7 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  cuda-core-crypto-benchmarks:
    name: Cuda core crypto benchmarks  (RTX 4090)
@@ -116,7 +114,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -127,7 +126,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -136,7 +135,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Run core crypto benchmarks
        run: |
@@ -157,7 +157,7 @@ jobs:
      

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -182,7 +182,7 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  remove_github_label:
    name: Remove 4090 bench label
--- a/.github/workflows/benchmark_gpu_core_crypto.yml
+++ b/.github/workflows/benchmark_gpu_core_crypto.yml
@@ -27,7 +27,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -48,27 +48,19 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}

      - name: Get benchmark details
        run: |
@@ -78,37 +70,11 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
      - name: Run benchmarks with AVX512
        run: |
          make bench_pbs_gpu
@@ -128,7 +94,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -138,7 +104,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -161,13 +128,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-integer-full-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-core-crypto-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_gpu_erc20.yml
+++ b/.github/workflows/benchmark_gpu_erc20.yml
@@ -12,7 +12,10 @@ on:
          - "l40 (n3-L40x1)"
          - "single-h100 (n3-H100x1)"
          - "2-h100 (n3-H100x2)"
+          - "4-h100 (n3-H100x4)"
          - "multi-h100 (n3-H100x8)"
+          - "multi-h100-nvlink (n3-H100x8-NVLink)"
+          - "multi-h100-sxm5 (n3-H100x8-SXM5)"

 jobs:
  parse-inputs:
--- a/.github/workflows/benchmark_gpu_erc20_common.yml
+++ b/.github/workflows/benchmark_gpu_erc20_common.yml
@@ -14,7 +14,7 @@ on:
        type: string
        required: true
    secrets:
-      FHE_ACTIONS_TOKEN:
+      REPO_CHECKOUT_TOKEN:
        required: true
      SLAB_ACTION_TOKEN:
        required: true
@@ -54,7 +54,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -75,27 +75,19 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}

      - name: Get benchmark details
        run: |
@@ -105,40 +97,11 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run benchmarks
        run: |
          make bench_hlapi_erc20_gpu
@@ -157,9 +120,9 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
-          name: ${{ github.sha }}_erc20
+          name: ${{ github.sha }}_erc20_${{ inputs.profile }}
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
@@ -167,7 +130,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -190,13 +154,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_gpu_integer.yml
+++ b/.github/workflows/benchmark_gpu_integer.yml
@@ -15,6 +15,7 @@ on:
          - "4-h100 (n3-H100x4)"
          - "multi-h100 (n3-H100x8)"
          - "multi-h100-nvlink (n3-H100x8-NVLink)"
+          - "multi-h100-sxm5 (n3-H100x8-SXM5)"
          - "multi-a100-nvlink (n3-A100x8-NVLink)"
      command:
        description: "Benchmark command to run"
--- a/.github/workflows/benchmark_gpu_integer_common.yml
+++ b/.github/workflows/benchmark_gpu_integer_common.yml
@@ -26,7 +26,7 @@ on:
        type: boolean
        default: false
    secrets:
-      FHE_ACTIONS_TOKEN:
+      REPO_CHECKOUT_TOKEN:
        required: true
      SLAB_ACTION_TOKEN:
        required: true
@@ -118,7 +118,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -145,27 +145,19 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}

      - name: Get benchmark details
        run: |
@@ -175,47 +167,11 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-          } >> "${GITHUB_ENV}"
-
-      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
        run: |
@@ -240,11 +196,19 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
-          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
          path: ${{ env.RESULTS_FILENAME }}

+      - name: Checkout Slab repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          repository: zama-ai/slab
+          path: slab
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
      - name: Send data to Slab
        shell: bash
        run: |
@@ -266,13 +230,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_integer.yml
+++ b/.github/workflows/benchmark_integer.yml
@@ -90,7 +90,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -119,7 +119,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -130,7 +131,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -139,7 +140,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
@@ -170,7 +172,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -191,13 +193,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_shortint.yml
+++ b/.github/workflows/benchmark_shortint.yml
@@ -56,7 +56,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -82,7 +82,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -93,7 +94,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -102,7 +103,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
@@ -136,7 +138,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -157,13 +159,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (shortint-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, shortint-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_signed_integer.yml
+++ b/.github/workflows/benchmark_signed_integer.yml
@@ -90,7 +90,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -119,7 +119,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -130,7 +131,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -139,7 +140,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
@@ -164,7 +166,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -185,13 +187,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, signed-integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_tfhe_fft.yml
+++ b/.github/workflows/benchmark_tfhe_fft.yml
@@ -32,7 +32,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_fft
          path: ${{ env.RESULTS_FILENAME }}
@@ -94,7 +94,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -126,7 +127,7 @@ jobs:
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_tfhe_ntt.yml
+++ b/.github/workflows/benchmark_tfhe_ntt.yml
@@ -32,7 +32,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_ntt
          path: ${{ env.RESULTS_FILENAME }}
@@ -94,7 +94,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -126,7 +127,7 @@ jobs:
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_tfhe_zk_pok.yml
+++ b/.github/workflows/benchmark_tfhe_zk_pok.yml
@@ -3,6 +3,14 @@ name: tfhe-zk-pok benchmarks

 on:
  workflow_dispatch:
+    inputs:
+      bench_type:
+        description: "Benchmarks type"
+        type: choice
+        default: latency
+        options:
+          - latency
+          - throughput
  push:
    branches:
      - main
@@ -20,6 +28,7 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  BENCH_TYPE: ${{ inputs.bench_type || 'latency' }}

 jobs:
  should-run:
@@ -36,9 +45,8 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            zk_pok:
              - tfhe-zk-pok/**
@@ -58,7 +66,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -80,7 +88,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -91,7 +100,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -100,11 +109,12 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Run benchmarks
        run: |
-          make bench_tfhe_zk_pok
+          make BENCH_TYPE=${{ env.BENCH_TYPE }} bench_tfhe_zk_pok

      - name: Parse results
        run: |
@@ -118,10 +128,11 @@ jobs:
          --commit-date "${{ env.COMMIT_DATE }}" \
          --bench-date "${{ env.BENCH_DATE }}" \
          --walk-subdirs \
-          --name-suffix avx512
+          --name-suffix avx512 \
+          --bench-type ${{ env.BENCH_TYPE }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_tfhe_zk_pok
          path: ${{ env.RESULTS_FILENAME }}
@@ -131,7 +142,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -149,13 +161,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (tfhe-zk-pok-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_wasm_client.yml
+++ b/.github/workflows/benchmark_wasm_client.yml
@@ -28,7 +28,7 @@ jobs:
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
    steps:
@@ -36,13 +36,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            wasm_bench:
              - tfhe/Cargo.toml
@@ -65,7 +65,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -88,7 +88,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -99,7 +100,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -166,7 +167,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_wasm_${{ matrix.browser }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -176,7 +177,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -194,13 +196,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (wasm-client-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, wasm-client-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_zk_pke.yml
+++ b/.github/workflows/benchmark_zk_pke.yml
@@ -43,13 +43,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            zk_pok:
              - tfhe/Cargo.toml
@@ -104,7 +104,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -130,7 +130,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -141,7 +142,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: nightly

@@ -150,7 +151,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
@@ -177,7 +179,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
        with:
          name: ${{ github.sha }}_integer_zk
          path: ${{ env.RESULTS_FILENAME }}
@@ -187,7 +189,8 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -205,13 +208,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (pke-zk-benchmarks)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, pke-zk-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -28,7 +28,7 @@ jobs:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

--- a/.github/workflows/cargo_build_tfhe_ntt.yml
+++ b/.github/workflows/cargo_build_tfhe_ntt.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-builds:
+  cargo-builds-ntt:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
--- a/.github/workflows/cargo_test_fft.yml
+++ b/.github/workflows/cargo_test_fft.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-tests:
+  cargo-tests-fft:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -38,7 +38,7 @@ jobs:
        run: |
          make test_fft_no_std

-  cargo-tests-nightly:
+  cargo-tests-fft-nightly:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -60,7 +60,7 @@ jobs:
        run: |
          make test_fft_no_std_nightly

-  cargo-tests-node-js:
+  cargo-tests-fft-node-js:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
--- a/.github/workflows/cargo_test_ntt.yml
+++ b/.github/workflows/cargo_test_ntt.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-tests:
+  cargo-tests-ntt:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@@ -33,7 +33,7 @@ jobs:
      - name: Test no-std
        run: make test_ntt_no_std

-  cargo-tests-nightly:
+  cargo-tests-ntt-nightly:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
--- a/.github/workflows/check_commit.yml
+++ b/.github/workflows/check_commit.yml
@@ -2,6 +2,7 @@
 name: Check commit and PR compliance
 on:
  pull_request:
+
 jobs:
  check-commit-pr:
    name: Check commit and PR
--- a/.github/workflows/ci_lint.yml
+++ b/.github/workflows/ci_lint.yml
@@ -6,6 +6,7 @@ on:

 env:
  ACTIONLINT_VERSION: 1.6.27
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 jobs:
  lint-check:
@@ -14,6 +15,9 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Get actionlint
        run: |
@@ -27,7 +31,8 @@ jobs:
          make lint_workflow

      - name: Ensure SHA pinned actions
-        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
+        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@25ed13d0628a1601b4b44048e63cc4328ed03633 # v3.0.22
        with:
          allowlist: |
            slsa-framework/slsa-github-generator
+            ./
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -25,7 +25,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -47,13 +47,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
          files_yaml: |
            tfhe:
@@ -83,7 +83,7 @@ jobs:
          make test_shortint_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
+        uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
          make test_integer_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
+        uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -115,13 +115,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (code-coverage)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, code-coverage ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -10,6 +10,10 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -23,11 +27,12 @@ jobs:
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -36,11 +41,18 @@ jobs:
          backend: aws
          profile: cpu-small

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  csprng-randomness-tests:
    name: CSPRNG randomness tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -48,10 +60,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -65,17 +77,18 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (csprng-randomness-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, csprng-randomness-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -89,4 +102,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_4090_tests.yml
+++ b/.github/workflows/gpu_4090_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an RTX 4090 machine
-name: TFHE Cuda Backend - 4090 full tests
+name: Cuda - 4090 full tests

 env:
  CARGO_TERM_COLOR: always
@@ -11,6 +11,7 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -28,7 +29,7 @@ jobs:
      contains(github.event.label.name, '4090_test') ||
      (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ["self-hosted", "4090-desktop"]

@@ -37,10 +38,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -80,4 +81,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_fast_h100_tests.yml
+++ b/.github/workflows/gpu_fast_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: TFHE Cuda Backend - Fast tests on H100
+name: Cuda - Fast tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,18 +12,22 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-      types: [ labeled ]
+    types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -31,13 +35,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -51,7 +55,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_fast_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -64,11 +68,12 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -77,13 +82,20 @@ jobs:
          backend: hyperstack
          profile: single-h100

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-tests-linux:
    name: CUDA H100 tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -94,60 +106,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run core crypto and internal CUDA backend tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
@@ -174,20 +151,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -201,4 +180,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_fast_tests.yml
+++ b/.github/workflows/gpu_fast_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: TFHE Cuda Backend - Fast tests
+name: Cuda - Fast tests

 env:
  CARGO_TERM_COLOR: always
@@ -12,6 +12,10 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -22,7 +26,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -30,13 +34,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -50,7 +54,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_fast_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -58,15 +62,16 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-tests)
    needs: should-run
-    if: github.event_name != 'pull_request' ||
+    if: github.event_name == 'workflow_dispatch' ||
      needs.should-run.outputs.gpu_test == 'true'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -75,13 +80,20 @@ jobs:
          backend: hyperstack
          profile: gpu-test

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-tests-linux:
    name: CUDA tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -92,60 +104,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run core crypto and internal CUDA backend tests
        run: |
          make test_core_crypto_gpu
@@ -172,20 +149,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -199,4 +178,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_full_h100_tests.yml
+++ b/.github/workflows/gpu_full_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: TFHE Cuda Backend - Full tests on H100
+name: Cuda - Full tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -11,7 +11,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}

 on:
  workflow_dispatch:
@@ -25,7 +24,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -49,9 +48,6 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
      - name: Install dependencies
@@ -69,40 +65,19 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run core crypto, integer and internal CUDA backend tests
        run: |
          make test_gpu
@@ -130,7 +105,7 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
@@ -139,7 +114,7 @@ jobs:
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -153,4 +128,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_full_multi_gpu_tests.yml
+++ b/.github/workflows/gpu_full_multi_gpu_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: TFHE Cuda Backend - Full tests multi-GPU
+name: Cuda - Full tests multi-GPU

 env:
  CARGO_TERM_COLOR: always
@@ -12,6 +12,10 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -23,7 +27,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -31,13 +35,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -51,7 +55,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/**_multi_gpu_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -64,11 +68,12 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -77,13 +82,20 @@ jobs:
          backend: hyperstack
          profile: multi-gpu-test

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-tests-linux:
    name: CUDA multi-GPU tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -94,60 +106,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run multi-bit CUDA integer compression tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
@@ -177,20 +154,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests-multi-gpu)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -204,4 +183,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_integer_long_run_tests.yml
+++ b/.github/workflows/gpu_integer_long_run_tests.yml
@@ -1,4 +1,4 @@
-name: AWS Long Run Tests on GPU
+name: Long Run Tests on GPU

 env:
  CARGO_TERM_COLOR: always
@@ -15,8 +15,8 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
-    # Weekly tests will be triggered each Friday at 1a.m.
-    - cron: '0 1 * * FRI'
+    # Weekly tests will be triggered each Friday at 9 p.m. (UTC).
+    - cron: "0 21 * * 5"

 jobs:
  setup-instance:
@@ -29,17 +29,17 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: hyperstack
-          profile: 2-h100
+          profile: multi-gpu-test

  cuda-tests:
-    name: Long run GPU H100 tests
+    name: Long run GPU tests
    needs: [ setup-instance ]
    concurrency:
      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
@@ -53,57 +53,22 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
+    timeout-minutes: 4320 # 72 hours
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run tests
        run: |
          make test_integer_long_run_gpu
@@ -119,17 +84,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests.result }}
-          SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (gpu-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/gpu_pcc.yml
+++ b/.github/workflows/gpu_pcc.yml
@@ -1,5 +1,5 @@
 # Perfom tfhe-cuda-backend post-commit checks on an AWS instance
-name: TFHE Cuda Backend - Post-commit Checks
+name: Cuda - Post-commit Checks

 env:
  CARGO_TERM_COLOR: always
@@ -11,6 +11,10 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16-22.04"

 on:
  pull_request:
@@ -20,11 +24,12 @@ jobs:
    name: Setup instance (cuda-pcc)
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -33,11 +38,18 @@ jobs:
          backend: aws
          profile: gpu-build

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-pcc:
    name: CUDA post-commit checks
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -56,14 +68,20 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
+      - name: Install CUDA
+        if: env.SECRETS_AVAILABLE == 'false'
+        shell: bash
        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+          TOOLKIT_VERSION="$(echo ${{ matrix.cuda }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
+          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt update
+          sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -83,7 +101,6 @@ jobs:
            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
          } >> "${GITHUB_ENV}"

      - name: Run fmt checks
@@ -95,22 +112,23 @@ jobs:
          make pcc_gpu

      - name: Slack Notification
-        if: ${{ failure() }}
+        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-pcc)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-pcc ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -124,4 +142,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_classic_tests.yml
+++ b/.github/workflows/gpu_signed_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
-name: TFHE Cuda Backend - Signed integer tests with classical PBS
+name: Cuda - Signed integer tests with classical PBS

 env:
  CARGO_TERM_COLOR: always
@@ -12,18 +12,22 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-      types: [ labeled ]
+    types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -31,13 +35,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -51,7 +55,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_signed_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -64,11 +68,12 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -77,13 +82,20 @@ jobs:
          backend: hyperstack
          profile: gpu-test

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-tests-linux:
    name: CUDA signed integer tests with classical PBS
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -94,58 +106,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run signed integer tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
@@ -158,20 +137,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-signed-classic-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -185,4 +166,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_h100_tests.yml
+++ b/.github/workflows/gpu_signed_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an H100 VM on hyperstack
-name: TFHE Cuda Backend - Signed integer tests on H100
+name: Cuda - Signed integer tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,18 +12,23 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-      types: [ labeled ]
+    types: [ labeled ]
+

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -31,13 +36,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -51,7 +56,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_signed_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -64,11 +69,12 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -77,13 +83,20 @@ jobs:
          backend: hyperstack
          profile: single-h100

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-tests-linux:
    name: CUDA H100 signed integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -94,58 +107,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run signed integer multi-bit tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
@@ -158,20 +138,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -185,4 +167,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_tests.yml
+++ b/.github/workflows/gpu_signed_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend signed integer on an AWS instance
-name: TFHE Cuda Backend - Signed integer tests
+name: Cuda - Signed integer tests

 env:
  CARGO_TERM_COLOR: always
@@ -14,14 +14,15 @@ env:
  FAST_TESTS: TRUE
  NIGHTLY_TESTS: FALSE
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types:
-      - opened
-      - synchronize
  schedule:
    # Nightly tests @ 1AM after each work day
    - cron: "0 1 * * MON-FRI"
@@ -30,7 +31,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -38,13 +39,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -58,11 +59,10 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_signed_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
-
  setup-instance:
    name: Setup instance (cuda-signed-integer-tests)
    runs-on: ubuntu-latest
@@ -71,11 +71,12 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      needs.should-run.outputs.gpu_test == 'true'
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,13 +85,20 @@ jobs:
          backend: hyperstack
          profile: gpu-test

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-signed-integer-tests:
    name: CUDA signed integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -101,57 +109,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
      - name: Should run nightly tests
        if: github.event_name == 'schedule'
        run: |
@@ -160,10 +136,6 @@ jobs:
            echo "NIGHTLY_TESTS=TRUE";
          } >> "${GITHUB_ENV}"

-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run signed integer multi-bit tests
        run: |
          make test_signed_integer_multi_bit_gpu_ci
@@ -179,17 +151,18 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
-          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-signed-integer-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -203,4 +176,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_classic_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
-name: TFHE Cuda Backend - Unsigned integer tests with classical PBS
+name: Cuda - Unsigned integer tests with classical PBS

 env:
  CARGO_TERM_COLOR: always
@@ -12,18 +12,23 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-      types: [ labeled ]
+    types: [ labeled ]
+

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -31,13 +36,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -51,7 +56,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -59,16 +64,17 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-unsigned-classic-tests)
    needs: should-run
-    if: github.event_name != 'pull_request' ||
+    if: github.event_name == 'workflow_dispatch' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -77,13 +83,20 @@ jobs:
          backend: hyperstack
          profile: gpu-test

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-tests-linux:
    name: CUDA unsigned integer tests with classical PBS
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -94,58 +107,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run unsigned integer tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
@@ -158,20 +138,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-unsigned-classic-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -185,4 +167,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_h100_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an H100 VM on hyperstack
-name: TFHE Cuda Backend - Unsigned integer tests on H100
+name: Cuda - Unsigned integer tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,18 +12,22 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-      types: [ labeled ]
+    types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -31,13 +35,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -51,7 +55,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -59,16 +63,17 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-h100-tests)
    needs: should-run
-    if: github.event_name != 'pull_request' ||
+    if: github.event_name == 'workflow_dispatch' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -77,13 +82,20 @@ jobs:
          backend: hyperstack
          profile: single-h100

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-tests-linux:
    name: CUDA H100 unsigned integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -94,58 +106,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run unsigned integer multi-bit tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
@@ -158,20 +137,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop instance
+      - name: Stop remote instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -185,4 +166,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
-name: TFHE Cuda Backend - Unsigned integer tests
+name: Cuda - Unsigned integer tests

 env:
  CARGO_TERM_COLOR: always
@@ -13,14 +13,16 @@ env:
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  FAST_TESTS: TRUE
  NIGHTLY_TESTS: FALSE
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets will be available only to zama-ai organization members
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types:
-      - opened
-      - synchronize
+    types: [ labeled ]
  schedule:
    # Nightly tests @ 1AM after each work day
    - cron: "0 1 * * MON-FRI"
@@ -29,7 +31,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: write
+      pull-requests: read
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -37,13 +39,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
+        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
        with:
-          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -57,7 +59,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**.md'
+              - 'tfhe/docs/**/**.md'
              - '.github/workflows/gpu_unsigned_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -70,11 +72,12 @@ jobs:
      needs.should-run.outputs.gpu_test == 'true'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -83,13 +86,20 @@ jobs:
          backend: hyperstack
          profile: gpu-test

+      # This instance is spawned specifically for pull requests coming from forked repositories
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
+
  cuda-unsigned-integer-tests:
    name: CUDA unsigned integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -100,54 +110,25 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.6
    steps:
-      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
      - name: Should run nightly tests
        if: github.event_name == 'schedule'
        run: |
@@ -156,10 +137,6 @@ jobs:
            echo "NIGHTLY_TESTS=TRUE";
          } >> "${GITHUB_ENV}"

-      - name: Check device is detected
-        if: ${{ !cancelled() }}
-        run: nvidia-smi
-
      - name: Run unsigned integer multi-bit tests
        run: |
          make test_unsigned_integer_multi_bit_gpu_ci
@@ -172,20 +149,22 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-unsigned-integer-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -199,4 +178,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/integer_long_run_tests.yml
+++ b/.github/workflows/integer_long_run_tests.yml
@@ -15,8 +15,8 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
-    # Weekly tests will be triggered each Friday at 1a.m.
-    - cron: '0 1 * * FRI'
+    # Weekly tests will be triggered each Friday at 9 p.m. (UTC).
+    - cron: "0 21 * * 5"

 jobs:
  setup-instance:
@@ -29,7 +29,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -45,15 +45,16 @@ jobs:
      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    timeout-minutes: 4320 # 72 hours
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -71,13 +72,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -3,7 +3,7 @@ name: Tests on M1 CPU
 on:
  workflow_dispatch:
  pull_request:
-    types: [labeled]
+    types: [ labeled ]
  # Have a nightly build for M1 tests
  schedule:
    # * is a special character in YAML so you have to quote this string
@@ -21,14 +21,17 @@ env:
  # We clear the cache to reduce memory pressure because of the numerous processes of cargo
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref }}
+  group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
  cancel-in-progress: true

 jobs:
  cargo-builds-m1:
-    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
+    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') ||
+      github.event_name == 'workflow_dispatch' ||
+      contains(github.event.label.name, 'm1_test') }}
    runs-on: ["self-hosted", "m1mac"]
    # 12 hours, default is 6 hours, hopefully this is more than enough
    timeout-minutes: 720
@@ -37,9 +40,10 @@ jobs:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
+          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -191,6 +195,8 @@ jobs:
          SLACK_COLOR: ${{ needs.cargo-builds-m1.result }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+          MSG_MINIMAL: event,action url,commit
+          BRANCH: ${{ github.ref }}
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -43,14 +43,15 @@ jobs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
      - name: Prepare package
        run: |
          cargo package -p tfhe
-      - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
        with:
          name: crate
          path: target/package/*.crate
@@ -77,15 +78,17 @@ jobs:
    name: Publish Release
    needs: [package] # for comparing hashes
    runs-on: ubuntu-latest
+    # These permissions are needed for provenance of the npmjs publish
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
      - name: Create NPM version tag
        if: ${{ inputs.npm_latest_tag }}
        run: |
@@ -110,7 +113,7 @@ jobs:
      - name: Slack notification (hashes comparison)
        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: failure
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -155,7 +158,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -1,49 +0,0 @@
-name: Publish tfhe-csprng release
-
-on:
-  workflow_dispatch:
-    inputs:
-      dry_run:
-        description: "Dry-run"
-        type: boolean
-        default: true
-
-env:
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-
-jobs:
-  verify_tag:
-    uses: ./.github/workflows/verify_tagged_commit.yml
-    secrets:
-      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
-      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
-
-  publish_release:
-    name: Publish tfhe-csprng Release
-    needs: verify_tag
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Publish crate.io package
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
-        run: |
-          cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_cuda.yml
+++ b/.github/workflows/make_release_cuda.yml
@@ -1,4 +1,3 @@
-# Publish new release of tfhe-cuda-backend on crates.io.
 name: Publish CUDA release

 on:
@@ -8,10 +7,6 @@ on:
        description: "Dry-run"
        type: boolean
        default: true
-      push_to_crates:
-        description: "Push to crate"
-        type: boolean
-        default: true

 env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
@@ -36,7 +31,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -45,10 +40,12 @@ jobs:
          backend: aws
          profile: gpu-build

-  publish-cuda-release:
-    name: Publish CUDA Release
+  package:
+    name: Package CUDA Release for provenance
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
@@ -61,17 +58,76 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+          persist-credentials: "false"
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        with:
+          toolchain: stable
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          {
+            echo "CUDA_PATH=$CUDA_PATH";
+            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
+            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
+          } >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-cuda-backend
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
+  publish-cuda-release:
+    name: Publish CUDA Release
+    needs: [setup-instance, package] # for comparing hashes
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

@@ -97,30 +153,45 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Publish crate.io package
-        if: ${{ inputs.push_to_crates }}
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-cuda-backend --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-cuda-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (publish-release)
-    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
-    needs: [ setup-instance, publish-cuda-release ]
+    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    needs: [setup-instance, publish-cuda-release]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/make_release_tfhe_csprng.yml
+++ b/.github/workflows/make_release_tfhe_csprng.yml
@@ -0,0 +1,103 @@
+name: Publish tfhe-csprng release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  verify_tag:
+    uses: ./.github/workflows/verify_tagged_commit.yml
+    secrets:
+      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
+      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
+
+  package:
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-csprng
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+        with:
+          name: crate-tfhe-csprng
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
+
+  publish_release:
+    name: Publish tfhe-csprng Release
+    needs: [verify_tag, package]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-tfhe-csprng
+          path: target/package
+      - name: Publish crate.io package
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-csprng - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_tfhe_fft.yml
+++ b/.github/workflows/make_release_tfhe_fft.yml
@@ -19,15 +19,53 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

+  package:
+    runs-on: ubuntu-latest
+    needs: verify_tag
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-fft
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+        with:
+          name: crate
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
  publish_release:
    name: Publish tfhe-fft Release
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: [verify_tag, package] # for comparing hashes
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Publish crate.io package
        env:
@@ -36,10 +74,26 @@ jobs:
        run: |
          cargo publish -p tfhe-fft --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-fft crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_ntt.yml
+++ b/.github/workflows/make_release_tfhe_ntt.yml
@@ -19,13 +19,50 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

+  package:
+    runs-on: ubuntu-latest
+    needs: verify_tag
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-ntt
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+        with:
+          name: crate
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
  publish_release:
    name: Publish tfhe-ntt Release
    runs-on: ubuntu-latest
-    needs: verify_tag
+    needs: [verify_tag, package] # for comparing hashes
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0

@@ -36,10 +73,26 @@ jobs:
        run: |
          cargo publish -p tfhe-ntt --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-ntt crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_versionable.yml
+++ b/.github/workflows/make_release_tfhe_versionable.yml
@@ -18,35 +18,161 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

+  package-derive:
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-versionable-derive
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+        with:
+          name: crate-tfhe-versionable-derive
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance-derive:
+    if: ${{ !inputs.dry_run }} # skip provenance generation when this run is only a dry run
+    needs: [package-derive]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      base64-subjects: ${{ needs.package-derive.outputs.hash }} # SHA-256 hashes of the Crate package.
+
+  publish_release-derive:
+    name: Publish tfhe-versionable-derive Release
+    needs: [verify_tag, package-derive] # tag must be verified; package-derive supplies the reference hash
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-tfhe-versionable-derive
+          path: target/package
+      - name: Publish crate.io package
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }} # expands to nothing for a real release
+        run: cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package-derive.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-versionable-derive - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+  package: # builds the tfhe-versionable .crate; its `hash` output is read by provenance and publish_release
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-versionable
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+        with:
+          name: crate-tfhe-versionable
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash # output `hash` = base64 of the `sha256sum` listing of target/package/*.crate
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+  provenance:
+    if: ${{ !inputs.dry_run }} # skip provenance generation when this run is only a dry run
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      base64-subjects: ${{ needs.package.outputs.hash }} # SHA-256 hashes of the Crate package.
+
  publish_release:
    name: Publish tfhe-versionable Release
-    needs: verify_tag
+    needs: [verify_tag, publish_release-derive, package] # tag check, derive crate published first, hash comparison
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Publish proc-macro crate
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
-        run: |
-          cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-
-      - name: Publish main crate
-        if: ${{ ! inputs.dry_run }}
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-tfhe-versionable
+          path: target/package
+      - name: Publish crate.io package
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: |
-          cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }}
+          cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }} ${{ inputs.dry_run && '--dry-run' || '' }} # dry-run flag taken from the dry_run input
+
+      - name: Generate hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-versionable - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_zk_pok.yml
+++ b/.github/workflows/make_release_zk_pok.yml
@@ -1,4 +1,3 @@
-# Publish new release of tfhe-zk-pok on crates.io.
 name: Publish tfhe-zk-pok release

 on:
@@ -13,6 +12,40 @@ env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

 jobs:
+  package:
+    runs-on: ubuntu-latest
+    outputs:
+      hash: ${{ steps.hash.outputs.hash }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Prepare package
+        run: |
+          cargo package -p tfhe-zk-pok
+      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+        with:
+          name: crate-zk-pok
+          path: target/package/*.crate
+      - name: generate hash
+        id: hash # exposed as the job's `hash` output
+        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+  provenance:
+    if: ${{ !inputs.dry_run  }}
+    needs: [package]
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
+    permissions:
+      # Needed to detect the GitHub Actions environment
+      actions: read
+      # Needed to create the provenance via GitHub OIDC
+      id-token: write
+      # Needed to upload assets/artifacts
+      contents: write
+    with:
+      # SHA-256 hashes of the Crate package.
+      base64-subjects: ${{ needs.package.outputs.hash }}
+
  verify_tag:
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
@@ -21,26 +54,44 @@ jobs:

  publish_release:
    name: Publish tfhe-zk-pok Release
-    needs: verify_tag
+    needs: [verify_tag, package] # for comparing hashes
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+      - name: Download artifact
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: crate-zk-pok
+          path: target/package
      - name: Publish crate.io package
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-zk-pok --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-
+      - name: Verify hash
+        id: published_hash
+        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
+      - name: Slack notification (hashes comparison)
+        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        env:
+          SLACK_COLOR: failure
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "SLSA tfhe-zk-pok crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -14,7 +14,7 @@ on:

 jobs:
  params-curves-security-check:
-    runs-on: large_ubuntu_16
+    runs-on: large_ubuntu_16-22.04
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
--- a/.github/workflows/placeholder_workflow.yml
+++ b/.github/workflows/placeholder_workflow.yml
@@ -1,14 +1,85 @@
-# Placeholder workflow file allowing running it without having to merge to main first
+# Run integer and ERC20 benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
 name: Placeholder Workflow

 on:
+  pull_request:
  workflow_dispatch:

-jobs:
-  placeholder:
-    name: Placeholder
-    runs-on: ubuntu-latest
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"

+jobs:
+  integer-benchmarks-hpu:
+    name: Execute integer & erc20 benchmarks for HPU backend
+    runs-on: v80-desktop
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+    timeout-minutes: 1440  # 24 hours
    steps:
-      - run: |
-          echo "Hello this is a Placeholder Workflow"
+      # Needed as long as the hw_regmap repository is private
+      - name: Configure SSH
+        uses: webfactory/ssh-agent@a6f90b1f127823b31d4d4a8d96047790581349bd # v0.9.1
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          fetch-depth: 0
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Get benchmark details
+        run: |
+          {
+            echo "BENCH_DATE=$(date --iso-8601=seconds)";
+            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
+            echo "COMMIT_HASH=$(git describe --tags --dirty)";
+          } >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          repository: zama-ai/slab
+          path: slab
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+
+      - name: Run benchmarks
+        run: |
+          make bench_integer_hpu
+          make bench_hlapi_erc20_hpu
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware "hpu_x1" \
+          --backend hpu \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        with:
+          name: ${{ github.sha }}_integer_benchmarks
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
+          --slab-url "${{ secrets.SLAB_URL }}"
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -16,7 +16,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: 'false'
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
      - name: git-sync
        uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
        with:
--- a/.gitignore
+++ b/.gitignore
@@ -32,5 +32,8 @@ web-test-runner/
 node_modules/
 package-lock.json

+# Python .env
+.env
+
 # Dir used for backward compatibility test data
-tfhe/tfhe-backward-compat-data/
+tests/tfhe-backward-compat-data/
--- a/.linelint.yml
+++ b/.linelint.yml
@@ -1,11 +1,15 @@
 ignore:
  - .git
  - target
+  - tfhe/build
+  - venv
+  - web-test-runner
  - tfhe/benchmarks_parameters
  - tfhe/web_wasm_parallel_tests/node_modules
  - tfhe/web_wasm_parallel_tests/dist
  - keys
  - coverage
+  - utils/tfhe-lints/ui/main.stderr

 rules:
  # checks if file ends in a newline character
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,23 +9,24 @@ members = [
    "apps/trivium",
    "tfhe-csprng",
    "backends/tfhe-cuda-backend",
+    "backends/tfhe-hpu-backend",
    "utils/tfhe-versionable",
    "utils/tfhe-versionable-derive",
+    "mockups/tfhe-hpu-mockup",
 ]

-exclude = [
-    "tfhe/backward_compatibility_tests",
-    "utils/cargo-tfhe-lints-inner",
-    "utils/cargo-tfhe-lints"
-]
+exclude = ["tests/backward_compatibility_tests", "utils/tfhe-lints"]
 [workspace.dependencies]
-aligned-vec = { version = "0.5", default-features = false }
+aligned-vec = { version = "0.6", default-features = false }
 bytemuck = "1.14.3"
-dyn-stack = { version = "0.10", default-features = false }
+dyn-stack = { version = "0.11", default-features = false }
+itertools = "0.14"
 num-complex = "0.4"
-pulp = { version = "0.18.22", default-features = false }
+pulp = { version = "0.20", default-features = false }
+rand = "0.8"
+rayon = "1"
 serde = { version = "1.0", default-features = false }
-wasm-bindgen = ">=0.2.86,<0.2.94"
+wasm-bindgen = "0.2.100"

 [profile.bench]
 lto = "fat"
@@ -43,3 +44,6 @@ inherits = "dev"
 opt-level = 3
 lto = "off"
 debug-assertions = false
+
+[workspace.metadata.dylint]
+libraries = [{ path = "utils/tfhe-lints" }]
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2024 ZAMA.
+Copyright © 2025 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/343
+++ b/343
@@ -2,7 +2,6 @@ SHELL:=$(shell /usr/bin/env which bash)
 OS:=$(shell uname)
 RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
 CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
-TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
 CPU_COUNT=$(shell ./scripts/cpu_count.sh)
 RS_BUILD_TOOLCHAIN:=stable
 CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
@@ -21,10 +20,11 @@ BENCH_OP_FLAVOR?=DEFAULT
 BENCH_TYPE?=latency
 NODE_VERSION=22.6
 BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
-BACKWARD_COMPAT_DATA_BRANCH?=v0.4
+BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
 BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
 BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
 TFHE_SPEC:=tfhe
+WASM_PACK_VERSION="0.13.1"
 # We are kind of hacking the cut here, the version cannot contain a quote '"'
 WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
 WEB_RUNNER_DIR=web-test-runner
@@ -53,6 +53,12 @@ REGEX_PATTERN?=''
 TFHECUDA_SRC=backends/tfhe-cuda-backend/cuda
 TFHECUDA_BUILD=$(TFHECUDA_SRC)/build

+# tfhe-hpu-backend
+CUR_SCRIPT_DIR=$(shell cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
+HPU_BACKEND_DIR=$(CUR_SCRIPT_DIR)/backends/tfhe-hpu-backend
+HPU_CONFIG=aved
+AVED_PCIE_DEV=$(shell lspci -d 10ee:50b5 | sed -e "s/\(..\).*/\1/")
+
 # Exclude these files from coverage reports
 define COVERAGE_EXCLUDED_FILES
 --exclude-files apps/trivium/src/trivium/* \
@@ -116,8 +122,8 @@ install_wasm_bindgen_cli: install_rs_build_toolchain

 .PHONY: install_wasm_pack # Install wasm-pack to build JS packages
 install_wasm_pack: install_rs_build_toolchain
-	@wasm-pack --version > /dev/null 2>&1 || \
-	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.0 || \
+	@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@$(WASM_PACK_VERSION) || \
 	( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )

 .PHONY: install_node # Install last version of NodeJS via nvm
@@ -151,10 +157,9 @@ install_tarpaulin: install_rs_build_toolchain
 	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
 	( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )

-.PHONY: install_tfhe_lints # Install custom tfhe-rs lints
-install_tfhe_lints:
-	(cd utils/cargo-tfhe-lints-inner && cargo install --path .) && \
-	cd utils/cargo-tfhe-lints && cargo install --path .
+.PHONY: install_cargo_dylint # Install cargo-dylint and dylint-link, used to run the custom tfhe-rs lints
+install_cargo_dylint:
+	cargo install cargo-dylint dylint-link

 .PHONY: install_typos_checker # Install typos checker
 install_typos_checker: install_rs_build_toolchain
@@ -243,7 +248,8 @@ fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt && \
+	$(MAKE) -C tfhe/js_on_wasm_tests fmt

 .PHONY: fmt_gpu # Format rust and cuda code
 fmt_gpu: install_rs_check_toolchain
@@ -272,7 +278,8 @@ check_fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt && \
+	$(MAKE) -C tfhe/js_on_wasm_tests check_fmt

 .PHONY: check_typos # Check for typos in codebase
 check_typos: install_typos_checker
@@ -281,14 +288,14 @@ check_typos: install_typos_checker
 .PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
 clippy_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
+		--features=boolean,shortint,integer,internal-keycache,gpu \
 		--all-targets \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: check_gpu # Run check on tfhe with "gpu" enabled
 check_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
+		--features=boolean,shortint,integer,internal-keycache,gpu \
 		--all-targets \
 		-p $(TFHE_SPEC)

@@ -307,52 +314,51 @@ lint_workflow: check_actionlint_installed
 .PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
 clippy_core: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE) \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),experimental \
+		--features=experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
+		--features=nightly-avx512 \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
+		--features=experimental,nightly-avx512 \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),zk-pok \
+		--features=zk-pok \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_boolean # Run clippy lints enabling the boolean features
 clippy_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),boolean \
+		--features=boolean \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_shortint # Run clippy lints enabling the shortint features
 clippy_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),shortint \
+		--features=shortint \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),shortint,experimental \
+		--features=shortint,experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),zk-pok,shortint \
+		--features=zk-pok,shortint \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_integer # Run clippy lints enabling the integer features
 clippy_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),integer \
+		--features=integer \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),integer,experimental \
+		--features=integer,experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
 clippy: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
+		--features=boolean,shortint,integer \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
@@ -363,13 +369,24 @@ clippy_rustdoc: install_rs_check_toolchain
 	fi && \
 	CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
 		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok,pbs-stats,strings \
+		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental \
+		-p $(TFHE_SPEC)
+
+.PHONY: clippy_rustdoc_gpu # Run clippy lints on doctests enabling the boolean, shortint, integer, zk-pok and gpu
+clippy_rustdoc_gpu: install_rs_check_toolchain
+	if [[ "$(OS)" != "Linux" ]]; then \
+		echo "WARNING: skipped clippy_rustdoc_gpu, unsupported OS $(OS)"; \
+		exit 0; \
+	fi && \
+	CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
+		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
+		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
 		-p $(TFHE_SPEC)

 .PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
 clippy_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
+		--features=boolean-c-api,shortint-c-api,high-level-c-api \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
@@ -394,17 +411,16 @@ clippy_trivium: install_rs_check_toolchain
 .PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
 clippy_all_targets: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings \
+		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
+		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
 clippy_tfhe_csprng: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=$(TARGET_ARCH_FEATURE) \
-		-p tfhe-csprng -- --no-deps -D warnings
+		--features=parallel,software-prng -p tfhe-csprng -- --no-deps -D warnings

 .PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
 clippy_zk_pok: install_rs_check_toolchain
@@ -418,10 +434,15 @@ clippy_versionable: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		-p tfhe-versionable -- --no-deps -D warnings

+.PHONY: clippy_tfhe_lints # Run clippy lints on tfhe-lints
+clippy_tfhe_lints: install_cargo_dylint # the toolchain is selected with toolchain.toml
+	cd utils/tfhe-lints && \
+	cargo clippy --all-targets -- --no-deps -D warnings
+
 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
 clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
-clippy_versionable
+clippy_versionable clippy_tfhe_lints

 .PHONY: clippy_fast # Run main clippy targets
 clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
@@ -437,73 +458,73 @@ check_rust_bindings_did_not_change:
 	cargo build -p tfhe-cuda-backend && "$(MAKE)" fmt_gpu && \
 	git diff --quiet HEAD -- backends/tfhe-cuda-backend/src/bindings.rs || \
 	( echo "Generated bindings have changed! Please run 'git add backends/tfhe-cuda-backend/src/bindings.rs' \
-	and commit the changes." && exit 1 ) 
+	and commit the changes." && exit 1 )


 .PHONY: tfhe_lints # Run custom tfhe-rs lints
-tfhe_lints: install_tfhe_lints
-	cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok -- -D warnings
+tfhe_lints: install_cargo_dylint
+	RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe --no-deps -- \
+		--features=boolean,shortint,integer,strings,zk-pok

 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
+		-p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),nightly-avx512 -p $(TFHE_SPEC); \
+			--features=nightly-avx512 -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_core_experimental # Build core_crypto with experimental features
 build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
+		--features=experimental -p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 -p $(TFHE_SPEC); \
+			--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_boolean # Build with boolean enabled
 build_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets
+		--features=boolean -p $(TFHE_SPEC) --all-targets

 .PHONY: build_shortint # Build with shortint enabled
 build_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets
+		--features=shortint -p $(TFHE_SPEC) --all-targets

 .PHONY: build_integer # Build with integer enabled
 build_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets
+		--features=integer -p $(TFHE_SPEC) --all-targets

 .PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
 build_tfhe_full: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
+		--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets

 .PHONY: build_tfhe_coverage # Build with test coverage enabled
 build_tfhe_coverage: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
+		--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests

 .PHONY: build_c_api # Build the C API for boolean, shortint and integer
 build_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
+		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
 		-p $(TFHE_SPEC)

 .PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
 build_c_api_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
+		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
 		-p $(TFHE_SPEC)

 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
 build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
+		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
 		-p $(TFHE_SPEC)

 .PHONY: build_web_js_api # Build the js API targeting the web browser
@@ -517,11 +538,11 @@ build_web_js_api: install_rs_build_toolchain install_wasm_pack
 build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
 	cd tfhe && \
 	rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
-	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
+	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory" rustup run $(RS_CHECK_TOOLCHAIN) \
 		wasm-pack build --release --target=web \
 		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok \
 		-Z build-std=panic_abort,std && \
-	find pkg/snippets -type f -iname workerHelpers.worker.js -exec sed -i "s|from '..\/..\/..\/';|from '..\/..\/..\/tfhe.js';|" {} \;
+	find pkg/snippets -type f -iname workerHelpers.js -exec sed -i "s|const pkg = await import('..\/..\/..');|const pkg = await import('..\/..\/..\/tfhe.js');|" {} \;
 	jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json

 .PHONY: build_node_js_api # Build the js API targeting nodejs
@@ -534,15 +555,15 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
 .PHONY: build_tfhe_csprng # Build tfhe_csprng
 build_tfhe_csprng: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng --all-targets
+		-p tfhe-csprng --all-targets

 .PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
 test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
+		--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
+			--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
 	fi

 .PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
@@ -550,13 +571,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
 		--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache \
+		--features=experimental,internal-keycache \
 		-p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 			--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
 			--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,nightly-avx512 \
+			--features=experimental,internal-keycache,nightly-avx512 \
 			-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
 	fi

@@ -574,35 +595,38 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
 .PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
 test_core_crypto_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+		--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+		--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::

 .PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
 test_integer_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
+		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
+		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::

-.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
-test_integer_long_run_gpu: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
+.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
+test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	LONG_TESTS=TRUE \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--tfhe-package "$(TFHE_SPEC)" --backend "gpu"

 .PHONY: test_integer_compression
 test_integer_compression: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
+		--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
+		--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress

 .PHONY: test_integer_compression_gpu
 test_integer_compression_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
+		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
+		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress

 .PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
 test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -661,20 +685,20 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
 .PHONY: test_boolean # Run the tests of the boolean module
 test_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::
+		--features=boolean -p $(TFHE_SPEC) -- boolean::

 .PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
 test_boolean_cov: install_rs_check_toolchain install_tarpaulin
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache \
+		--features=boolean,internal-keycache \
 		-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::

 .PHONY: test_c_api_rs # Run the rust tests for the C API
 test_c_api_rs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
+		--features=boolean-c-api,shortint-c-api,high-level-c-api \
 		-p $(TFHE_SPEC) \
 		c_api

@@ -706,14 +730,14 @@ test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
 .PHONY: test_shortint # Run all the tests for shortint
 test_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
+		--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::

 .PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
 test_shortint_cov: install_rs_check_toolchain install_tarpaulin
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
+		--features=shortint,internal-keycache \
 		-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::

 .PHONY: test_integer_ci # Run the tests for integer ci
@@ -770,26 +794,28 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
 		--signed-only --tfhe-package "$(TFHE_SPEC)"

-.PHONY: test_integer_long_run # Run the long run tests for integer
-test_integer_long_run: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-						--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
-
+.PHONY: test_integer_long_run # Run the long run integer tests
+test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	LONG_TESTS=TRUE \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--tfhe-package "$(TFHE_SPEC)"

 .PHONY: test_safe_serialization # Run the tests for safe serialization
 test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
+		--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::

 .PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
 test_zk: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint,zk-pok -p $(TFHE_SPEC) -- zk::
+		--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::

 .PHONY: test_integer # Run all the tests for integer
 test_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::
+		--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::

 .PHONY: test_integer_cov # Run the tests of the integer module with code coverage
 test_integer_cov: install_rs_check_toolchain install_tarpaulin
@@ -797,38 +823,44 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
 		--out xml --output-dir coverage/integer --line --engine llvm --timeout 500 \
 		--implicit-test-threads \
 		--exclude-files $(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
+		--features=integer,internal-keycache \
 		-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::

 .PHONY: test_high_level_api # Run all the tests for high_level_api
 test_high_level_api: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
+		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p $(TFHE_SPEC) \
 		-- high_level_api::

 test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) \
+		--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
 		-E "test(/high_level_api::.*gpu.*/)"

+.PHONY: test_high_level_api_hpu # Run the high_level_api tests matching the HPU backend
+test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
+		--features=integer,internal-keycache,hpu -p $(TFHE_SPEC) \
+		-E "test(/high_level_api::.*hpu.*/)"
+
 .PHONY: test_strings # Run the tests for strings ci
 test_strings: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint,integer,strings -p $(TFHE_SPEC) \
+		--features=shortint,integer,strings -p $(TFHE_SPEC) \
 		-- strings::


 .PHONY: test_user_doc # Run tests from the .md documentation
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
+		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok,strings \
 		-p $(TFHE_SPEC) \
 		-- test_user_docs::

 .PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
 test_user_doc_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
+		--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
 		-- test_user_docs::


@@ -836,14 +868,12 @@ test_user_doc_gpu: install_rs_build_toolchain
 .PHONY: test_regex_engine # Run tests for regex_engine example
 test_regex_engine: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--example regex_engine \
-		--features=$(TARGET_ARCH_FEATURE),integer
+		--example regex_engine --features=integer

 .PHONY: test_sha256_bool # Run tests for sha256_bool example
 test_sha256_bool: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--example sha256_bool \
-		--features=$(TARGET_ARCH_FEATURE),boolean
+		--example sha256_bool --features=boolean

 .PHONY: test_examples # Run tests for examples
 test_examples: test_sha256_bool test_regex_engine
@@ -861,7 +891,7 @@ test_kreyvium: install_rs_build_toolchain
 .PHONY: test_tfhe_csprng # Run tfhe-csprng tests
 test_tfhe_csprng: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng
+		-p tfhe-csprng

 .PHONY: test_zk_pok # Run tfhe-zk-pok tests
 test_zk_pok: install_rs_build_toolchain
@@ -879,23 +909,28 @@ test_zk_wasm_x86_compat_ci: check_nvm_installed
 test_zk_wasm_x86_compat: install_rs_build_toolchain build_node_js_api
 	cd tfhe/tests/zk_wasm_x86_test && npm install
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		-p tfhe --test zk_wasm_x86_test --features=$(TARGET_ARCH_FEATURE),integer,zk-pok
+		-p tfhe --test zk_wasm_x86_test --features=integer,zk-pok

 .PHONY: test_versionable # Run tests for tfhe-versionable subcrate
 test_versionable: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--all-targets -p tfhe-versionable

+.PHONY: test_tfhe_lints # Run the tests for tfhe-lints
+test_tfhe_lints: install_cargo_dylint
+	cd utils/tfhe-lints && \
+	cargo test
+
 # The backward compat data repo holds historical binary data but also rust code to generate and load them.
 # Here we use the "patch" functionality of Cargo to make sure the repo used for the data is the same as the one used for the code.
 .PHONY: test_backward_compatibility_ci
 test_backward_compatibility_ci: install_rs_build_toolchain
 	TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
-		--features=$(TARGET_ARCH_FEATURE),shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
+		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tests/$(BACKWARD_COMPAT_DATA_DIR)\"" \
+		--features=shortint,integer,zk-pok -p tests test_backward_compatibility -- --nocapture

 .PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
-test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
+test_backward_compatibility: tests/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci

 .PHONY: backward_compat_branch # Prints the required backward compatibility branch
 backward_compat_branch:
@@ -907,7 +942,7 @@ doc: install_rs_check_toolchain
 	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
+		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)

 .PHONY: docs # Build rust doc alias for doc
 docs: doc
@@ -918,7 +953,7 @@ lint_doc: install_rs_check_toolchain
 	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
+		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps

 .PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
 lint_docs: lint_doc
@@ -944,10 +979,14 @@ check_intra_md_links: install_mlc
 check_md_links: install_mlc
 	mlc --match-file-extension tfhe/docs

+.PHONY: check_parameter_export_ok # Checks exported "current" shortint parameter module is correct
+check_parameter_export_ok:
+	python3 ./scripts/check_current_param_export.py
+
 .PHONY: check_compile_tests # Build tests in debug without running them
 check_compile_tests: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
-		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
+		--features=experimental,boolean,shortint,integer,internal-keycache \
 		-p $(TFHE_SPEC)

 	@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
@@ -958,7 +997,7 @@ check_compile_tests: install_rs_build_toolchain
 .PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
 check_compile_tests_benches_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
-		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,gpu \
+		--features=experimental,boolean,shortint,integer,internal-keycache,gpu \
 		-p $(TFHE_SPEC)
 	mkdir -p "$(TFHECUDA_BUILD)" && \
 		cd "$(TFHECUDA_BUILD)" && \
@@ -1037,42 +1076,51 @@ dieharder_csprng: install_dieharder build_tfhe_csprng
 .PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
 print_doc_bench_parameters:
 	RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe
+	--features=shortint,internal-keycache -p tfhe

 .PHONY: bench_integer # Run benchmarks for unsigned integer
 bench_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_signed_integer # Run benchmarks for signed integer
 bench_signed_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-signed-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
 bench_integer_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+
+.PHONY: bench_integer_hpu # Run benchmarks for integer on HPU backend (sources setup_hpu.sh first)
+bench_integer_hpu: install_rs_check_toolchain
+	source ./setup_hpu.sh --config aved && \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
+	HPU_BACKEND_DIR="$(HPU_BACKEND_DIR)" HPU_CONFIG="$(HPU_CONFIG)" AVED_PCIE_DEV="$(AVED_PCIE_DEV)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=integer,internal-keycache,hpu,hpu-aved -p $(TFHE_SPEC) --

 .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
 bench_integer_compression: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench	glwe_packing_compression-integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_integer_compression_gpu
 bench_integer_compression_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench	glwe_packing_compression-integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --

 .PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
 bench_integer_multi_bit: install_rs_check_toolchain
@@ -1080,7 +1128,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
 bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -1088,7 +1136,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-signed-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
 bench_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1096,7 +1144,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
 bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1104,14 +1152,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
+	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned

 .PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
 bench_integer_zk: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench zk-pke-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,zk-pok,nightly-avx512 \
+	--features=integer,internal-keycache,zk-pok,nightly-avx512 \
 	-p $(TFHE_SPEC) --

 .PHONY: bench_shortint # Run benchmarks for shortint
@@ -1119,14 +1167,14 @@ bench_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_shortint_oprf # Run benchmarks for shortint
 bench_shortint_oprf: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench oprf-shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
 bench_shortint_multi_bit: install_rs_check_toolchain
@@ -1134,43 +1182,43 @@ bench_shortint_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_boolean # Run benchmarks for boolean
 bench_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench boolean-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_pbs # Run benchmarks for PBS
 bench_pbs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
 bench_pbs128: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs128-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
 bench_pbs_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_ks # Run benchmarks for keyswitch
 bench_ks: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench ks-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_ks_gpu # Run benchmarks for PBS on GPU backend
 bench_ks_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench ks-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
 bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
@@ -1206,13 +1254,21 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
 bench_hlapi_erc20: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ECR20 operations on GPU
 bench_hlapi_erc20_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
-	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
+
+.PHONY: bench_hlapi_erc20_hpu # Run benchmarks for ERC20 operations on HPU (sources setup_hpu.sh first)
+bench_hlapi_erc20_hpu: install_rs_check_toolchain
+	source ./setup_hpu.sh --config aved && \
+	RUSTFLAGS="$(RUSTFLAGS)" HPU_BACKEND_DIR="$(HPU_BACKEND_DIR)" HPU_CONFIG="$(HPU_CONFIG)" AVED_PCIE_DEV="$(AVED_PCIE_DEV)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench hlapi-erc20 \
+	--features=integer,internal-keycache,hpu,hpu-aved -p $(TFHE_SPEC) -- --quick

 .PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
 bench_tfhe_zk_pok: install_rs_check_toolchain
@@ -1227,32 +1283,32 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
 gen_key_cache: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 		--example generates_test_keys \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
+		--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
 		-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)

 .PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
 gen_key_cache_core_crypto: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
+		--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
 		core_crypto::keycache::generate_keys

 .PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
 measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example hlapi_compact_pk_ct_sizes \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache
+	--features=integer,internal-keycache

 .PHONY: measure_shortint_key_sizes # Measure sizes of bootstrapping and key switching keys for shortint
 measure_shortint_key_sizes: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example shortint_key_sizes \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache
+	--features=shortint,internal-keycache

 .PHONY: measure_boolean_key_sizes # Measure sizes of bootstrapping and key switching keys for boolean
 measure_boolean_key_sizes: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example boolean_key_sizes \
-	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache
+	--features=boolean,internal-keycache

 .PHONY: parse_integer_benches # Run python parser to output a csv containing integer benches data
 parse_integer_benches:
@@ -1264,20 +1320,19 @@ parse_integer_benches:
 parse_wasm_benchmarks: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example wasm_benchmarks_parser \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
+	--features=shortint,internal-keycache \
 	-- wasm_benchmark_results.json

 .PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
 write_params_to_file: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
-	--example write_params_to_file \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run \
+	--example write_params_to_file --features=boolean,shortint,internal-keycache

 .PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
 clone_backward_compat_data:
-	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tfhe/$(BACKWARD_COMPAT_DATA_DIR)
+	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tests/$(BACKWARD_COMPAT_DATA_DIR)

-tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
+tests/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data

 #
 # Real use case examples
@@ -1286,36 +1341,36 @@ tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
 .PHONY: regex_engine # Run regex_engine example
 regex_engine: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
-	--example regex_engine \
-	--features=$(TARGET_ARCH_FEATURE),integer \
+	--example regex_engine --features=integer \
 	-- $(REGEX_STRING) $(REGEX_PATTERN)

 .PHONY: dark_market # Run dark market example
 dark_market: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example dark_market \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
+	--features=integer,internal-keycache \
 	-- fhe-modified fhe-parallel plain fhe

 .PHONY: sha256_bool # Run sha256_bool example
 sha256_bool: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
-	--example sha256_bool \
-	--features=$(TARGET_ARCH_FEATURE),boolean
+	--example sha256_bool --features=boolean

 .PHONY: pcc # pcc stands for pre commit checks (except GPU)
-pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
-clippy_all tfhe_lints check_compile_tests
+pcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
+check_md_docs_are_tested check_intra_md_links clippy_all check_compile_tests test_tfhe_lints \
+tfhe_lints

 .PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
-pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change
+pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
+clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu

 .PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
-fpcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested clippy_fast \
-check_compile_tests
+fpcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
+check_md_docs_are_tested clippy_fast check_compile_tests

 .PHONY: conformance # Automatically fix problems that can be fixed
-conformance: fix_newline fmt
+conformance: fix_newline fmt fmt_js

 #=============================== FFT Section ==================================
 .PHONY: doc_fft # Build rust doc for tfhe-fft
@@ -1387,7 +1442,7 @@ test_fft_nightly: install_rs_check_toolchain
 .PHONY: test_fft_no_std
 test_fft_no_std: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-fft \
-		--no-default-features 
+		--no-default-features
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-fft \
 		--no-default-features \
 		--features=fft128
@@ -1481,7 +1536,7 @@ test_ntt_nightly: install_rs_check_toolchain
 .PHONY: test_ntt_no_std
 test_ntt_no_std: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-ntt \
-		--no-default-features 
+		--no-default-features

 .PHONY: test_ntt_no_std_nightly
 test_ntt_no_std_nightly: install_rs_check_toolchain
--- a/README.md
+++ b/README.md
@@ -70,22 +70,8 @@ production-ready library for all the advanced features of TFHE.
 ### Cargo.toml configuration
 To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:

-+ For x86_64-based machines running Unix-like OSes:
-
 ```toml
-tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-unix"] }
-```
-
-+ For Apple Silicon or aarch64-based machines running Unix-like OSes:
-
-```toml
-tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
-```
-
-+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:
-
-```toml
-tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
+tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
 ```

 > [!Note]
--- a/apps/trivium/Cargo.toml
+++ b/apps/trivium/Cargo.toml
@@ -6,15 +6,8 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-rayon = { version = "1.7.0"}
-
-[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
-path = "../../tfhe"
-features = [ "boolean", "shortint", "integer", "x86_64" ]
-
-[target.'cfg(target_arch = "aarch64")'.dependencies.tfhe]
-path = "../../tfhe"
-features = [ "boolean", "shortint", "integer", "aarch64-unix" ]
+rayon = { workspace = true }
+tfhe = { path = "../../tfhe", features = [ "boolean", "shortint", "integer" ] }

 [dev-dependencies]
 criterion = { version = "0.5.1", features = [ "html_reports" ]}
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -18,102 +18,102 @@ use tfhe::prelude::*;
 use tfhe_trivium::TriviumStream;

 fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
-	assert!(a.len() % 8 == 0);
-	let mut hexadecimal: String = "".to_string();
-	for test in a.chunks(8) {
-		// Encoding is bytes in LSB order
-		match test[4..8] {
-			[false, false, false, false] => hexadecimal.push('0'),
-			[true, false, false, false] => hexadecimal.push('1'),
-			[false, true, false, false] => hexadecimal.push('2'),
-			[true, true, false, false] => hexadecimal.push('3'),
+    assert!(a.len() % 8 == 0);
+    let mut hexadecimal: String = "".to_string();
+    for test in a.chunks(8) {
+        // Encoding is bytes in LSB order
+        match test[4..8] {
+            [false, false, false, false] => hexadecimal.push('0'),
+            [true, false, false, false] => hexadecimal.push('1'),
+            [false, true, false, false] => hexadecimal.push('2'),
+            [true, true, false, false] => hexadecimal.push('3'),

-			[false, false, true, false] => hexadecimal.push('4'),
-			[true, false, true, false] => hexadecimal.push('5'),
-			[false, true, true, false] => hexadecimal.push('6'),
-			[true, true, true, false] => hexadecimal.push('7'),
+            [false, false, true, false] => hexadecimal.push('4'),
+            [true, false, true, false] => hexadecimal.push('5'),
+            [false, true, true, false] => hexadecimal.push('6'),
+            [true, true, true, false] => hexadecimal.push('7'),

-			[false, false, false, true] => hexadecimal.push('8'),
-			[true, false, false, true] => hexadecimal.push('9'),
-			[false, true, false, true] => hexadecimal.push('A'),
-			[true, true, false, true] => hexadecimal.push('B'),
+            [false, false, false, true] => hexadecimal.push('8'),
+            [true, false, false, true] => hexadecimal.push('9'),
+            [false, true, false, true] => hexadecimal.push('A'),
+            [true, true, false, true] => hexadecimal.push('B'),

-			[false, false, true, true] => hexadecimal.push('C'),
-			[true, false, true, true] => hexadecimal.push('D'),
-			[false, true, true, true] => hexadecimal.push('E'),
-			[true, true, true, true] => hexadecimal.push('F'),
-			_ => ()
-		};
-		match test[0..4] {
-			[false, false, false, false] => hexadecimal.push('0'),
-			[true, false, false, false] => hexadecimal.push('1'),
-			[false, true, false, false] => hexadecimal.push('2'),
-			[true, true, false, false] => hexadecimal.push('3'),
+            [false, false, true, true] => hexadecimal.push('C'),
+            [true, false, true, true] => hexadecimal.push('D'),
+            [false, true, true, true] => hexadecimal.push('E'),
+            [true, true, true, true] => hexadecimal.push('F'),
+            _ => ()
+        };
+        match test[0..4] {
+            [false, false, false, false] => hexadecimal.push('0'),
+            [true, false, false, false] => hexadecimal.push('1'),
+            [false, true, false, false] => hexadecimal.push('2'),
+            [true, true, false, false] => hexadecimal.push('3'),

-			[false, false, true, false] => hexadecimal.push('4'),
-			[true, false, true, false] => hexadecimal.push('5'),
-			[false, true, true, false] => hexadecimal.push('6'),
-			[true, true, true, false] => hexadecimal.push('7'),
+            [false, false, true, false] => hexadecimal.push('4'),
+            [true, false, true, false] => hexadecimal.push('5'),
+            [false, true, true, false] => hexadecimal.push('6'),
+            [true, true, true, false] => hexadecimal.push('7'),

-			[false, false, false, true] => hexadecimal.push('8'),
-			[true, false, false, true] => hexadecimal.push('9'),
-			[false, true, false, true] => hexadecimal.push('A'),
-			[true, true, false, true] => hexadecimal.push('B'),
+            [false, false, false, true] => hexadecimal.push('8'),
+            [true, false, false, true] => hexadecimal.push('9'),
+            [false, true, false, true] => hexadecimal.push('A'),
+            [true, true, false, true] => hexadecimal.push('B'),

-			[false, false, true, true] => hexadecimal.push('C'),
-			[true, false, true, true] => hexadecimal.push('D'),
-			[false, true, true, true] => hexadecimal.push('E'),
-			[true, true, true, true] => hexadecimal.push('F'),
-			_ => ()
-		};
-	}
-	return hexadecimal;
+            [false, false, true, true] => hexadecimal.push('C'),
+            [true, false, true, true] => hexadecimal.push('D'),
+            [false, true, true, true] => hexadecimal.push('E'),
+            [true, true, true, true] => hexadecimal.push('F'),
+            _ => ()
+        };
+    }
+    return hexadecimal;
 }

 fn main() {
-	let config = ConfigBuilder::default().build();
-	let (client_key, server_key) = generate_keys(config);
+    let config = ConfigBuilder::default().build();
+    let (client_key, server_key) = generate_keys(config);

-	let key_string = "0053A6F94C9FF24598EB".to_string();
-	let mut key = [false; 80];
+    let key_string = "0053A6F94C9FF24598EB".to_string();
+    let mut key = [false; 80];

-	for i in (0..key_string.len()).step_by(2) {
-		let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
-		for j in 0..8 {
-			key[8*(i>>1) + j] = val % 2 == 1;
-			val >>= 1;
-		}
-	}
+    for i in (0..key_string.len()).step_by(2) {
+        let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
+        for j in 0..8 {
+            key[8*(i>>1) + j] = val % 2 == 1;
+            val >>= 1;
+        }
+    }

-	let iv_string = "0D74DB42A91077DE45AC".to_string();
-	let mut iv = [false; 80];
+    let iv_string = "0D74DB42A91077DE45AC".to_string();
+    let mut iv = [false; 80];

-	for i in (0..iv_string.len()).step_by(2) {
-		let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
-		for j in 0..8 {
-			iv[8*(i>>1) + j] = val % 2 == 1;
-			val >>= 1;
-		}
-	}
-	
-	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+    for i in (0..iv_string.len()).step_by(2) {
+        let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
+        for j in 0..8 {
+            iv[8*(i>>1) + j] = val % 2 == 1;
+            val >>= 1;
+        }
+    }

-	let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
-	let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+
+    let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
+    let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));


-	let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);
+    let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);

-	let mut vec = Vec::<bool>::with_capacity(64*8);
-	while vec.len() < 64*8 {
-		let cipher_outputs = trivium.next_64();
-		for c in cipher_outputs {
-			vec.push(c.decrypt(&client_key))
-		}
-	}
+    let mut vec = Vec::<bool>::with_capacity(64*8);
+    while vec.len() < 64*8 {
+        let cipher_outputs = trivium.next_64();
+        for c in cipher_outputs {
+            vec.push(c.decrypt(&client_key))
+        }
+    }

-	let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
-	assert_eq!(output_0_63, hexadecimal[0..64*2]);
+    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
+    assert_eq!(output_0_63, hexadecimal[0..64*2]);
 }
 ```

@@ -129,63 +129,76 @@ Other sizes than 64 bit are expected to be available in the future.

 # FHE shortint Trivium implementation

-The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `PARAM_MESSAGE_1_CARRY_1_KS_PBS`). It uses a lower level API 
-of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run on the same 
-cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes 
+The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
+It uses a lower-level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
+on the same cryptographic parameters as the high-level API of tfhe-rs. As such, it requires the use of a casting key to switch from one parameter space to another, which makes
 its setup a little more intricate.

 Example code:
 ```rust
 use tfhe::shortint::prelude::*;
-use tfhe::shortint::CastingKey;
+use tfhe::shortint::parameters::v1_0::{
+    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+};
 use tfhe::{ConfigBuilder, generate_keys, FheUint64};
 use tfhe::prelude::*;
 use tfhe_trivium::TriviumStreamShortint;

 fn test_shortint() {
-	let config = ConfigBuilder::default().build();
-	let (hl_client_key, hl_server_key) = generate_keys(config);
-	let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
-	let ksk = CastingKey::new((&client_key, &server_key), (&hl_client_key, &hl_server_key));
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
+    let (hl_client_key, hl_server_key) = generate_keys(config);
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

-	let key_string = "0053A6F94C9FF24598EB".to_string();
-	let mut key = [0; 80];
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+    let ksk = KeySwitchingKey::new(
+        (&client_key, Some(&server_key)),
+        (&underlying_ck, &underlying_sk),
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    );

-	for i in (0..key_string.len()).step_by(2) {
-		let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
-		for j in 0..8 {
-			key[8*(i>>1) + j] = val % 2;
-			val >>= 1;
-		}
-	}
+    let key_string = "0053A6F94C9FF24598EB".to_string();
+    let mut key = [0; 80];

-	let iv_string = "0D74DB42A91077DE45AC".to_string();
-	let mut iv = [0; 80];
+    for i in (0..key_string.len()).step_by(2) {
+        let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
+        for j in 0..8 {
+            key[8*(i>>1) + j] = val % 2;
+            val >>= 1;
+        }
+    }

-	for i in (0..iv_string.len()).step_by(2) {
-		let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
-		for j in 0..8 {
-			iv[8*(i>>1) + j] = val % 2;
-			val >>= 1;
-		}
-	}
-	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+    let iv_string = "0D74DB42A91077DE45AC".to_string();
+    let mut iv = [0; 80];

-	let cipher_key = key.map(|x| client_key.encrypt(x));
-	let cipher_iv = iv.map(|x| client_key.encrypt(x));
+    for i in (0..iv_string.len()).step_by(2) {
+        let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
+        for j in 0..8 {
+            iv[8*(i>>1) + j] = val % 2;
+            val >>= 1;
+        }
+    }
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();

-	let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];
+    let cipher_key = key.map(|x| client_key.encrypt(x));
+    let cipher_iv = iv.map(|x| client_key.encrypt(x));

-	let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);
+    let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];

-	let mut vec = Vec::<u64>::with_capacity(8);
-	while vec.len() < 8 {
-		let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
-		vec.push(trans_ciphered_message.decrypt(&hl_client_key));
-	}
+    let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);

-	let hexadecimal = get_hexagonal_string_from_u64(vec);
-	assert_eq!(output_0_63, hexadecimal[0..64*2]);
+    let mut vec = Vec::<u64>::with_capacity(8);
+    while vec.len() < 8 {
+        let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
+        vec.push(trans_ciphered_message.decrypt(&hl_client_key));
+    }
+
+    let hexadecimal = get_hexagonal_string_from_u64(vec);
+    assert_eq!(output_0_63, hexadecimal[0..64*2]);
 }
 ```

--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -1,23 +1,29 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::v1_0::{
+    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+};
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};

 pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -57,18 +63,20 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -103,18 +111,20 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -1,23 +1,29 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::v1_0::{
+    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+};
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{TransCiphering, TriviumStreamShortint};

 pub fn trivium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -57,18 +63,20 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn trivium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -103,18 +111,20 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
 }

 pub fn trivium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -1,6 +1,10 @@
 use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::v1_0::{
+    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+};
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo renaud1239/Kreyvium,
 // commit fd6828f68711276c25f55e605935028f5e843f43
@@ -216,18 +220,20 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn kreyvium_test_shortint_long() {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -1,6 +1,10 @@
 use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
+use tfhe::shortint::parameters::v1_0::{
+    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
+    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
+};
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
 // file testvectors/trivium-80.80.test-vectors
@@ -352,18 +356,20 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn trivium_test_shortint_long() {
-    let config = ConfigBuilder::default().build();
+    let config = ConfigBuilder::default()
+        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
+        .build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
+        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tfhe-cuda-backend"
-version = "0.6.0"
+version = "0.8.0"
 edition = "2021"
 authors = ["Zama team"]
 license = "BSD-3-Clause-Clear"
@@ -14,4 +14,4 @@ keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
 [build-dependencies]
 cmake = { version = "0.1" }
 pkg-config = { version = "0.3" }
-bindgen = "0.70.1"
+bindgen = "0.71"
--- a/backends/tfhe-cuda-backend/LICENSE
+++ b/backends/tfhe-cuda-backend/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2024 ZAMA.
+Copyright © 2025 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/backends/tfhe-cuda-backend/cuda/include/device.h
+++ b/backends/tfhe-cuda-backend/cuda/include/device.h
@@ -27,6 +27,8 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
    std::abort();                                                              \
  }

+void cuda_set_device(uint32_t gpu_index);
+
 cudaEvent_t cuda_create_event(uint32_t gpu_index);

 void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
--- a/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
@@ -38,6 +38,7 @@ template <typename Torus> struct int_compression {

      scratch_packing_keyswitch_lwe_list_to_glwe_64(
          streams[0], gpu_indexes[0], &fp_ks_buffer,
+          compression_params.small_lwe_dimension,
          compression_params.glwe_dimension, compression_params.polynomial_size,
          num_radix_blocks, true);
    }
@@ -64,7 +65,7 @@ template <typename Torus> struct int_decompression {
  Torus *tmp_extracted_lwe;
  uint32_t *tmp_indexes_array;

-  int_radix_lut<Torus> *carry_extract_lut;
+  int_radix_lut<Torus> *decompression_rescale_lut;

  int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
                    uint32_t gpu_count, int_radix_params encryption_params,
@@ -83,7 +84,7 @@ template <typename Torus> struct int_decompression {
      Torus lwe_accumulator_size = (compression_params.glwe_dimension *
                                        compression_params.polynomial_size +
                                    1);
-      carry_extract_lut = new int_radix_lut<Torus>(
+      decompression_rescale_lut = new int_radix_lut<Torus>(
          streams, gpu_indexes, gpu_count, encryption_params, 1,
          num_radix_blocks, allocate_gpu_memory);

@@ -96,18 +97,30 @@ template <typename Torus> struct int_decompression {
          num_radix_blocks * lwe_accumulator_size * sizeof(Torus), streams[0],
          gpu_indexes[0]);

-      // Carry extract LUT
-      auto carry_extract_f = [encryption_params](Torus x) -> Torus {
-        return x / encryption_params.message_modulus;
+      // Rescale is done using an identity LUT.
+      // Here we do not divide by message_modulus.
+      // Example: in the 2_2 case we are mapping a 2-bit message onto a 4-bit
+      // space. We want to keep the original 2-bit value in the 4-bit space,
+      // so we apply the identity and the encoding will rescale it for us.
+      auto decompression_rescale_f = [encryption_params](Torus x) -> Torus {
+        return x;
      };

-      generate_device_accumulator<Torus>(
-          streams[0], gpu_indexes[0], carry_extract_lut->get_lut(0, 0),
-          encryption_params.glwe_dimension, encryption_params.polynomial_size,
-          encryption_params.message_modulus, encryption_params.carry_modulus,
-          carry_extract_f);
+      auto effective_compression_message_modulus =
+          encryption_params.carry_modulus;
+      auto effective_compression_carry_modulus = 1;

-      carry_extract_lut->broadcast_lut(streams, gpu_indexes, 0);
+      generate_device_accumulator_with_encoding<Torus>(
+          streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
+          decompression_rescale_lut->get_degree(0),
+          decompression_rescale_lut->get_max_degree(0),
+          encryption_params.glwe_dimension, encryption_params.polynomial_size,
+          effective_compression_message_modulus,
+          effective_compression_carry_modulus,
+          encryption_params.message_modulus, encryption_params.carry_modulus,
+          decompression_rescale_f);
+
+      decompression_rescale_lut->broadcast_lut(streams, gpu_indexes, 0);
    }
  }
  void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -116,8 +129,8 @@ template <typename Torus> struct int_decompression {
    cuda_drop_async(tmp_extracted_lwe, streams[0], gpu_indexes[0]);
    cuda_drop_async(tmp_indexes_array, streams[0], gpu_indexes[0]);

-    carry_extract_lut->release(streams, gpu_indexes, gpu_count);
-    delete carry_extract_lut;
+    decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
+    delete decompression_rescale_lut;
  }
 };
 #endif
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -38,6 +38,15 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
 enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };

 extern "C" {
+
+typedef struct {
+  void *ptr;
+  uint64_t *degrees;
+  uint64_t *noise_levels;
+  uint32_t num_radix_blocks;
+  uint32_t lwe_dimension;
+} CudaRadixCiphertextFFI;
+
 void scratch_cuda_apply_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -45,14 +54,20 @@ void scratch_cuda_apply_univariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory);
-
-void cuda_apply_univariate_lut_kb_64(void *const *streams,
-                                     uint32_t const *gpu_indexes,
-                                     uint32_t gpu_count, void *output_radix_lwe,
-                                     void const *input_radix_lwe,
-                                     int8_t *mem_ptr, void *const *ksks,
-                                     void *const *bsks, uint32_t num_blocks);
+    uint64_t lut_degree, bool allocate_gpu_memory);
+void scratch_cuda_apply_many_univariate_lut_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
+    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
+    uint32_t grouping_factor, uint32_t num_radix_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory);
+void cuda_apply_univariate_lut_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    CudaRadixCiphertextFFI *output_radix_lwe,
+    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks);

 void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
                                             uint32_t const *gpu_indexes,
@@ -66,13 +81,15 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory);
+    uint64_t lut_degree, bool allocate_gpu_memory);

 void cuda_apply_bivariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *output_radix_lwe, void const *input_radix_lwe_1,
-    void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
-    void *const *bsks, uint32_t num_blocks, uint32_t shift);
+    CudaRadixCiphertextFFI *output_radix_lwe,
+    CudaRadixCiphertextFFI const *input_radix_lwe_1,
+    CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
+    uint32_t shift);

 void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
                                            uint32_t const *gpu_indexes,
@@ -81,9 +98,10 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,

 void cuda_apply_many_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_blocks,
-    uint32_t num_luts, uint32_t lut_stride);
+    CudaRadixCiphertextFFI *output_radix_lwe,
+    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks, uint32_t num_luts,
+    uint32_t lut_stride);

 void scratch_cuda_full_propagation_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -125,15 +143,14 @@ void cleanup_cuda_integer_mult(void *const *streams,

 void cuda_negate_integer_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
-    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
    uint32_t carry_modulus);

 void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
-    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
-    uint32_t carry_modulus);
+    CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
+    uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus);

 void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -146,8 +163,8 @@ void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(

 void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t num_blocks);
+    CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks);

 void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -160,8 +177,8 @@ void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(

 void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t num_blocks);
+    CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_radix_logical_scalar_shift(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -182,8 +199,8 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(

 void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t num_blocks);
+    CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
                                                 uint32_t const *gpu_indexes,
@@ -226,15 +243,17 @@ void scratch_cuda_integer_radix_bitop_kb_64(

 void cuda_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    uint32_t lwe_ciphertext_count);
+    CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_array_1,
+    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks);

 void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
+    CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
    uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op);
+    void *const *ksks);

 void cleanup_cuda_integer_bitop(void *const *streams,
                                uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -251,9 +270,11 @@ void scratch_cuda_integer_radix_cmux_kb_64(

 void cuda_cmux_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
-    void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t lwe_ciphertext_count);
+    CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_condition,
+    CudaRadixCiphertextFFI const *lwe_array_true,
+    CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_radix_cmux(void *const *streams,
                                     uint32_t const *gpu_indexes,
@@ -270,8 +291,8 @@ void scratch_cuda_integer_radix_scalar_rotate_kb_64(

 void cuda_integer_radix_scalar_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array, uint32_t n, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t num_blocks);
+    CudaRadixCiphertextFFI *lwe_array, uint32_t n, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_radix_scalar_rotate(void *const *streams,
                                              uint32_t const *gpu_indexes,
@@ -298,15 +319,16 @@ void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(

 void cuda_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks, uint32_t num_blocks,
-    uint32_t requested_flag, uint32_t uses_carry);
+    CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
+    const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);

 void cuda_add_and_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lhs_array, const void *rhs_array, void *carry_out,
-    const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry);
+    CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
+    CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t requested_flag, uint32_t uses_carry);

 void cleanup_cuda_propagate_single_carry(void *const *streams,
                                         uint32_t const *gpu_indexes,
@@ -329,9 +351,10 @@ void scratch_cuda_integer_overflowing_sub_kb_64_inplace(

 void cuda_integer_overflowing_sub_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lhs_array, const void *rhs_array, void *overflow_block,
-    const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
+    CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
+    CudaRadixCiphertextFFI *overflow_block,
+    const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks, uint32_t compute_overflow,
    uint32_t uses_input_borrow);

 void cleanup_cuda_integer_overflowing_sub(void *const *streams,
@@ -404,12 +427,13 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory);
+    uint64_t lut_degree, bool allocate_gpu_memory);

 void cuda_integer_compute_prefix_sum_hillis_steele_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *output_radix_lwe, void *generates_or_propagates, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_blocks, uint32_t shift);
+    CudaRadixCiphertextFFI *output_radix_lwe,
+    CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks, uint32_t num_blocks);

 void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -417,9 +441,8 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(

 void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
                                            uint32_t const *gpu_indexes,
-                                            uint32_t gpu_count, void *lwe_array,
-                                            uint32_t num_blocks,
-                                            uint32_t lwe_size);
+                                            uint32_t gpu_count,
+                                            CudaRadixCiphertextFFI *lwe_array);

 void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -432,13 +455,49 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
-    void *const *ksks, uint32_t num_blocks);
+    CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
+    void *const *bsks, void *const *ksks);

 void cleanup_cuda_integer_abs_inplace(void *const *streams,
                                      uint32_t const *gpu_indexes,
                                      uint32_t gpu_count,
                                      int8_t **mem_ptr_void);

+void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
+    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
+    uint32_t grouping_factor, uint32_t num_radix_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    bool allocate_gpu_memory);
+
+void cuda_integer_are_all_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
+
+void cleanup_cuda_integer_are_all_comparisons_block_true(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr_void);
+
+void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
+    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
+    uint32_t grouping_factor, uint32_t num_radix_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    bool allocate_gpu_memory);
+
+void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
+
+void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr_void);
+
 } // extern C
 #endif // CUDA_INTEGER_H
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
--- a/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
@@ -0,0 +1,8 @@
+#ifndef CUDA_RADIX_CIPHERTEXT_H
+#define CUDA_RADIX_CIPHERTEXT_H
+
+void release_radix_ciphertext(cudaStream_t const stream,
+                              uint32_t const gpu_index,
+                              CudaRadixCiphertextFFI *data);
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
+++ b/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
@@ -21,8 +21,8 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(

 void scratch_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
-    bool allocate_gpu_memory);
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t num_lwes, bool allocate_gpu_memory);

 void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, void *glwe_array_out,
--- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
+++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
@@ -1,49 +1,52 @@
 #ifndef CUDA_LINALG_H_
 #define CUDA_LINALG_H_

+#include "integer/integer.h"
 #include <stdint.h>

 extern "C" {

-void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                          void *lwe_array_out,
-                                          void const *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count);
-void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                          void *lwe_array_out,
-                                          void const *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_32(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                       void *lwe_array_out,
-                                       void const *lwe_array_in_1,
-                                       void const *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count);
+                                       CudaRadixCiphertextFFI *output,
+                                       CudaRadixCiphertextFFI const *input_1,
+                                       CudaRadixCiphertextFFI const *input_2);
 void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                       void *lwe_array_out,
-                                       void const *lwe_array_in_1,
-                                       void const *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count);
-
+                                       CudaRadixCiphertextFFI *output,
+                                       CudaRadixCiphertextFFI const *input_1,
+                                       CudaRadixCiphertextFFI const *input_2);
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_plaintext_64(
+    void *stream, uint32_t gpu_index, void *lwe_array_out,
+    void const *lwe_array_in, const uint64_t plaintext_in,
+    const uint32_t input_lwe_dimension,
+    const uint32_t input_lwe_ciphertext_count);
 }

 #endif // CUDA_LINALG_H_
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_multibit_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_multibit_utilities.h
@@ -5,12 +5,12 @@

 template <typename Torus>
 bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
-    uint32_t polynomial_size);
+    uint32_t polynomial_size, int max_shared_memory);

 template <typename Torus>
 bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
    uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t level_count);
+    uint32_t level_count, int max_shared_memory);

 #if CUDA_ARCH >= 900
 template <typename Torus>
@@ -114,6 +114,8 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
             uint32_t polynomial_size, uint32_t level_count,
             uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
             PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
+    cuda_set_device(gpu_index);
+
    this->pbs_variant = pbs_variant;
    this->lwe_chunk_size = lwe_chunk_size;
    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
@@ -61,7 +61,7 @@ get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {

 template <typename Torus>
 bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
-    uint32_t polynomial_size);
+    uint32_t polynomial_size, int max_shared_memory);

 template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;

@@ -77,10 +77,10 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
             uint32_t polynomial_size, uint32_t level_count,
             uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
             bool allocate_gpu_memory) {
-
+    cuda_set_device(gpu_index);
    this->pbs_variant = pbs_variant;

-    auto max_shared_memory = cuda_get_max_shared_memory(0);
+    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);

    if (allocate_gpu_memory) {
      switch (pbs_variant) {
@@ -157,7 +157,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {

        bool supports_dsm =
            supports_distributed_shared_memory_on_classic_programmable_bootstrap<
-                Torus>(polynomial_size);
+                Torus>(polynomial_size, max_shared_memory);

        uint64_t full_sm =
            get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(
@@ -218,8 +218,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
 template <typename Torus>
 uint64_t get_buffer_size_programmable_bootstrap_cg(
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count) {
-  int max_shared_memory = cuda_get_max_shared_memory(0);
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
  uint64_t full_sm =
      get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
  uint64_t partial_sm =
@@ -245,7 +244,8 @@ template <typename Torus>
 bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
                                                   uint32_t polynomial_size,
                                                   uint32_t level_count,
-                                                   uint32_t num_samples);
+                                                   uint32_t num_samples,
+                                                   int max_shared_memory);

 template <typename Torus>
 void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
@@ -8,7 +8,7 @@ extern "C" {

 bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t num_samples);
+    uint32_t num_samples, int max_shared_memory);

 void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
    void *stream, uint32_t gpu_index, void *dest, void const *src,
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
@@ -11,7 +11,7 @@ void cuda_convert_lwe_ciphertext_vector_to_gpu(cudaStream_t stream,
                                               uint32_t gpu_index, T *dest,
                                               T *src, uint32_t number_of_cts,
                                               uint32_t lwe_dimension) {
-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);
  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
  cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
 }
@@ -21,7 +21,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
                                               uint32_t gpu_index, T *dest,
                                               T *src, uint32_t number_of_cts,
                                               uint32_t lwe_dimension) {
-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);
  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
  cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
 }
@@ -55,7 +55,7 @@ __host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
                                  Torus const *glwe_array_in,
                                  uint32_t const *nth_array, uint32_t num_nths,
                                  uint32_t glwe_dimension) {
-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);

  dim3 grid(num_nths);
  dim3 thds(params::degree / params::opt);
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
@@ -0,0 +1,356 @@
+#ifndef CNCRT_FAST_KS_CUH
+#define CNCRT_FAST_KS_CUH
+
+#undef NDEBUG
+#include <assert.h>
+
+#include "device.h"
+#include "gadget.cuh"
+#include "helper_multi_gpu.h"
+#include "keyswitch.cuh"
+#include "polynomial/functions.cuh"
+#include "polynomial/polynomial_math.cuh"
+#include "torus.cuh"
+#include "utils/helper.cuh"
+#include "utils/kernel_dimensions.cuh"
+#include <thread>
+#include <vector>
+
+#define CEIL_DIV(M, N) (((M) + (N)-1) / (N)) // ceil(M / N), positive ints
+
+constexpr int BLOCK_SIZE_GEMM = 64; // tgemm: tile edge (BM and BN)
+constexpr int THREADS_GEMM = 8; // tgemm: BK depth and TM rows per thread
+constexpr int BLOCK_SIZE_DECOMP = 8; // presumably decompose block edge; confirm
+
+template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
+  return 2 * sizeof(Torus) * (BLOCK_SIZE_GEMM * THREADS_GEMM); // As + Bs tiles
+}
+
+// Starts the decomposition (rounding, then first level) of the mask of each
+// input LWE; bodies are skipped. lwe_out holds num_lwe * lwe_dimension level
+// values followed by as many decomposer states. Expects a 2D launch (x, y).
+template <typename Torus, typename TorusVec>
+__global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
+                                         uint32_t lwe_dimension,
+                                         uint32_t num_lwe, uint32_t base_log,
+                                         uint32_t level_count) {
+
+  // x dimension of the launch selects the LWE ciphertext
+  auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  // y dimension of the launch selects the mask element inside that LWE
+  auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
+
+  if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
+    return;
+
+  // Each input LWE is laid out as [mask_0, .., mask_{lwe_dim-1}, body], so
+  // reads use a stride of lwe_dimension + 1 while writes (mask only) use
+  // lwe_dimension; states are stored after the num_lwe * lwe_dimension values
+  auto read_val_idx = lwe_idx * (lwe_dimension + 1) + lwe_sample_idx;
+  auto write_val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
+  auto write_state_idx =
+      num_lwe * lwe_dimension + lwe_idx * lwe_dimension + lwe_sample_idx;
+
+  Torus a_i = lwe_in[read_val_idx];
+
+  Torus state = init_decomposer_state(a_i, base_log, level_count);
+
+  Torus mod_b_mask = (1ll << base_log) - 1ll;
+  lwe_out[write_val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
+  synchronize_threads_in_block();
+  lwe_out[write_state_idx] = state;
+}
+
+// Continues the decomposition started by decompose_vectorize_init, in place:
+// buffer_in holds num_lwe * lwe_dimension level values followed by as many
+// states; each thread overwrites its value with the next level and its state.
+template <typename Torus, typename TorusVec>
+__global__ void
+decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
+                                 uint32_t num_lwe, uint32_t base_log,
+                                 uint32_t level_count) {
+
+  // LWE selected by the x dimension (level_count is not read by this kernel)
+  auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  // y dimension of the launch selects the mask element inside that LWE
+  auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
+
+  if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
+    return;
+
+  auto val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
+  auto state_idx = num_lwe * lwe_dimension + val_idx;
+
+  Torus state = buffer_in[state_idx];
+  synchronize_threads_in_block();
+
+  Torus mod_b_mask = (1ll << base_log) - 1ll;
+
+  buffer_in[val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
+  synchronize_threads_in_block();
+  buffer_in[state_idx] = state;
+}
+
+// Computes C += A * B for A (M x K, row stride K), B (K x N, row stride
+// stride_B) and C (M x N, row stride N). C is accumulated into, not
+// overwritten, so the caller must initialize it.
+//
+// blockIdx.y / blockIdx.x select a (BLOCK_SIZE_GEMM, BLOCK_SIZE_GEMM) tile of
+// C; the indexing assumes BLOCK_SIZE_GEMM * BLOCK_SIZE_GEMM / THREADS_GEMM
+// threads along x. For each step over K the block stages a (BLOCK_SIZE_GEMM,
+// THREADS_GEMM) tile of A and a (THREADS_GEMM, BLOCK_SIZE_GEMM) tile of B in
+// shared memory, zero-filling entries that fall outside the matrices.
+// Adapted from the 1d block-tiling kernel of
+// https://github.com/siboehm/SGEMM_CUDA, generalized to any matrix dimension
+template <typename Torus, typename TorusVec>
+__global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
+                      int stride_B, Torus *C) {
+
+  const int BM = BLOCK_SIZE_GEMM;
+  const int BN = BLOCK_SIZE_GEMM;
+  const int BK = THREADS_GEMM;
+  const int TM = THREADS_GEMM;
+
+  const uint cRow = blockIdx.y;
+  const uint cCol = blockIdx.x;
+
+  const int threadCol = threadIdx.x % BN;
+  const int threadRow = threadIdx.x / BN;
+
+  // Statically sized shared-memory staging tiles for A and B
+  __shared__ Torus As[BM * BK];
+  __shared__ Torus Bs[BK * BN];
+
+  // Move A to the first row and B to the first column of this block's tile;
+  // both pointers then advance along K inside the loop
+  A += cRow * BM * K;
+  B += cCol * BN;
+
+  // Position of the single A and B element this thread stages per K-step
+  const uint innerColA = threadIdx.x % BK;
+  const uint innerRowA = threadIdx.x / BK;
+  const uint innerColB = threadIdx.x % BN;
+  const uint innerRowB = threadIdx.x / BN;
+
+  // Per-thread accumulators for TM consecutive rows of one output column
+  Torus threadResults[TM] = {0};
+
+  auto row_A = cRow * BM + innerRowA;
+  auto col_B = cCol * BN + innerColB;
+
+  // Walk the K dimension in steps of BK
+  for (uint bkIdx = 0; bkIdx < K; bkIdx += BK) {
+    auto col_A = bkIdx + innerColA;
+    auto row_B = bkIdx + innerRowB;
+
+    if (row_A < M && col_A < K) {
+      As[innerRowA * BK + innerColA] = A[innerRowA * K + innerColA];
+    } else {
+      As[innerRowA * BK + innerColA] = 0;
+    }
+
+    if (col_B < N && row_B < K) {
+      Bs[innerRowB * BN + innerColB] = B[innerRowB * stride_B + innerColB];
+    } else {
+      Bs[innerRowB * BN + innerColB] = 0;
+    }
+    synchronize_threads_in_block();
+
+    // Shift A right and B down by BK for the next K-step
+    A += BK;
+    B += BK * stride_B;
+
+    // Accumulate this K-step's partial dot products
+    for (uint dotIdx = 0; dotIdx < BK; ++dotIdx) {
+      // dotIdx is the outer loop so the Bs entry is read once into tmp and
+      // reused for all TM rows handled by this thread
+      Torus tmp = Bs[dotIdx * BN + threadCol];
+      for (uint resIdx = 0; resIdx < TM; ++resIdx) {
+        threadResults[resIdx] +=
+            As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
+      }
+    }
+    synchronize_threads_in_block();
+  }
+
+  // Move C to the top-left corner of this block's (BLOCK_SIZE_GEMM,
+  // BLOCK_SIZE_GEMM) output tile
+  C += cRow * BM * N + cCol * BN;
+
+  // Add the in-range results to C (C is accumulated, not overwritten)
+  for (uint resIdx = 0; resIdx < TM; ++resIdx) {
+    int outRow = cRow * BM + threadRow * TM + resIdx;
+    int outCol = cCol * BN + threadCol;
+
+    if (outRow >= M)
+      continue;
+    if (outCol >= N)
+      continue;
+
+    C[(threadRow * TM + resIdx) * N + threadCol] += threadResults[resIdx];
+  }
+}
+
+// Finish the packing keyswitch started by the GEMM and rotate each GLWE.
+// Launch layout: x indexes the GLWE (one per input LWE), y the coefficient.
+// 1. Negate every coefficient of the GLWE . KSK dot product; the first
+//    coefficient of the last polynomial becomes `C` - (dot product)
+// 2. Write to out_glwe_buffer each polynomial of GLWE number glwe_id rotated
+//    by glwe_id positions, ready for accumulation into a single GLWE
+template <typename Torus>
+__global__ void polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C(
+    Torus *in_glwe_buffer, Torus *out_glwe_buffer, Torus const *lwe_array,
+    uint32_t lwe_dimension, uint32_t num_glwes, uint32_t polynomial_size,
+    uint32_t glwe_dimension) {
+
+  uint32_t glwe_id = blockIdx.x * blockDim.x + threadIdx.x;
+  uint32_t degree = glwe_id; // lwe 0 rotate 0, lwe 1 rotate 1, .. , lwe
+                             // poly_size-1 rotate poly_size-1
+  uint32_t coeffIdx = blockIdx.y * blockDim.y + threadIdx.y;
+
+  if (glwe_id >= num_glwes)
+    return;
+  if (coeffIdx >= polynomial_size)
+    return;
+
+  auto in_poly =
+      in_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
+  auto out_result =
+      out_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
+  if (coeffIdx == 0) {
+    // Combine the body of the input LWE (`C`) with the GLWE . KSK dot
+    // product: the stored value becomes C minus the dot product.
+
+    // C lands in the first coefficient of the last polynomial of the GLWE,
+    // which has (glwe_dimension + 1) polynomials.
+    // C is the last value of the LWE ciphertext of index glwe_id; each LWE
+    // holds (lwe_dimension + 1) values.
+    in_poly[polynomial_size * glwe_dimension] =
+        lwe_array[glwe_id * (lwe_dimension + 1) + lwe_dimension] -
+        in_poly[polynomial_size * glwe_dimension];
+    // Negate coefficient 0 of polynomials 1 .. glwe_dimension - 1
+    for (int gi = 1; gi < glwe_dimension; ++gi)
+      in_poly[coeffIdx + gi * polynomial_size] =
+          -in_poly[coeffIdx + gi * polynomial_size];
+
+  } else {
+    // Otherwise negate this coefficient in polynomials 1 .. glwe_dimension
+    for (int gi = 1; gi < glwe_dimension + 1; ++gi)
+      in_poly[coeffIdx + gi * polynomial_size] =
+          -in_poly[coeffIdx + gi * polynomial_size];
+  }
+  // Polynomial 0 is negated at every coefficient index, including 0
+  in_poly[coeffIdx] = -in_poly[coeffIdx];
+
+  // rotate the first polynomial (index 0)
+  polynomial_accumulate_monic_monomial_mul<Torus>(
+      out_result, in_poly, degree, coeffIdx, polynomial_size, 1, true);
+  // rotate the remaining polynomials, up to and including the last one
+  for (int gi = 1; gi < glwe_dimension + 1; ++gi)
+    polynomial_accumulate_monic_monomial_mul<Torus>(
+        out_result + gi * polynomial_size, in_poly + gi * polynomial_size,
+        degree, coeffIdx, polynomial_size, 1, true);
+}
+
+template <typename Torus, typename TorusVec>
+__host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
+    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
+    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_lwes) {
+
+  // Packing keyswitch for many LWEs: per-level decomposition + GEMM against
+  // the KSK, then per-GLWE negate/rotate and a final accumulation in glwe_out
+  cuda_set_device(gpu_index);
+  check_cuda_error(cudaGetLastError());
+
+  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
+
+  // The fast path of PKS uses the scratch buffer (d_mem) differently than the
+  // old path: it needs to store the decomposed masks in the first half of this
+  // buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
+  // scratch buffer for the fast path must determine the half-size of the
+  // scratch buffer as the max between the size of the GLWE and the size of the
+  // LWE-mask times two (to keep both decomposition state and decomposed
+  // intermediate value)
+  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
+                        ? glwe_accumulator_size
+                        : lwe_dimension * 2;
+
+  // d_mem_0: decomposed LWE masks, later reused for the rotated GLWEs
+  // d_mem_1: keyswitched GLWEs accumulated by the GEMMs
+  auto d_mem_0 = (Torus *)fp_ks_buffer;
+  auto d_mem_1 = d_mem_0 + num_lwes * memory_unit;
+
+  // Zero d_mem_1: every tgemm launch below receives it as output buffer and
+  // the contributions of the successive levels add up in it
+  cuda_memset_async(d_mem_1, 0, num_lwes * memory_unit * sizeof(Torus), stream,
+                    gpu_index);
+  check_cuda_error(cudaGetLastError());
+
+  // decompose LWEs
+  // don't decompose the LWE body - each LWE has lwe_dimension + 1 elements.
+  // The grid's y dimension covers lwe_dimension elements only; this assumes
+  // that the LWE dimension is a multiple of the block size
+  dim3 grid_decomp(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
+                   CEIL_DIV(lwe_dimension, BLOCK_SIZE_DECOMP));
+  dim3 threads_decomp(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
+
+  // decompose first level
+  decompose_vectorize_init<Torus, TorusVec>
+      <<<grid_decomp, threads_decomp, 0, stream>>>(lwe_array_in, d_mem_0,
+                                                   lwe_dimension, num_lwes,
+                                                   base_log, level_count);
+  check_cuda_error(cudaGetLastError());
+
+  // gemm to ks the individual LWEs to GLWEs
+  dim3 grid_gemm(CEIL_DIV(glwe_accumulator_size, BLOCK_SIZE_GEMM),
+                 CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
+  dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);
+
+  auto stride_KSK_buffer = glwe_accumulator_size * level_count;
+
+  uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
+  tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
+      num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0, fp_ksk_array,
+      stride_KSK_buffer, d_mem_1);
+  check_cuda_error(cudaGetLastError());
+
+  auto ksk_block_size = glwe_accumulator_size;
+  // Level li of the KSK starts li * ksk_block_size elements into each row
+  for (int li = 1; li < level_count; ++li) {
+    decompose_vectorize_step_inplace<Torus, TorusVec>
+        <<<grid_decomp, threads_decomp, 0, stream>>>(
+            d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
+    check_cuda_error(cudaGetLastError());
+
+    tgemm<Torus, TorusVec>
+        <<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
+            num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
+            fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
+    check_cuda_error(cudaGetLastError());
+  }
+
+  // The rotation kernel processes all (glwe_dimension + 1) polynomials
+  dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
+                   CEIL_DIV(polynomial_size, BLOCK_SIZE_DECOMP));
+  dim3 threads_rotate(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
+  // rotate the GLWEs: reads d_mem_1 and writes the result to d_mem_0
+  polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C<Torus>
+      <<<grid_rotate, threads_rotate, 0, stream>>>(
+          d_mem_1, d_mem_0, lwe_array_in, lwe_dimension, num_lwes,
+          polynomial_size, glwe_dimension);
+  check_cuda_error(cudaGetLastError());
+
+  dim3 grid_accumulate(
+      CEIL_DIV(polynomial_size * (glwe_dimension + 1), BLOCK_SIZE_DECOMP));
+  dim3 threads_accum(BLOCK_SIZE_DECOMP);
+
+  // accumulate to a single glwe
+  accumulate_glwes<Torus><<<grid_accumulate, threads_accum, 0, stream>>>(
+      glwe_out, d_mem_0, glwe_dimension, polynomial_size, num_lwes);
+
+  check_cuda_error(cudaGetLastError());
+}
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
@@ -57,7 +57,7 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
  if (gpu_count != 1)
    PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
          "supported yet.")
-  cudaSetDevice(gpu_indexes[0]);
+  cuda_set_device(gpu_indexes[0]);

  int shared_memory_size = sizeof(double) * polynomial_size;

--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
@@ -1,6 +1,8 @@
+#include "fast_packing_keyswitch.cuh"
 #include "keyswitch.cuh"
 #include "keyswitch.h"
 #include <cstdint>
+#include <stdio.h>

 /* Perform keyswitch on a batch of 32 bits input LWE ciphertexts.
 * Head out to the equivalent operation on 64 bits for more details.
@@ -53,15 +55,17 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(

 void scratch_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
-    bool allocate_gpu_memory) {
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t num_lwes, bool allocate_gpu_memory) {
  scratch_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
-      static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer,
+      static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer, lwe_dimension,
      glwe_dimension, polynomial_size, num_lwes, allocate_gpu_memory);
 }
+
 /* Perform functional packing keyswitch on a batch of 64 bits input LWE
 * ciphertexts.
 */
+
 void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, void *glwe_array_out,
    void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
@@ -69,7 +73,7 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
    uint32_t num_lwes) {

-  host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
+  host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
      static_cast<cudaStream_t>(stream), gpu_index,
      static_cast<uint64_t *>(glwe_array_out),
      static_cast<const uint64_t *>(lwe_array_in),
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
@@ -45,19 +45,19 @@ keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
          const Torus *__restrict__ lwe_input_indexes,
          const Torus *__restrict__ ksk, uint32_t lwe_dimension_in,
          uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count) {
-  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  const int tid = threadIdx.x + blockIdx.y * blockDim.x;
  const int shmem_index = threadIdx.x + threadIdx.y * blockDim.x;

  extern __shared__ int8_t sharedmem[];
  Torus *lwe_acc_out = (Torus *)sharedmem;
  auto block_lwe_array_out = get_chunk(
-      lwe_array_out, lwe_output_indexes[blockIdx.y], lwe_dimension_out + 1);
+      lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1);

  if (tid <= lwe_dimension_out) {

    Torus local_lwe_out = 0;
    auto block_lwe_array_in = get_chunk(
-        lwe_array_in, lwe_input_indexes[blockIdx.y], lwe_dimension_in + 1);
+        lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);

    if (tid == lwe_dimension_out && threadIdx.y == 0) {
      local_lwe_out = block_lwe_array_in[lwe_dimension_in];
@@ -105,16 +105,22 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
    uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
    uint32_t num_samples) {

-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);

  constexpr int num_threads_y = 32;
-  int num_blocks, num_threads_x;
+  int num_blocks_per_sample, num_threads_x;

  getNumBlocksAndThreads2D(lwe_dimension_out + 1, 512, num_threads_y,
-                           num_blocks, num_threads_x);
+                           num_blocks_per_sample, num_threads_x);

  int shared_mem = sizeof(Torus) * num_threads_y * num_threads_x;
-  dim3 grid(num_blocks, num_samples, 1);
+  // The second grid dimension (gridDim.y) is limited to 65535 blocks
+  if (num_blocks_per_sample > 65535)
+    PANIC("Cuda error (Keyswith): number of blocks per sample is too large");
+  // In multiplication of large integers (512, 1024, 2048), the number of
+  // samples can be larger than 65535, so we need to set it in the first
+  // dimension of the grid
+  dim3 grid(num_samples, num_blocks_per_sample, 1);
  dim3 threads(num_threads_x, num_threads_y, 1);

  keyswitch<Torus><<<grid, threads, shared_mem, stream>>>(
@@ -158,16 +164,22 @@ void execute_keyswitch_async(cudaStream_t const *streams,
 template <typename Torus>
 __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
    cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
-    bool allocate_gpu_memory) {
-  cudaSetDevice(gpu_index);
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t num_lwes, bool allocate_gpu_memory) {
+  cuda_set_device(gpu_index);

  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;

-  if (allocate_gpu_memory)
+  // allocate at least LWE-mask times two: to keep both decomposition state and
+  // decomposed intermediate value
+  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
+                        ? glwe_accumulator_size
+                        : lwe_dimension * 2;
+
+  if (allocate_gpu_memory) {
    *fp_ks_buffer = (int8_t *)cuda_malloc_async(
-        2 * num_lwes * glwe_accumulator_size * sizeof(Torus), stream,
-        gpu_index);
+        2 * num_lwes * memory_unit * sizeof(Torus), stream, gpu_index);
+  }
 }

 // public functional packing keyswitch for a single LWE ciphertext
@@ -217,43 +229,6 @@ __device__ void packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext(
  }
 }

-// public functional packing keyswitch for a batch of LWE ciphertexts
-//
-// Selects the input each thread is working on using the y-block index.
-//
-// Assumes there are (glwe_dimension+1) * polynomial_size threads split through
-// different thread blocks at the x-axis to work on that input.
-template <typename Torus>
-__global__ void packing_keyswitch_lwe_list_to_glwe(
-    Torus *glwe_array_out, Torus const *lwe_array_in, Torus const *fp_ksk,
-    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
-    Torus *d_mem) {
-  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
-
-  const int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
-  const int lwe_size = (lwe_dimension_in + 1);
-
-  const int input_id = blockIdx.y;
-  const int degree = input_id;
-
-  // Select an input
-  auto lwe_in = lwe_array_in + input_id * lwe_size;
-  auto ks_glwe_out = d_mem + input_id * glwe_accumulator_size;
-  auto glwe_out = glwe_array_out + input_id * glwe_accumulator_size;
-  // KS LWE to GLWE
-  packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
-      ks_glwe_out, lwe_in, fp_ksk, lwe_dimension_in, glwe_dimension,
-      polynomial_size, base_log, level_count);
-
-  // P * x ^degree
-  auto in_poly = ks_glwe_out + (tid / polynomial_size) * polynomial_size;
-  auto out_result = glwe_out + (tid / polynomial_size) * polynomial_size;
-  polynomial_accumulate_monic_monomial_mul<Torus>(out_result, in_poly, degree,
-                                                  tid % polynomial_size,
-                                                  polynomial_size, 1, true);
-}
-
 /// To-do: Rewrite this kernel for efficiency
 template <typename Torus>
 __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
@@ -271,42 +246,4 @@ __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
  }
 }

-template <typename Torus>
-__host__ void host_packing_keyswitch_lwe_list_to_glwe(
-    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
-    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
-    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
-    uint32_t num_lwes) {
-
-  if (num_lwes > polynomial_size)
-    PANIC("Cuda error: too many LWEs to pack. The number of LWEs should be "
-          "smaller than "
-          "polynomial_size.")
-
-  cudaSetDevice(gpu_index);
-  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
-
-  int num_blocks = 0, num_threads = 0;
-  getNumBlocksAndThreads(glwe_accumulator_size, 128, num_blocks, num_threads);
-
-  dim3 grid(num_blocks, num_lwes);
-  dim3 threads(num_threads);
-
-  auto d_mem = (Torus *)fp_ks_buffer;
-  auto d_tmp_glwe_array_out = d_mem + num_lwes * glwe_accumulator_size;
-
-  // individually keyswitch each lwe
-  packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(
-      d_tmp_glwe_array_out, lwe_array_in, fp_ksk_array, lwe_dimension_in,
-      glwe_dimension, polynomial_size, base_log, level_count, d_mem);
-  check_cuda_error(cudaGetLastError());
-
-  // accumulate to a single glwe
-  accumulate_glwes<Torus><<<num_blocks, threads, 0, stream>>>(
-      glwe_out, d_tmp_glwe_array_out, glwe_dimension, polynomial_size,
-      num_lwes);
-  check_cuda_error(cudaGetLastError());
-}
-
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
@@ -110,7 +110,7 @@ template <typename Torus>
 __host__ void host_modulus_switch_inplace(cudaStream_t stream,
                                          uint32_t gpu_index, Torus *array,
                                          int size, uint32_t log_modulus) {
-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);

  int num_threads = 0, num_blocks = 0;
  getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);
--- a/backends/tfhe-cuda-backend/cuda/src/device.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/device.cu
@@ -2,8 +2,12 @@
 #include <cstdint>
 #include <cuda_runtime.h>

-cudaEvent_t cuda_create_event(uint32_t gpu_index) {
+void cuda_set_device(uint32_t gpu_index) {
  check_cuda_error(cudaSetDevice(gpu_index));
+}
+
+cudaEvent_t cuda_create_event(uint32_t gpu_index) {
+  cuda_set_device(gpu_index);
  cudaEvent_t event;
  check_cuda_error(cudaEventCreate(&event));
  return event;
@@ -11,24 +15,24 @@ cudaEvent_t cuda_create_event(uint32_t gpu_index) {

 void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
                       uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaEventRecord(event, stream));
 }

 void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
                            uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
 }

 void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaEventDestroy(event));
 }

 /// Unsafe function to create a CUDA stream, must check first that GPU exists
 cudaStream_t cuda_create_stream(uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  cudaStream_t stream;
  check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  return stream;
@@ -36,15 +40,22 @@ cudaStream_t cuda_create_stream(uint32_t gpu_index) {

 /// Unsafe function to destroy CUDA stream, must check first the GPU exists
 void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaStreamDestroy(stream));
 }

 void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaStreamSynchronize(stream));
 }

+// Synchronize streams[k] on GPU gpu_indexes[k] for every k below gpu_count.
+void synchronize_streams(cudaStream_t const *streams,
+                         uint32_t const *gpu_indexes, uint32_t gpu_count) {
+  for (uint32_t gpu = 0; gpu < gpu_count; ++gpu)
+    cuda_synchronize_stream(streams[gpu], gpu_indexes[gpu]);
+}
+
 // Determine if a CUDA device is available at runtime
 uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }

@@ -52,7 +63,7 @@ uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
 /// or if there's not enough memory. A safe wrapper around it must call
 /// cuda_check_valid_malloc() first
 void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  void *ptr;
  check_cuda_error(cudaMalloc((void **)&ptr, size));

@@ -63,7 +74,7 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
 /// asynchronously.
 void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
                        uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  void *ptr;

 #ifndef CUDART_VERSION
@@ -86,7 +97,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream,

 /// Check that allocation is valid
 void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  size_t total_mem, free_mem;
  check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
  if (size > free_mem) {
@@ -134,7 +145,7 @@ void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
    PANIC("Cuda error: invalid device pointer in async copy to GPU.")
  }

-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(
      cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
 }
@@ -154,7 +165,7 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
  if (attr_src.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
  }
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  if (attr_src.device == attr_dest.device) {
    check_cuda_error(
        cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
@@ -179,7 +190,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
  if (attr_src.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
  }
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  if (attr_src.device == attr_dest.device) {
    check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice));
  } else {
@@ -190,7 +201,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,

 /// Synchronizes device
 void cuda_synchronize_device(uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaDeviceSynchronize());
 }

@@ -203,7 +214,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
  if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid dest device pointer in cuda memset.")
  }
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
 }

@@ -223,7 +234,7 @@ void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
    if (attr.type != cudaMemoryTypeDevice) {
      PANIC("Cuda error: invalid dest device pointer in cuda set value.")
    }
-    check_cuda_error(cudaSetDevice(gpu_index));
+    cuda_set_device(gpu_index);
    int block_size = 256;
    int num_blocks = (n + block_size - 1) / block_size;

@@ -253,7 +264,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
    PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
  }

-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(
      cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
 }
@@ -267,14 +278,14 @@ int cuda_get_number_of_gpus() {

 /// Drop a cuda array
 void cuda_drop(void *ptr, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
  check_cuda_error(cudaFree(ptr));
 }

 /// Drop a cuda array asynchronously, if supported on the device
 void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {

-  check_cuda_error(cudaSetDevice(gpu_index));
+  cuda_set_device(gpu_index);
 #ifndef CUDART_VERSION
 #error CUDART_VERSION Undefined!
 #elif (CUDART_VERSION >= 11020)
--- a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
@@ -22,15 +22,14 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
-    void *const *ksks, uint32_t num_blocks) {
+    CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
+    void *const *bsks, void *const *ksks) {

  auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;

  host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
-                                gpu_count, static_cast<uint64_t *>(ct), bsks,
-                                (uint64_t **)(ksks), mem, is_signed,
-                                num_blocks);
+                                gpu_count, ct, bsks, (uint64_t **)(ksks), mem,
+                                is_signed);
 }

 void cleanup_cuda_integer_abs_inplace(void *const *streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
@@ -2,15 +2,12 @@
 #define TFHE_RS_ABS_CUH

 #include "crypto/keyswitch.cuh"
-#include "device.h"
 #include "integer/bitwise_ops.cuh"
 #include "integer/comparison.cuh"
 #include "integer/integer.cuh"
-#include "integer/integer_utilities.h"
 #include "integer/negation.cuh"
 #include "integer/scalar_shifts.cuh"
-#include "linear_algebra.h"
-#include "pbs/programmable_bootstrap.h"
+#include "radix_ciphertext.cuh"
 #include "utils/helper.cuh"
 #include "utils/kernel_dimensions.cuh"
 #include <fstream>
@@ -32,16 +29,15 @@ __host__ void scratch_cuda_integer_abs_kb(
 }

 template <typename Torus>
-__host__ void
-host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
-                    uint32_t gpu_count, Torus *ct, void *const *bsks,
-                    uint64_t *const *ksks, int_abs_buffer<uint64_t> *mem_ptr,
-                    bool is_signed, uint32_t num_blocks) {
+__host__ void legacy_host_integer_abs_kb_async(
+    cudaStream_t const *streams, uint32_t const *gpu_indexes,
+    uint32_t gpu_count, Torus *ct, void *const *bsks, uint64_t *const *ksks,
+    int_abs_buffer<uint64_t> *mem_ptr, bool is_signed, uint32_t num_blocks) {
  if (!is_signed)
    return;

  auto radix_params = mem_ptr->params;
-  auto mask = mem_ptr->mask;
+  auto mask = (Torus *)(mem_ptr->mask->ptr);

  auto big_lwe_dimension = radix_params.big_lwe_dimension;
  auto big_lwe_size = big_lwe_dimension + 1;
@@ -52,20 +48,55 @@ host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
  cuda_memcpy_async_gpu_to_gpu(mask, ct, num_blocks * big_lwe_size_bytes,
                               streams[0], gpu_indexes[0]);

-  host_integer_radix_arithmetic_scalar_shift_kb_inplace(
+  legacy_host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
      streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
      mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, num_blocks);
-  host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
-                       radix_params.big_lwe_dimension, num_blocks);
+  legacy_host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
+                              radix_params.big_lwe_dimension, num_blocks);

  uint32_t requested_flag = outputFlag::FLAG_NONE;
  uint32_t uses_carry = 0;
-  host_propagate_single_carry<Torus>(
+  legacy_host_propagate_single_carry<Torus>(
      streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
      bsks, ksks, num_blocks, requested_flag, uses_carry);

-  host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
-                              mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
+  // legacy bitop
+  legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      streams, gpu_indexes, gpu_count, ct, mask, ct, bsks, ksks, num_blocks,
+      mem_ptr->bitxor_mem->lut, mem_ptr->bitxor_mem->params.message_modulus);
+}
+
+template <typename Torus>
+__host__ void
+host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
+                    uint32_t gpu_count, CudaRadixCiphertextFFI *ct,
+                    void *const *bsks, uint64_t *const *ksks,
+                    int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
+  if (!is_signed)
+    return;
+  // In place on ct: shift a copy (mask), add it, propagate carries, bit-op
+  auto mask = mem_ptr->mask;
+  // floor(log2(message_modulus)) bits per block times the number of blocks
+  uint32_t num_bits_in_ciphertext =
+      (31 - __builtin_clz(mem_ptr->params.message_modulus)) *
+      ct->num_radix_blocks;
+  // mask starts as a copy of ct and is then shifted in place
+  copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], mask, ct);
+
+  host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
+      streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
+      mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);
+  host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
+                       ct->num_radix_blocks);
+  // NOTE(review): presumably ct now holds ct + mask - confirm argument order
+  uint32_t requested_flag = outputFlag::FLAG_NONE;
+  uint32_t uses_carry = 0;
+  host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
+                                     nullptr, nullptr, mem_ptr->scp_mem, bsks,
+                                     ksks, requested_flag, uses_carry);
+  // final bit operation on (ct, mask) using the bitxor_mem buffer
+  host_integer_radix_bitop_kb<Torus>(streams, gpu_indexes, gpu_count, ct, mask,
+                                     ct, mem_ptr->bitxor_mem, bsks, ksks);
+}

 #endif // TFHE_RS_ABS_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
@@ -22,17 +22,15 @@ void scratch_cuda_integer_radix_bitop_kb_64(

 void cuda_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    uint32_t lwe_ciphertext_count) {
+    CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_array_1,
+    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks) {

  host_integer_radix_bitop_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
-      static_cast<uint64_t *>(lwe_array_out),
-      static_cast<const uint64_t *>(lwe_array_1),
-      static_cast<const uint64_t *>(lwe_array_2),
-      (int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-      lwe_ciphertext_count);
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
+      lwe_array_1, lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
+      (uint64_t **)(ksks));
 }

 void cleanup_cuda_integer_bitop(void *const *streams,
@@ -43,3 +41,50 @@ void cleanup_cuda_integer_bitop(void *const *streams,
      (int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
 }
+
+void update_degrees_after_bitand(uint64_t *output_degrees, // out: one per block
+                                 uint64_t *lwe_array_1_degrees,
+                                 uint64_t *lwe_array_2_degrees,
+                                 uint32_t num_radix_blocks) {
+  for (uint32_t block = 0; block < num_radix_blocks; ++block) {
+    const uint64_t lhs_degree = lwe_array_1_degrees[block];
+    output_degrees[block] = std::min(lhs_degree, lwe_array_2_degrees[block]);
+  } // each output degree is the smaller of the two input degrees
+}
+
+void update_degrees_after_bitor(uint64_t *output_degrees, // out: one per block
+                                uint64_t *lwe_array_1_degrees, // in: operand 1
+                                uint64_t *lwe_array_2_degrees, // in: operand 2
+                                uint32_t num_radix_blocks) {   // array length
+  for (uint i = 0; i < num_radix_blocks; i++) {
+    auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto result = max; // j == 0 already yields max | 0 == max
+    // Try every possibility to find the worst case: max | j for j in [0, min]
+    for (uint64_t j = 0; j < min + 1; j++) { // 64-bit j matches min's width
+      if ((max | j) > result) { // parentheses required: `>` binds before `|`
+        result = max | j;
+      }
+    }
+    output_degrees[i] = result;
+  }
+}
+
+void update_degrees_after_bitxor(uint64_t *output_degrees, // out: one per block
+                                 uint64_t *lwe_array_1_degrees, // in: operand 1
+                                 uint64_t *lwe_array_2_degrees, // in: operand 2
+                                 uint32_t num_radix_blocks) {   // array length
+  for (uint i = 0; i < num_radix_blocks; i++) {
+    auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
+    auto result = max; // j == 0 already yields max ^ 0 == max
+
+    // Try every possibility to find the worst case
+    for (uint64_t j = 0; j < min + 1; j++) { // 64-bit j matches min's width
+      if ((max ^ j) > result) { // parentheses required: `>` binds before `^`
+        result = max ^ j;
+      }
+    }
+    output_degrees[i] = result;
+  }
+}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
@@ -14,15 +14,34 @@
 template <typename Torus>
 __host__ void host_integer_radix_bitop_kb(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_1,
-    Torus const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
-    void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks) {
+    uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_array_1,
+    CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
+    void *const *bsks, Torus *const *ksks) {

  auto lut = mem_ptr->lut;
+  uint64_t degrees[lwe_array_1->num_radix_blocks]; // NOTE(review): runtime-sized stack array (VLA extension, not standard C++)
+  if (mem_ptr->op == BITOP_TYPE::BITAND) { // pick the degree rule matching the configured op
+    update_degrees_after_bitand(degrees, lwe_array_1->degrees,
+                                lwe_array_2->degrees,
+                                lwe_array_1->num_radix_blocks);
+  } else if (mem_ptr->op == BITOP_TYPE::BITOR) {
+    update_degrees_after_bitor(degrees, lwe_array_1->degrees,
+                               lwe_array_2->degrees,
+                               lwe_array_1->num_radix_blocks);
+  } else if (mem_ptr->op == BITXOR) { // unqualified here, BITOP_TYPE::-qualified in the branches above
+    update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
+                                lwe_array_2->degrees,
+                                lwe_array_1->num_radix_blocks);
+  } // NOTE(review): for any other op value, degrees is left uninitialized

  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
-      bsks, ksks, num_radix_blocks, lut, lut->params.message_modulus);
+      bsks, ksks, lut, lwe_array_out->num_radix_blocks,
+      lut->params.message_modulus);
+  // Degrees were computed before the LUT call and are written to the output only now.
+  memcpy(lwe_array_out->degrees, degrees, // host-side copy of the local array
+         lwe_array_out->num_radix_blocks * sizeof(uint64_t)); // NOTE(review): degrees is sized by lwe_array_1->num_radix_blocks; assumes it equals the output's
 }

 template <typename Torus>
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
@@ -25,19 +25,16 @@ void scratch_cuda_integer_radix_cmux_kb_64(

 void cuda_cmux_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
-    void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t lwe_ciphertext_count) {
+    CudaRadixCiphertextFFI *lwe_array_out, // destination of the selection
+    CudaRadixCiphertextFFI const *lwe_condition, // encrypted selector
+    CudaRadixCiphertextFFI const *lwe_array_true, // candidate kept when the condition is true
+    CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks) { // no explicit block count any more

  host_integer_radix_cmux_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
-      static_cast<uint64_t *>(lwe_array_out),
-      static_cast<const uint64_t *>(lwe_condition),
-      static_cast<const uint64_t *>(lwe_array_true),
-      static_cast<const uint64_t *>(lwe_array_false),
-      (int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
-
-      lwe_ciphertext_count);
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
+      lwe_condition, lwe_array_true, lwe_array_false,
+      (int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks)); // untyped mem_ptr/ksks are cast to 64-bit types
 }

 void cleanup_cuda_integer_radix_cmux(void *const *streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
@@ -2,6 +2,7 @@
 #define CUDA_INTEGER_CMUX_CUH

 #include "integer.cuh"
+#include "radix_ciphertext.cuh"

 template <typename Torus>
 __host__ void zero_out_if(cudaStream_t const *streams,
@@ -11,25 +12,25 @@ __host__ void zero_out_if(cudaStream_t const *streams,
                          int_zero_out_if_buffer<Torus> *mem_ptr,
                          int_radix_lut<Torus> *predicate, void *const *bsks,
                          Torus *const *ksks, uint32_t num_radix_blocks) {
-  cudaSetDevice(gpu_indexes[0]);
+  cuda_set_device(gpu_indexes[0]);
  auto params = mem_ptr->params;

  // We can't use integer_radix_apply_bivariate_lookup_table_kb since the
  // second operand is not an array
  auto tmp_lwe_array_input = mem_ptr->tmp;
-  pack_bivariate_blocks_with_single_block<Torus>(
+  host_pack_bivariate_blocks_with_single_block<Torus>(
      streams, gpu_indexes, gpu_count, tmp_lwe_array_input,
      predicate->lwe_indexes_in, lwe_array_input, lwe_condition,
      predicate->lwe_indexes_in, params.big_lwe_dimension,
      params.message_modulus, num_radix_blocks);

-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
      ksks, num_radix_blocks, predicate);
 }

 template <typename Torus>
-__host__ void host_integer_radix_cmux_kb(
+__host__ void legacy_host_integer_radix_cmux_kb(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_condition,
    Torus const *lwe_array_true, Torus const *lwe_array_false,
@@ -37,45 +38,92 @@ __host__ void host_integer_radix_cmux_kb(
    uint32_t num_radix_blocks) {

  auto params = mem_ptr->params;
-
-  // Since our CPU threads will be working on different streams we shall assert
-  // the work in the main stream is completed
-  auto true_streams = mem_ptr->zero_if_true_buffer->true_streams;
-  auto false_streams = mem_ptr->zero_if_false_buffer->false_streams;
-  for (uint j = 0; j < gpu_count; j++) {
-    cuda_synchronize_stream(streams[j], gpu_indexes[j]);
-  }
-
-  auto mem_true = mem_ptr->zero_if_true_buffer;
-  zero_out_if<Torus>(true_streams, gpu_indexes, gpu_count, mem_ptr->tmp_true_ct,
-                     lwe_array_true, lwe_condition, mem_true,
-                     mem_ptr->inverted_predicate_lut, bsks, ksks,
-                     num_radix_blocks);
-  auto mem_false = mem_ptr->zero_if_false_buffer;
-  zero_out_if<Torus>(false_streams, gpu_indexes, gpu_count,
-                     mem_ptr->tmp_false_ct, lwe_array_false, lwe_condition,
-                     mem_false, mem_ptr->predicate_lut, bsks, ksks,
-                     num_radix_blocks);
-  for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
-    cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
-  }
-  for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
-    cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
+  Torus lwe_size = params.big_lwe_dimension + 1;
+  Torus radix_lwe_size = lwe_size * num_radix_blocks;
+  cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in->ptr, lwe_array_true,
+                               radix_lwe_size * sizeof(Torus), streams[0],
+                               gpu_indexes[0]);
+  cuda_memcpy_async_gpu_to_gpu(
+      (Torus *)(mem_ptr->buffer_in->ptr) + radix_lwe_size, lwe_array_false,
+      radix_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
+  for (uint i = 0; i < 2 * num_radix_blocks; i++) {
+    cuda_memcpy_async_gpu_to_gpu(
+        (Torus *)(mem_ptr->condition_array->ptr) + i * lwe_size, lwe_condition,
+        lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
  }
+  legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      streams, gpu_indexes, gpu_count, (Torus *)(mem_ptr->buffer_out->ptr),
+      (Torus *)(mem_ptr->buffer_in->ptr),
+      (Torus *)(mem_ptr->condition_array->ptr), bsks, ksks,
+      2 * num_radix_blocks, mem_ptr->predicate_lut, params.message_modulus);

  // If the condition was true, true_ct will have kept its value and false_ct
  // will be 0 If the condition was false, true_ct will be 0 and false_ct will
  // have kept its value
-  auto added_cts = mem_ptr->tmp_true_ct;
-  host_addition<Torus>(streams[0], gpu_indexes[0], added_cts,
-                       mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
-                       params.big_lwe_dimension, num_radix_blocks);
+  auto mem_true = (Torus *)(mem_ptr->buffer_out->ptr);
+  auto ptr = (Torus *)mem_ptr->buffer_out->ptr;
+  auto mem_false = &ptr[radix_lwe_size];
+  auto added_cts = mem_true;
+  legacy_host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
+                              mem_false, params.big_lwe_dimension,
+                              num_radix_blocks);

-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
      num_radix_blocks, mem_ptr->message_extract_lut);
 }

+template <typename Torus>
+__host__ void host_integer_radix_cmux_kb(
+    cudaStream_t const *streams, uint32_t const *gpu_indexes,
+    uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
+    CudaRadixCiphertextFFI const *lwe_condition,
+    CudaRadixCiphertextFFI const *lwe_array_true,
+    CudaRadixCiphertextFFI const *lwe_array_false,
+    int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {
+  // Both candidate inputs must have the same number of blocks as the output.
+  if (lwe_array_out->num_radix_blocks != lwe_array_true->num_radix_blocks)
+    PANIC("Cuda error: input and output num radix blocks must be the same")
+  if (lwe_array_out->num_radix_blocks != lwe_array_false->num_radix_blocks)
+    PANIC("Cuda error: input and output num radix blocks must be the same")
+  // Stage [true | false] in buffer_in so one LUT pass covers 2 * n blocks.
+  auto num_radix_blocks = lwe_array_out->num_radix_blocks;
+  auto params = mem_ptr->params;
+  Torus lwe_size = params.big_lwe_dimension + 1; // NOTE(review): not used below
+  copy_radix_ciphertext_slice_async<Torus>(
+      streams[0], gpu_indexes[0], mem_ptr->buffer_in, 0, num_radix_blocks,
+      lwe_array_true, 0, num_radix_blocks); // true input -> blocks [0, n)
+  copy_radix_ciphertext_slice_async<Torus>(
+      streams[0], gpu_indexes[0], mem_ptr->buffer_in, num_radix_blocks,
+      2 * num_radix_blocks, lwe_array_false, 0, num_radix_blocks); // false input -> blocks [n, 2n)
+  for (uint i = 0; i < 2 * num_radix_blocks; i++) { // one copy of condition block 0 per staged block
+    copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
+                                             mem_ptr->condition_array, i, i + 1,
+                                             lwe_condition, 0, 1);
+  }
+  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
+      mem_ptr->condition_array, bsks, ksks, mem_ptr->predicate_lut,
+      2 * num_radix_blocks, params.message_modulus); // pairs each staged block with its condition copy
+
+  // If the condition was true, true_ct will have kept its value and false_ct
+  // will be 0. If the condition was false, true_ct will be 0 and false_ct will
+  // have kept its value.
+  CudaRadixCiphertextFFI mem_true;
+  CudaRadixCiphertextFFI mem_false;
+  as_radix_ciphertext_slice<Torus>(&mem_true, mem_ptr->buffer_out, 0,
+                                   num_radix_blocks); // first half of buffer_out
+  as_radix_ciphertext_slice<Torus>(&mem_false, mem_ptr->buffer_out,
+                                   num_radix_blocks, 2 * num_radix_blocks); // second half
+  // Sum the two halves in place; mem_true is both an operand and the destination.
+  host_addition<Torus>(streams[0], gpu_indexes[0], &mem_true, &mem_true,
+                       &mem_false, num_radix_blocks);
+  // Final pass through message_extract_lut writes the result to lwe_array_out.
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      streams, gpu_indexes, gpu_count, lwe_array_out, &mem_true, bsks, ksks,
+      mem_ptr->message_extract_lut, num_radix_blocks);
+}
+
 template <typename Torus>
 __host__ void scratch_cuda_integer_radix_cmux_kb(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
@@ -58,6 +58,9 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
  case GE:
  case LT:
  case LE:
+    if (num_radix_blocks % 2 != 0)
+      PANIC("Cuda error (comparisons): the number of radix blocks has to be "
+            "even.")
    host_integer_radix_difference_check_kb<uint64_t>(
        (cudaStream_t *)(streams), gpu_indexes, gpu_count,
        static_cast<uint64_t *>(lwe_array_out),
@@ -68,6 +71,8 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
    break;
  case MAX:
  case MIN:
+    if (num_radix_blocks % 2 != 0)
+      PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
    host_integer_radix_maxmin_kb<uint64_t>(
        (cudaStream_t *)(streams), gpu_indexes, gpu_count,
        static_cast<uint64_t *>(lwe_array_out),
@@ -89,3 +94,91 @@ void cleanup_cuda_integer_comparison(void *const *streams,
      (int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
 }
+
+void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
+    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
+    uint32_t grouping_factor, uint32_t num_radix_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    bool allocate_gpu_memory) {
+  // Bundle the individual parameters into one int_radix_params value.
+  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
+                          big_lwe_dimension, small_lwe_dimension, ks_level,
+                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
+                          message_modulus, carry_modulus);
+  // Delegate to the generic comparison scratch routine, using the EQ operator.
+  scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
+      (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
+      false, allocate_gpu_memory); // NOTE(review): `false` is presumably the is_signed flag — confirm
+}
+
+void cuda_integer_are_all_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
+  // mem_ptr is reinterpreted as a 64-bit comparison buffer.
+  int_comparison_buffer<uint64_t> *buffer =
+      (int_comparison_buffer<uint64_t> *)mem_ptr;
+  // Cast the untyped arrays to uint64_t and forward to the host routine.
+  host_integer_are_all_comparisons_block_true_kb<uint64_t>(
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
+      static_cast<uint64_t *>(lwe_array_out),
+      static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
+      (uint64_t **)(ksks), num_radix_blocks);
+}
+
+void cleanup_cuda_integer_are_all_comparisons_block_true(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr_void) {
+  // Calls release() on the buffer; *mem_ptr_void itself is not modified here.
+  int_comparison_buffer<uint64_t> *mem_ptr =
+      (int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
+  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
+}
+
+void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
+    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
+    uint32_t grouping_factor, uint32_t num_radix_blocks,
+    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
+    bool allocate_gpu_memory) {
+  // Bundle the individual parameters into one int_radix_params value.
+  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
+                          big_lwe_dimension, small_lwe_dimension, ks_level,
+                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
+                          message_modulus, carry_modulus);
+  // Same delegation as the "are_all" scratch entry point: EQ operator.
+  scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
+      (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
+      false, allocate_gpu_memory); // NOTE(review): `false` is presumably the is_signed flag — confirm
+}
+
+void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
+  // mem_ptr is reinterpreted as a 64-bit comparison buffer.
+  int_comparison_buffer<uint64_t> *buffer =
+      (int_comparison_buffer<uint64_t> *)mem_ptr;
+  // Cast the untyped arrays to uint64_t and forward to the host routine.
+  host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
+      static_cast<uint64_t *>(lwe_array_out),
+      static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
+      (uint64_t **)(ksks), num_radix_blocks);
+}
+
+void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
+    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
+    int8_t **mem_ptr_void) {
+  // Calls release() on the buffer; *mem_ptr_void itself is not modified here.
+  int_comparison_buffer<uint64_t> *mem_ptr =
+      (int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
+  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
+}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
@@ -38,7 +38,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
                                    uint32_t lwe_dimension,
                                    uint32_t num_radix_blocks) {

-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);
  int num_blocks = 0, num_threads = 0;
  int num_entries = (lwe_dimension + 1);
  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
@@ -58,7 +58,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
 template <typename Torus>
 __host__ void are_all_comparisons_block_true(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
+    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
    Torus *const *ksks, uint32_t num_radix_blocks) {

@@ -85,16 +85,19 @@ __host__ void are_all_comparisons_block_true(

  while (remaining_blocks > 0) {
    // Split in max_value chunks
-    uint32_t chunk_length = std::min(max_value, remaining_blocks);
-    int num_chunks = remaining_blocks / chunk_length;
+    int num_chunks = (remaining_blocks + max_value - 1) / max_value;

    // Since all blocks encrypt either 0 or 1, we can sum max_value of them
    // as in the worst case we will be adding `max_value` ones
    auto input_blocks = tmp_out;
    auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
-    auto is_equal_to_num_blocks_map =
-        &are_all_block_true_buffer->is_equal_to_lut_map;
+    auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
+    uint32_t chunk_lengths[num_chunks];
+    auto begin_remaining_blocks = remaining_blocks;
    for (int i = 0; i < num_chunks; i++) {
+      uint32_t chunk_length =
+          std::min(max_value, begin_remaining_blocks - i * max_value);
+      chunk_lengths[i] = chunk_length;
      accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
                                   input_blocks, big_lwe_dimension,
                                   chunk_length);
@@ -111,40 +114,46 @@ __host__ void are_all_comparisons_block_true(
      // is_non_zero_lut_buffer LUT
      lut = mem_ptr->eq_buffer->is_non_zero_lut;
    } else {
-      if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
-          (*is_equal_to_num_blocks_map).end()) {
-        // The LUT is already computed
-        lut = (*is_equal_to_num_blocks_map)[chunk_length];
-      } else {
+      if (chunk_lengths[num_chunks - 1] != max_value) {
        // LUT needs to be computed
-        auto new_lut =
-            new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
-                                     max_value, num_radix_blocks, true);
-
+        uint32_t chunk_length = chunk_lengths[num_chunks - 1];
        auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
          return x == chunk_length;
        };
        generate_device_accumulator<Torus>(
-            streams[0], gpu_indexes[0], new_lut->get_lut(0, 0), glwe_dimension,
+            streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
+            is_max_value_lut->get_degree(1),
+            is_max_value_lut->get_max_degree(1), glwe_dimension,
            polynomial_size, message_modulus, carry_modulus,
            is_equal_to_num_blocks_lut_f);

-        new_lut->broadcast_lut(streams, gpu_indexes, 0);
-
-        (*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
-        lut = new_lut;
+        Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
+        for (int index = 0; index < num_chunks; index++) {
+          if (index == num_chunks - 1) {
+            h_lut_indexes[index] = 1;
+          } else {
+            h_lut_indexes[index] = 0;
+          }
+        }
+        cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
+                                 h_lut_indexes, num_chunks * sizeof(Torus),
+                                 streams[0], gpu_indexes[0]);
+        is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
+        cuda_synchronize_stream(streams[0], gpu_indexes[0]);
+        free(h_lut_indexes);
      }
+      lut = is_max_value_lut;
    }

    // Applies the LUT
    if (remaining_blocks == 1) {
      // In the last iteration we copy the output to the final address
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
          ksks, 1, lut);
      return;
    } else {
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
          num_chunks, lut);
    }
@@ -160,7 +169,7 @@ __host__ void are_all_comparisons_block_true(
 template <typename Torus>
 __host__ void is_at_least_one_comparisons_block_true(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
+    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
    Torus *const *ksks, uint32_t num_radix_blocks) {

@@ -182,14 +191,18 @@ __host__ void is_at_least_one_comparisons_block_true(
  uint32_t remaining_blocks = num_radix_blocks;
  while (remaining_blocks > 0) {
    // Split in max_value chunks
-    uint32_t chunk_length = std::min(max_value, remaining_blocks);
-    int num_chunks = remaining_blocks / chunk_length;
+    int num_chunks = (remaining_blocks + max_value - 1) / max_value;

    // Since all blocks encrypt either 0 or 1, we can sum max_value of them
    // as in the worst case we will be adding `max_value` ones
    auto input_blocks = mem_ptr->tmp_lwe_array_out;
    auto accumulator = buffer->tmp_block_accumulated;
+    uint32_t chunk_lengths[num_chunks];
+    auto begin_remaining_blocks = remaining_blocks;
    for (int i = 0; i < num_chunks; i++) {
+      uint32_t chunk_length =
+          std::min(max_value, begin_remaining_blocks - i * max_value);
+      chunk_lengths[i] = chunk_length;
      accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
                                   input_blocks, big_lwe_dimension,
                                   chunk_length);
@@ -206,12 +219,12 @@ __host__ void is_at_least_one_comparisons_block_true(
    // Applies the LUT
    if (remaining_blocks == 1) {
      // In the last iteration we copy the output to the final address
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
          ksks, 1, lut);
      return;
    } else {
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, mem_ptr->tmp_lwe_array_out,
          accumulator, bsks, ksks, num_chunks, lut);
    }
@@ -292,7 +305,7 @@ __host__ void host_compare_with_zero_equality(
    }
  }

-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, sum, sum, bsks, ksks, num_sum_blocks,
      zero_comparison);
  are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
@@ -311,7 +324,7 @@ __host__ void host_integer_radix_equality_check_kb(

  // Applies the LUT for the comparison operation
  auto comparisons = mem_ptr->tmp_block_comparisons;
-  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+  legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, comparisons, lwe_array_1, lwe_array_2,
      bsks, ksks, num_radix_blocks, eq_buffer->operator_lut,
      eq_buffer->operator_lut->params.message_modulus);
@@ -358,14 +371,14 @@ __host__ void compare_radix_blocks_kb(

  // Apply LUT to compare to 0
  auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_out, bsks, ksks,
      num_radix_blocks, is_non_zero_lut);

  // Add one
  // Here Lhs can have the following values: (-1) % (message modulus * carry
  // modulus), 0, 1 So the output values after the addition will be: 0, 1, 2
-  host_integer_radix_add_scalar_one_inplace<Torus>(
+  legacy_host_integer_radix_add_scalar_one_inplace<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, big_lwe_dimension,
      num_radix_blocks, message_modulus, carry_modulus);
 }
@@ -409,7 +422,7 @@ __host__ void tree_sign_reduction(
    pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, big_lwe_dimension,
                       partial_block_count, 4);

-    integer_radix_apply_univariate_lookup_table_kb<Torus>(
+    legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
        streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
        partial_block_count >> 1, inner_tree_leaf);

@@ -449,12 +462,13 @@ __host__ void tree_sign_reduction(
    f = sign_handler_f;
  }
  generate_device_accumulator<Torus>(
-      streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
+      streams[0], gpu_indexes[0], last_lut->get_lut(0, 0),
+      last_lut->get_degree(0), last_lut->get_max_degree(0), glwe_dimension,
      polynomial_size, message_modulus, carry_modulus, f);
  last_lut->broadcast_lut(streams, gpu_indexes, 0);

  // Last leaf
-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, y, bsks, ksks, 1,
      last_lut);
 }
@@ -481,8 +495,9 @@ __host__ void host_integer_radix_difference_check_kb(
  if (carry_modulus >= message_modulus) {
    // Packing is possible
    // Pack inputs
-    Torus *packed_left = diff_buffer->tmp_packed_left;
-    Torus *packed_right = diff_buffer->tmp_packed_right;
+    Torus *packed_left = diff_buffer->tmp_packed;
+    Torus *packed_right =
+        diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
    // In case the ciphertext is signed, the sign block and the one before it
    // are handled separately
    if (mem_ptr->is_signed) {
@@ -499,12 +514,9 @@ __host__ void host_integer_radix_difference_check_kb(

    // Clean noise
    auto identity_lut = mem_ptr->identity_lut;
-    integer_radix_apply_univariate_lookup_table_kb<Torus>(
+    legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
        streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
-        packed_num_radix_blocks, identity_lut);
-    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
-        packed_num_radix_blocks, identity_lut);
+        2 * packed_num_radix_blocks, identity_lut);

    lhs = packed_left;
    rhs = packed_right;
@@ -533,16 +545,18 @@ __host__ void host_integer_radix_difference_check_kb(

      // Compare the last block before the sign block separately
      auto identity_lut = mem_ptr->identity_lut;
+      Torus *packed_left = diff_buffer->tmp_packed;
+      Torus *packed_right =
+          diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
      Torus *last_left_block_before_sign_block =
-          diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
+          packed_left + packed_num_radix_blocks * big_lwe_size;
      Torus *last_right_block_before_sign_block =
-          diff_buffer->tmp_packed_right +
-          packed_num_radix_blocks * big_lwe_size;
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
+          packed_right + packed_num_radix_blocks * big_lwe_size;
+      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
          lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
          identity_lut);
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, last_right_block_before_sign_block,
          lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks,
          1, identity_lut);
@@ -552,7 +566,7 @@ __host__ void host_integer_radix_difference_check_kb(
          last_left_block_before_sign_block, last_right_block_before_sign_block,
          mem_ptr, bsks, ksks, 1);
      // Compare the sign block separately
-      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count,
          comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
          lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
@@ -565,7 +579,7 @@ __host__ void host_integer_radix_difference_check_kb(
          streams, gpu_indexes, gpu_count, comparisons, lwe_array_left,
          lwe_array_right, mem_ptr, bsks, ksks, num_radix_blocks - 1);
      // Compare the sign block separately
-      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count,
          comparisons + (num_radix_blocks - 1) * big_lwe_size,
          lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
@@ -609,10 +623,41 @@ __host__ void host_integer_radix_maxmin_kb(
      ksks, total_num_radix_blocks);

  // Selector
-  host_integer_radix_cmux_kb<Torus>(
+  legacy_host_integer_radix_cmux_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out,
      mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
      mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
 }

+// Host entry point: writes to lwe_array_out a block encrypting 1 if all
+// num_radix_blocks blocks of lwe_array_in encrypt 1, otherwise a block
+// encrypting 0. The work is delegated to are_all_comparisons_block_true.
+template <typename Torus>
+__host__ void host_integer_are_all_comparisons_block_true_kb(
+    cudaStream_t const *streams, uint32_t const *gpu_indexes,
+    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
+    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
+    Torus *const *ksks, uint32_t num_radix_blocks) {
+
+  // mem_ptr is forwarded unchanged; no local alias of its members is needed
+  are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
+                                        lwe_array_out, lwe_array_in, mem_ptr,
+                                        bsks, ksks, num_radix_blocks);
+}
+
+// Host entry point: writes to lwe_array_out a block encrypting 1 if at least
+// one of the num_radix_blocks blocks of lwe_array_in encrypts 1, otherwise a
+// block encrypting 0. Delegates to is_at_least_one_comparisons_block_true.
+template <typename Torus>
+__host__ void host_integer_is_at_least_one_comparisons_block_true_kb(
+    cudaStream_t const *streams, uint32_t const *gpu_indexes,
+    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
+    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
+    Torus *const *ksks, uint32_t num_radix_blocks) {
+
+  // mem_ptr is forwarded unchanged; no local alias of its members is needed
+  is_at_least_one_comparisons_block_true<Torus>(
+      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_in, mem_ptr,
+      bsks, ksks, num_radix_blocks);
+}
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
@@ -2,6 +2,7 @@
 #define CUDA_INTEGER_COMPRESSION_CUH

 #include "ciphertext.h"
+#include "crypto/fast_packing_keyswitch.cuh"
 #include "crypto/keyswitch.cuh"
 #include "device.h"
 #include "integer/compression/compression.h"
@@ -49,7 +50,7 @@ __host__ void host_pack(cudaStream_t stream, uint32_t gpu_index,
  if (array_in == array_out)
    PANIC("Cuda error: Input and output must be different");

-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);
  auto compression_params = mem_ptr->compression_params;

  auto log_modulus = mem_ptr->storage_log_modulus;
@@ -116,7 +117,7 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
  while (rem_lwes > 0) {
    auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);

-    host_packing_keyswitch_lwe_list_to_glwe<Torus>(
+    host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
        streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
        fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
        compression_params.polynomial_size, compression_params.ks_base_log,
@@ -184,7 +185,7 @@ __host__ void host_extract(cudaStream_t stream, uint32_t gpu_index,
  if (array_in == glwe_array_out)
    PANIC("Cuda error: Input and output must be different");

-  cudaSetDevice(gpu_index);
+  cuda_set_device(gpu_index);

  auto compression_params = mem_ptr->compression_params;

@@ -300,7 +301,7 @@ __host__ void host_integer_decompress(
  /// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
  /// dimension to a big LWE dimension
  auto encryption_params = h_mem_ptr->encryption_params;
-  auto lut = h_mem_ptr->carry_extract_lut;
+  auto lut = h_mem_ptr->decompression_rescale_lut;
  auto active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
  if (active_gpu_count == 1) {
    execute_pbs_async<Torus>(
--- a/Show More
+++ b/Show More