clippy bug no span for large array on stack

chore(ci): toolchain update
chore: update dependencies
2026-04-28 03:01:21 -04:00 · 2024-12-03 11:00:31 +01:00 · 2024-12-02 16:34:19 +01:00 · 2024-12-02 11:12:46 +01:00
1124 changed files with 23101 additions and 134608 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,14 +8,8 @@ root = true
 end_of_line = lf
 insert_final_newline = true

-# 4 space indentation for rust and toml
-[*.{rs,toml}]
+# 4 space indentation
+[*.rs]
 charset = utf-8
 indent_style = space
 indent_size = 4
-
-# 2 for c and js
-[*.{js,json,c,h}]
-charset = utf-8
-indent_style = space
-indent_size = 2
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -5,7 +5,6 @@ self-hosted-runner:
    - 4090-desktop
    - large_windows_16_latest
    - large_ubuntu_16
-    - large_ubuntu_16-22.04
 # Configuration variables in array of strings defined in your repository or
 # organization. `null` means disabling configuration variables check.
 # Empty array means no configuration variable is allowed.
--- a/.github/actions/gpu_setup/action.yml
+++ b/.github/actions/gpu_setup/action.yml
@@ -1,63 +0,0 @@
-name: Setup Cuda
-description: Setup Cuda on Hyperstack or GitHub instance
-
-inputs:
-  cuda-version:
-    description: Version of Cuda to use
-    required: true
-  gcc-version:
-    description: Version of GCC to use
-    required: true
-  cmake-version:
-    description: Version of cmake to use
-    default: 3.29.6
-  github-instance:
-    description: Instance is hosted on GitHub
-    default: 'false'
-
-runs:
-  using: "composite"
-  steps:
-    # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-    - name: Install dependencies
-      shell: bash
-      run: |
-        sudo apt update
-        curl -fsSL https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/kitware.gpg
-        sudo chmod 644 /etc/apt/trusted.gpg.d/kitware.gpg
-        echo 'deb [signed-by=/etc/apt/trusted.gpg.d/kitware.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null
-        sudo apt update
-        sudo apt install -y cmake cmake-format libclang-dev
-
-    - name: Install CUDA
-      if: inputs.github-instance == 'true'
-      shell: bash
-      run: |
-        TOOLKIT_VERSION="$(echo ${{ inputs.cuda-version }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
-        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-        sudo dpkg -i cuda-keyring_1.1-1_all.deb
-        sudo apt update
-        sudo apt -y install cuda-toolkit-${TOOLKIT_VERSION}
-
-    - name: Export CUDA variables
-      shell: bash
-      run: |
-        CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
-        echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-        echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-        echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-        echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"
-
-    # Specify the correct host compilers
-    - name: Export gcc and g++ variables
-      shell: bash
-      run: |
-        {
-          echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
-          echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
-          echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
-        } >> "${GITHUB_ENV}"
-
-    - name: Check device is detected
-      shell: bash
-      run: nvidia-smi
--- a/.github/workflows/aws_tfhe_backward_compat_tests.yml
+++ b/.github/workflows/aws_tfhe_backward_compat_tests.yml
@@ -11,10 +11,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -26,12 +22,11 @@ jobs:
    name: Setup instance (backward-compat-tests)
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -40,18 +35,11 @@ jobs:
          backend: aws
          profile: cpu-small

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  backward-compat-tests:
    name: Backward compatibility tests
    needs: [ setup-instance ]
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -59,10 +47,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -88,7 +76,7 @@ jobs:
        with:
          persist-credentials: 'false'
          repository: zama-ai/tfhe-backward-compat-data
-          path: tests/tfhe-backward-compat-data
+          path: tfhe/tfhe-backward-compat-data
          lfs: 'true'
          ref: ${{ steps.backward_compat_branch.outputs.branch }}

@@ -102,18 +90,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (backward-compat-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, backward-compat-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -127,4 +114,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -12,10 +12,6 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -26,7 +22,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
      zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -58,13 +54,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            dependencies:
              - tfhe/Cargo.toml
@@ -107,7 +103,7 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - README.md

      - name: Aggregate file changes
@@ -128,17 +124,16 @@ jobs:

  setup-instance:
    name: Setup instance (fast-tests)
-    if: github.event_name == 'workflow_dispatch' ||
-      (github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
+    if: github.event_name != 'pull_request' ||
+      needs.should-run.outputs.any_file_changed == 'true'
    needs: should-run
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -147,18 +142,13 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  fast-tests:
    name: Fast CPU tests
+    if: github.event_name != 'pull_request' ||
+      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -166,10 +156,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -209,7 +199,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
+        uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
        with:
          path: |
            ~/.nvm
@@ -222,7 +212,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
+        uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -265,23 +255,22 @@ jobs:
          make test_zk

      - name: Slack Notification
-        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
+        if: ${{ failure() }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (fast-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, fast-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -295,4 +284,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -14,16 +14,12 @@ env:
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
  NO_BIG_PARAMS: FALSE
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  push:
    branches:
      - main
@@ -32,11 +28,12 @@ jobs:
  should-run:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
+      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      integer_test: ${{ github.event_name == 'workflow_dispatch' ||
        steps.changed-files.outputs.integer_any_changed }}
@@ -45,13 +42,14 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: "false"

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            integer:
              - tfhe/Cargo.toml
@@ -61,7 +59,6 @@ jobs:
              - tfhe/src/core_crypto/**
              - tfhe/src/shortint/**
              - tfhe/src/integer/**
-              - .github/workflows/aws_tfhe_integer_tests.yml

  setup-instance:
    name: Setup instance (unsigned-integer-tests)
@@ -69,16 +66,15 @@ jobs:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
-      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
+      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -87,18 +83,11 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  unsigned-integer-tests:
    name: Unsigned integer tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -106,10 +95,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -140,18 +129,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (unsigned-integer-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [setup-instance, unsigned-integer-tests]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -165,4 +153,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -14,16 +14,12 @@ env:
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
  NO_BIG_PARAMS: FALSE
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  push:
    branches:
      - main
@@ -37,7 +33,7 @@ jobs:
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      integer_test: ${{ github.event_name == 'workflow_dispatch' ||
        steps.changed-files.outputs.integer_any_changed }}
@@ -46,13 +42,14 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: "false"

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            integer:
              - tfhe/Cargo.toml
@@ -62,7 +59,6 @@ jobs:
              - tfhe/src/core_crypto/**
              - tfhe/src/shortint/**
              - tfhe/src/integer/**
-              - .github/workflows/aws_tfhe_signed_integer_tests.yml

  setup-instance:
    name: Setup instance (unsigned-integer-tests)
@@ -70,16 +66,15 @@ jobs:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
-      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
+      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -88,18 +83,11 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  signed-integer-tests:
    name: Signed integer tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -107,10 +95,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -145,18 +133,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (signed-integer-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [setup-instance, signed-integer-tests]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -170,4 +157,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -11,10 +11,6 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -31,7 +27,7 @@ jobs:
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
      zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -67,13 +63,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            dependencies:
              - tfhe/Cargo.toml
@@ -115,7 +111,7 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - README.md

      - name: Aggregate file changes
@@ -142,12 +138,11 @@ jobs:
    needs: should-run
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -156,20 +151,13 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cpu-tests:
    name: CPU tests
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
    concurrency:
-      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -177,10 +165,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -252,18 +240,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -277,4 +264,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -10,10 +10,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,12 +23,11 @@ jobs:
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -41,18 +36,11 @@ jobs:
          backend: aws
          profile: cpu-small

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  wasm-tests:
    name: WASM tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -60,10 +48,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -73,7 +61,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
+        uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
        with:
          path: |
            ~/.nvm
@@ -86,7 +74,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
+        uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -121,18 +109,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (wasm-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, wasm-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -146,4 +133,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/benchmark_boolean.yml
+++ b/.github/workflows/benchmark_boolean.yml
@@ -29,7 +29,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -51,8 +51,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -63,7 +62,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -94,7 +93,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}
@@ -104,8 +103,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -123,13 +121,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (boolean-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, boolean-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_core_crypto.yml
+++ b/.github/workflows/benchmark_core_crypto.yml
@@ -3,9 +3,6 @@ name: Core crypto benchmarks

 on:
  workflow_dispatch:
-  schedule:
-    # Weekly benchmarks will be triggered each Saturday at 5a.m.
-    - cron: '0 5 * * 6'

 env:
  CARGO_TERM_COLOR: always
@@ -29,7 +26,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -50,8 +47,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -62,7 +58,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -85,7 +81,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -95,8 +91,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -114,13 +109,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (core-crypto-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, core-crypto-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_erc20.yml
+++ b/.github/workflows/benchmark_erc20.yml
@@ -29,7 +29,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -52,8 +52,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -64,7 +63,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -73,8 +72,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks
        run: |
@@ -99,7 +97,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_erc20
          path: ${{ env.RESULTS_FILENAME }}
@@ -120,13 +118,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (erc20-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, erc20-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_gpu_4090.yml
+++ b/.github/workflows/benchmark_gpu_4090.yml
@@ -17,7 +17,7 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  schedule:
    # Weekly benchmarks will be triggered each Friday at 9p.m.
    - cron: "0 21 * * 5"
@@ -33,13 +33,16 @@ jobs:
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ["self-hosted", "4090-desktop"]
    timeout-minutes: 1440 # 24 hours
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -51,7 +54,7 @@ jobs:
          echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -60,8 +63,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run integer benchmarks
        run: |
@@ -80,7 +82,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_integer_multi_bit_gpu_default
          path: ${{ env.RESULTS_FILENAME }}
@@ -97,7 +99,7 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  cuda-core-crypto-benchmarks:
    name: Cuda core crypto benchmarks  (RTX 4090)
@@ -114,8 +116,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -126,7 +127,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -135,8 +136,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run core crypto benchmarks
        run: |
@@ -157,7 +157,7 @@ jobs:
      

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -182,7 +182,7 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  remove_github_label:
    name: Remove 4090 bench label
--- a/.github/workflows/benchmark_gpu_core_crypto.yml
+++ b/.github/workflows/benchmark_gpu_core_crypto.yml
@@ -27,7 +27,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -48,19 +48,27 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-
-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -70,11 +78,37 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CUDA_PATH=$CUDA_PATH";
+            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
+            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
+          } >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
      - name: Run benchmarks with AVX512
        run: |
          make bench_pbs_gpu
@@ -94,7 +128,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -104,8 +138,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -128,13 +161,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-integer-full-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-core-crypto-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_gpu_erc20.yml
+++ b/.github/workflows/benchmark_gpu_erc20.yml
@@ -12,10 +12,7 @@ on:
          - "l40 (n3-L40x1)"
          - "single-h100 (n3-H100x1)"
          - "2-h100 (n3-H100x2)"
-          - "4-h100 (n3-H100x4)"
          - "multi-h100 (n3-H100x8)"
-          - "multi-h100-nvlink (n3-H100x8-NVLink)"
-          - "multi-h100-sxm5 (n3-H100x8-SXM5)"

 jobs:
  parse-inputs:
--- a/.github/workflows/benchmark_gpu_erc20_common.yml
+++ b/.github/workflows/benchmark_gpu_erc20_common.yml
@@ -14,7 +14,7 @@ on:
        type: string
        required: true
    secrets:
-      REPO_CHECKOUT_TOKEN:
+      FHE_ACTIONS_TOKEN:
        required: true
      SLAB_ACTION_TOKEN:
        required: true
@@ -54,7 +54,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -75,19 +75,27 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-
-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -97,11 +105,40 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CUDA_PATH=$CUDA_PATH";
+            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
+            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
+          } >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run benchmarks
        run: |
          make bench_hlapi_erc20_gpu
@@ -120,9 +157,9 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
-          name: ${{ github.sha }}_erc20_${{ inputs.profile }}
+          name: ${{ github.sha }}_erc20
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
@@ -130,8 +167,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -154,13 +190,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_gpu_integer.yml
+++ b/.github/workflows/benchmark_gpu_integer.yml
@@ -15,7 +15,6 @@ on:
          - "4-h100 (n3-H100x4)"
          - "multi-h100 (n3-H100x8)"
          - "multi-h100-nvlink (n3-H100x8-NVLink)"
-          - "multi-h100-sxm5 (n3-H100x8-SXM5)"
          - "multi-a100-nvlink (n3-A100x8-NVLink)"
      command:
        description: "Benchmark command to run"
--- a/.github/workflows/benchmark_gpu_integer_common.yml
+++ b/.github/workflows/benchmark_gpu_integer_common.yml
@@ -26,7 +26,7 @@ on:
        type: boolean
        default: false
    secrets:
-      REPO_CHECKOUT_TOKEN:
+      FHE_ACTIONS_TOKEN:
        required: true
      SLAB_ACTION_TOKEN:
        required: true
@@ -118,7 +118,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -145,19 +145,27 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-
-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -167,11 +175,47 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CUDA_PATH=$CUDA_PATH";
+            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
+            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
+          } >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+          } >> "${GITHUB_ENV}"
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
        run: |
@@ -192,23 +236,14 @@ jobs:
          --commit-date "${{ env.COMMIT_DATE }}" \
          --bench-date "${{ env.BENCH_DATE }}" \
          --walk-subdirs \
-          --name-suffix avx512 \
-          --bench-type ${{ matrix.bench_type }}
+          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
-          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}

-      - name: Checkout Slab repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          repository: zama-ai/slab
-          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-
      - name: Send data to Slab
        shell: bash
        run: |
@@ -230,13 +265,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_integer.yml
+++ b/.github/workflows/benchmark_integer.yml
@@ -90,7 +90,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -119,8 +119,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -131,7 +130,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -140,8 +139,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
@@ -172,7 +170,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -193,13 +191,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_shortint.yml
+++ b/.github/workflows/benchmark_shortint.yml
@@ -56,7 +56,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -82,8 +82,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -94,7 +93,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -103,8 +102,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
@@ -138,7 +136,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -159,13 +157,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (shortint-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, shortint-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_signed_integer.yml
+++ b/.github/workflows/benchmark_signed_integer.yml
@@ -90,7 +90,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -119,8 +119,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -131,7 +130,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -140,8 +139,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
@@ -166,7 +164,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -187,13 +185,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, signed-integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_tfhe_fft.yml
+++ b/.github/workflows/benchmark_tfhe_fft.yml
@@ -32,7 +32,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_fft
          path: ${{ env.RESULTS_FILENAME }}
@@ -94,8 +94,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -127,7 +126,7 @@ jobs:
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_tfhe_ntt.yml
+++ b/.github/workflows/benchmark_tfhe_ntt.yml
@@ -32,7 +32,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_ntt
          path: ${{ env.RESULTS_FILENAME }}
@@ -94,8 +94,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -127,7 +126,7 @@ jobs:
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_tfhe_zk_pok.yml
+++ b/.github/workflows/benchmark_tfhe_zk_pok.yml
@@ -3,14 +3,6 @@ name: tfhe-zk-pok benchmarks

 on:
  workflow_dispatch:
-    inputs:
-      bench_type:
-        description: "Benchmarks type"
-        type: choice
-        default: latency
-        options:
-          - latency
-          - throughput
  push:
    branches:
      - main
@@ -28,7 +20,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  BENCH_TYPE: ${{ inputs.bench_type || 'latency' }}

 jobs:
  should-run:
@@ -45,8 +36,9 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            zk_pok:
              - tfhe-zk-pok/**
@@ -66,7 +58,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -88,8 +80,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -100,7 +91,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -109,12 +100,11 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks
        run: |
-          make BENCH_TYPE=${{ env.BENCH_TYPE }} bench_tfhe_zk_pok
+          make bench_tfhe_zk_pok

      - name: Parse results
        run: |
@@ -128,11 +118,10 @@ jobs:
          --commit-date "${{ env.COMMIT_DATE }}" \
          --bench-date "${{ env.BENCH_DATE }}" \
          --walk-subdirs \
-          --name-suffix avx512 \
-          --bench-type ${{ env.BENCH_TYPE }}
+          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_tfhe_zk_pok
          path: ${{ env.RESULTS_FILENAME }}
@@ -142,8 +131,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -161,13 +149,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (tfhe-zk-pok-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_wasm_client.yml
+++ b/.github/workflows/benchmark_wasm_client.yml
@@ -28,7 +28,7 @@ jobs:
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
    steps:
@@ -36,13 +36,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            wasm_bench:
              - tfhe/Cargo.toml
@@ -65,7 +65,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -88,8 +88,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -100,7 +99,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -110,7 +109,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
+        uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
        with:
          path: |
            ~/.nvm
@@ -123,7 +122,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
+        uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -167,7 +166,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_wasm_${{ matrix.browser }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -177,8 +176,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -196,13 +194,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (wasm-client-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, wasm-client-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/benchmark_zk_pke.yml
+++ b/.github/workflows/benchmark_zk_pke.yml
@@ -43,13 +43,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            zk_pok:
              - tfhe/Cargo.toml
@@ -104,7 +104,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -130,8 +130,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -142,7 +141,7 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: nightly

@@ -151,8 +150,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
@@ -179,7 +177,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
        with:
          name: ${{ github.sha }}_integer_zk
          path: ${{ env.RESULTS_FILENAME }}
@@ -189,8 +187,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -208,13 +205,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (pke-zk-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, pke-zk-benchmarks ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -28,7 +28,7 @@ jobs:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

--- a/.github/workflows/cargo_build_tfhe_ntt.yml
+++ b/.github/workflows/cargo_build_tfhe_ntt.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-builds-ntt:
+  cargo-builds:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
--- a/.github/workflows/cargo_test_fft.yml
+++ b/.github/workflows/cargo_test_fft.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-tests-fft:
+  cargo-tests:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -38,7 +38,7 @@ jobs:
        run: |
          make test_fft_no_std

-  cargo-tests-fft-nightly:
+  cargo-tests-nightly:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -60,7 +60,7 @@ jobs:
        run: |
          make test_fft_no_std_nightly

-  cargo-tests-fft-node-js:
+  cargo-tests-node-js:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
--- a/.github/workflows/cargo_test_ntt.yml
+++ b/.github/workflows/cargo_test_ntt.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-tests-ntt:
+  cargo-tests:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@@ -33,7 +33,7 @@ jobs:
      - name: Test no-std
        run: make test_ntt_no_std

-  cargo-tests-ntt-nightly:
+  cargo-tests-nightly:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
--- a/.github/workflows/check_commit.yml
+++ b/.github/workflows/check_commit.yml
@@ -2,7 +2,6 @@
 name: Check commit and PR compliance
 on:
  pull_request:
-
 jobs:
  check-commit-pr:
    name: Check commit and PR
--- a/.github/workflows/ci_lint.yml
+++ b/.github/workflows/ci_lint.yml
@@ -6,7 +6,6 @@ on:

 env:
  ACTIONLINT_VERSION: 1.6.27
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 jobs:
  lint-check:
@@ -15,9 +14,6 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Get actionlint
        run: |
@@ -31,8 +27,7 @@ jobs:
          make lint_workflow

      - name: Ensure SHA pinned actions
-        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@25ed13d0628a1601b4b44048e63cc4328ed03633 # v3.0.22
+        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
        with:
          allowlist: |
            slsa-framework/slsa-github-generator
-            ./
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -25,7 +25,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -47,13 +47,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
          files_yaml: |
            tfhe:
@@ -83,7 +83,7 @@ jobs:
          make test_shortint_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
+        uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
          make test_integer_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
+        uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -115,13 +115,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (code-coverage)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, code-coverage ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -10,10 +10,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,12 +23,11 @@ jobs:
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -41,18 +36,11 @@ jobs:
          backend: aws
          profile: cpu-small

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  csprng-randomness-tests:
    name: CSPRNG randomness tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -60,10 +48,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -77,18 +65,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (csprng-randomness-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, csprng-randomness-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -102,4 +89,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_4090_tests.yml
+++ b/.github/workflows/gpu_4090_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an RTX 4090 machine
-name: Cuda - 4090 full tests
+name: TFHE Cuda Backend - 4090 full tests

 env:
  CARGO_TERM_COLOR: always
@@ -11,7 +11,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -29,7 +28,7 @@ jobs:
      contains(github.event.label.name, '4090_test') ||
      (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ["self-hosted", "4090-desktop"]

@@ -38,10 +37,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -81,4 +80,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_fast_h100_tests.yml
+++ b/.github/workflows/gpu_fast_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: Cuda - Fast tests on H100
+name: TFHE Cuda Backend - Fast tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,22 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_fast_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -68,12 +64,11 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -82,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA H100 tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -106,25 +94,60 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # This install step is mandatory on Hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run core crypto and internal CUDA backend tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
@@ -151,22 +174,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -180,4 +201,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_fast_tests.yml
+++ b/.github/workflows/gpu_fast_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: Cuda - Fast tests
+name: TFHE Cuda Backend - Fast tests

 env:
  CARGO_TERM_COLOR: always
@@ -12,10 +12,6 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -26,7 +22,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -34,13 +30,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -54,7 +50,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_fast_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -62,16 +58,15 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-tests)
    needs: should-run
-    if: github.event_name == 'workflow_dispatch' ||
+    if: github.event_name != 'pull_request' ||
      needs.should-run.outputs.gpu_test == 'true'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -80,20 +75,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -104,25 +92,60 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # This install step is mandatory on Hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run core crypto and internal CUDA backend tests
        run: |
          make test_core_crypto_gpu
@@ -149,22 +172,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -178,4 +199,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_full_h100_tests.yml
+++ b/.github/workflows/gpu_full_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: Cuda - Full tests on H100
+name: TFHE Cuda Backend - Full tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -11,6 +11,7 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}

 on:
  workflow_dispatch:
@@ -24,7 +25,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -48,6 +49,9 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
      - name: Install dependencies
@@ -65,19 +69,40 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run core crypto, integer and internal CUDA backend tests
        run: |
          make test_gpu
@@ -105,7 +130,7 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
@@ -114,7 +139,7 @@ jobs:
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -128,4 +153,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_full_multi_gpu_tests.yml
+++ b/.github/workflows/gpu_full_multi_gpu_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: Cuda - Full tests multi-GPU
+name: TFHE Cuda Backend - Full tests multi-GPU

 env:
  CARGO_TERM_COLOR: always
@@ -12,10 +12,6 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,7 +23,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/**_multi_gpu_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -68,12 +64,11 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -82,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: multi-gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA multi-GPU tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -106,25 +94,60 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # This install step is mandatory on Hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run multi-bit CUDA integer compression tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
@@ -154,22 +177,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests-multi-gpu)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -183,4 +204,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_integer_long_run_tests.yml
+++ b/.github/workflows/gpu_integer_long_run_tests.yml
@@ -1,4 +1,4 @@
-name: Long Run Tests on GPU
+name: AWS Long Run Tests on GPU

 env:
  CARGO_TERM_COLOR: always
@@ -15,8 +15,8 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
-    # Weekly tests will be triggered each Friday at 9p.m.
-    - cron: "0 21 * * 5"
+    # Weekly tests will be triggered each Friday at 1 a.m. (UTC).
+    - cron: '0 1 * * FRI'

 jobs:
  setup-instance:
@@ -29,17 +29,17 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: hyperstack
-          profile: multi-gpu-test
+          profile: single-h100

  cuda-tests:
-    name: Long run GPU tests
+    name: Long run GPU H100 tests
    needs: [ setup-instance ]
    concurrency:
      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
@@ -53,22 +53,57 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
-    timeout-minutes: 4320 # 72 hours
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Dependencies must be installed on every run on Hyperstack, since a bootable volume is not reusable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run tests
        run: |
          make test_integer_long_run_gpu
@@ -84,17 +119,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests.result }}
-          SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (gpu-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/gpu_pcc.yml
+++ b/.github/workflows/gpu_pcc.yml
@@ -1,5 +1,5 @@
 # Perfom tfhe-cuda-backend post-commit checks on an AWS instance
-name: Cuda - Post-commit Checks
+name: TFHE Cuda Backend - Post-commit Checks

 env:
  CARGO_TERM_COLOR: always
@@ -11,10 +11,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16-22.04"

 on:
  pull_request:
@@ -24,12 +20,11 @@ jobs:
    name: Setup instance (cuda-pcc)
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -38,18 +33,11 @@ jobs:
          backend: aws
          profile: gpu-build

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-pcc:
    name: CUDA post-commit checks
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -68,20 +56,14 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Install CUDA
-        if: env.SECRETS_AVAILABLE == 'false'
-        shell: bash
+      - name: Set up home
        run: |
-          TOOLKIT_VERSION="$(echo ${{ matrix.cuda }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
-          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt update
-          sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -101,6 +83,7 @@ jobs:
            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
          } >> "${GITHUB_ENV}"

      - name: Run fmt checks
@@ -112,23 +95,22 @@ jobs:
          make pcc_gpu

      - name: Slack Notification
-        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
+        if: ${{ failure() }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-pcc)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-pcc ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -142,4 +124,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_classic_tests.yml
+++ b/.github/workflows/gpu_signed_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
-name: Cuda - Signed integer tests with classical PBS
+name: TFHE Cuda Backend - Signed integer tests with classical PBS

 env:
  CARGO_TERM_COLOR: always
@@ -12,22 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_signed_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -68,12 +64,11 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -82,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA signed integer tests with classical PBS
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -106,25 +94,58 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Dependencies must be installed on every run on Hyperstack, since a bootable volume is not reusable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run signed integer tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
@@ -137,22 +158,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-signed-classic-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -166,4 +185,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_h100_tests.yml
+++ b/.github/workflows/gpu_signed_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an H100 VM on hyperstack
-name: Cuda - Signed integer tests on H100
+name: TFHE Cuda Backend - Signed integer tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,23 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
-
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -36,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -56,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_signed_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -69,12 +64,11 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -83,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA H100 signed integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -107,25 +94,58 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Dependencies must be installed on every run on Hyperstack, since a bootable volume is not reusable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run signed integer multi-bit tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
@@ -138,22 +158,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -167,4 +185,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_tests.yml
+++ b/.github/workflows/gpu_signed_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend signed integer on an AWS instance
-name: Cuda - Signed integer tests
+name: TFHE Cuda Backend - Signed integer tests

 env:
  CARGO_TERM_COLOR: always
@@ -14,15 +14,14 @@ env:
  FAST_TESTS: TRUE
  NIGHTLY_TESTS: FALSE
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
+    types:
+      - opened
+      - synchronize
  schedule:
    # Nightly tests @ 1AM after each work day
    - cron: "0 1 * * MON-FRI"
@@ -31,7 +30,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -39,13 +38,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -59,10 +58,11 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_signed_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
+
  setup-instance:
    name: Setup instance (cuda-signed-integer-tests)
    runs-on: ubuntu-latest
@@ -71,12 +71,11 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      needs.should-run.outputs.gpu_test == 'true'
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -85,20 +84,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-signed-integer-tests:
    name: CUDA signed integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -109,25 +101,57 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Dependencies must be installed on every run on Hyperstack, since a bootable volume is not reusable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
      - name: Should run nightly tests
        if: github.event_name == 'schedule'
        run: |
@@ -136,6 +160,10 @@ jobs:
            echo "NIGHTLY_TESTS=TRUE";
          } >> "${GITHUB_ENV}"

+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run signed integer multi-bit tests
        run: |
          make test_signed_integer_multi_bit_gpu_ci
@@ -151,18 +179,17 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
-          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-signed-integer-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -176,4 +203,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_classic_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
-name: Cuda - Unsigned integer tests with classical PBS
+name: TFHE Cuda Backend - Unsigned integer tests with classical PBS

 env:
  CARGO_TERM_COLOR: always
@@ -12,23 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
-
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -36,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -56,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -64,17 +59,16 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-unsigned-classic-tests)
    needs: should-run
-    if: github.event_name == 'workflow_dispatch' ||
+    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -83,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA unsigned integer tests with classical PBS
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -107,25 +94,58 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Dependencies must be installed on every run on Hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run unsigned integer tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
@@ -138,22 +158,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-unsigned-classic-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -167,4 +185,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_h100_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an H100 VM on hyperstack
-name: Cuda - Unsigned integer tests on H100
+name: TFHE Cuda Backend - Unsigned integer tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,22 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -63,17 +59,16 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-h100-tests)
    needs: should-run
-    if: github.event_name == 'workflow_dispatch' ||
+    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -82,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA H100 unsigned integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -106,25 +94,58 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11 
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Dependencies must be installed on every run on Hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run unsigned integer multi-bit tests
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
@@ -137,22 +158,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -166,4 +185,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
-name: Cuda - Unsigned integer tests
+name: TFHE Cuda Backend - Unsigned integer tests

 env:
  CARGO_TERM_COLOR: always
@@ -13,16 +13,14 @@ env:
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  FAST_TESTS: TRUE
  NIGHTLY_TESTS: FALSE
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types:
+      - opened
+      - synchronize
  schedule:
    # Nightly tests @ 1AM after each work day
    - cron: "0 1 * * MON-FRI"
@@ -31,7 +29,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -39,13 +37,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -59,7 +57,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_unsigned_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -72,12 +70,11 @@ jobs:
      needs.should-run.outputs.gpu_test == 'true'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -86,20 +83,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-unsigned-integer-tests:
    name: CUDA unsigned integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -110,25 +100,54 @@ jobs:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 11
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+      CMAKE_VERSION: 3.29.6
    steps:
+      # Dependencies must be installed on every run on Hyperstack since a bootable volume is not re-usable yet.
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
+          cd cmake-${{ env.CMAKE_VERSION }}
+          ./bootstrap
+          make -j"$(nproc)"
+          sudo make install
+
+
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

-      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
-        with:
-          cuda-version: ${{ matrix.cuda }}
-          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          {
+            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
+            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
+          } >> "${GITHUB_ENV}"
+
      - name: Should run nightly tests
        if: github.event_name == 'schedule'
        run: |
@@ -137,6 +156,10 @@ jobs:
            echo "NIGHTLY_TESTS=TRUE";
          } >> "${GITHUB_ENV}"

+      - name: Check device is detected
+        if: ${{ !cancelled() }}
+        run: nvidia-smi
+
      - name: Run unsigned integer multi-bit tests
        run: |
          make test_unsigned_integer_multi_bit_gpu_ci
@@ -149,22 +172,20 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-unsigned-integer-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -178,4 +199,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/integer_long_run_tests.yml
+++ b/.github/workflows/integer_long_run_tests.yml
@@ -15,8 +15,8 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
-    # Weekly tests will be triggered each Friday at 9p.m.
-    - cron: "0 21 * * 5"
+    # Weekly tests will be triggered each Friday at 1 a.m. (UTC).
+    - cron: '0 1 * * FRI'

 jobs:
  setup-instance:
@@ -29,7 +29,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -45,16 +45,15 @@ jobs:
      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
-    timeout-minutes: 4320 # 72 hours
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -72,13 +71,13 @@ jobs:

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -3,7 +3,7 @@ name: Tests on M1 CPU
 on:
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  # Have a nightly build for M1 tests
  schedule:
    # * is a special character in YAML so you have to quote this string
@@ -21,17 +21,14 @@ env:
  # We clear the cache to reduce memory pressure because of the numerous processes of cargo
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 concurrency:
-  group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
+  group: ${{ github.workflow }}-${{ github.head_ref }}
  cancel-in-progress: true

 jobs:
  cargo-builds-m1:
-    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') ||
-      github.event_name == 'workflow_dispatch' ||
-      contains(github.event.label.name, 'm1_test') }}
+    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
    runs-on: ["self-hosted", "m1mac"]
    # 12 hours, default is 6 hours, hopefully this is more than enough
    timeout-minutes: 720
@@ -40,10 +37,9 @@ jobs:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -195,8 +191,6 @@ jobs:
          SLACK_COLOR: ${{ needs.cargo-builds-m1.result }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }}. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-          MSG_MINIMAL: event,action url,commit
-          BRANCH: ${{ github.ref }}
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -43,15 +43,14 @@ jobs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
      - name: Prepare package
        run: |
          cargo package -p tfhe
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
+      - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
        with:
          name: crate
          path: target/package/*.crate
@@ -78,17 +77,15 @@ jobs:
    name: Publish Release
    needs: [package] # for comparing hashes
    runs-on: ubuntu-latest
-    # For provenance of npmjs publish
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
      - name: Create NPM version tag
        if: ${{ inputs.npm_latest_tag }}
        run: |
@@ -113,7 +110,7 @@ jobs:
      - name: Slack notification (hashes comparison)
        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: failure
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -158,7 +155,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -0,0 +1,53 @@
+# Publish new release of tfhe-csprng on crates.io.
+name: Publish tfhe-csprng release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  verify_tag:
+    uses: ./.github/workflows/verify_tagged_commit.yml
+    secrets:
+      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
+      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
+
+  publish_release:
+    name: Publish tfhe-csprng Release
+    needs: verify_tag
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          fetch-depth: 0
+          # Keep the checkout token out of .git/config so later steps cannot read it.
+          persist-credentials: 'false'
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
+      # CRATES_TOKEN is read from the step environment by the shell; DRY_RUN is '--dry-run' or empty.
+      - name: Publish crate.io package
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p tfhe-csprng --token "${CRATES_TOKEN}" ${{ env.DRY_RUN }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_cuda.yml
+++ b/.github/workflows/make_release_cuda.yml
@@ -1,3 +1,4 @@
+# Publish new release of tfhe-cuda-backend on crates.io.
 name: Publish CUDA release

 on:
@@ -7,6 +8,10 @@ on:
        description: "Dry-run"
        type: boolean
        default: true
+      push_to_crates:
+        description: "Push to crate"
+        type: boolean
+        default: true

 env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
@@ -31,7 +36,7 @@ jobs:
    steps:
      - name: Start instance
        id: start-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -40,12 +45,10 @@ jobs:
          backend: aws
          profile: gpu-build

-  package:
-    name: Package CUDA Release for provenance
+  publish-cuda-release:
+    name: Publish CUDA Release
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
@@ -58,76 +61,17 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: "false"
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
-        with:
-          toolchain: stable
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-cuda-backend
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
-  publish-cuda-release:
-    name: Publish CUDA Release
-    needs: [setup-instance, package] # for comparing hashes
-    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 9
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-    steps:
-      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
+        uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
        with:
          toolchain: stable

@@ -153,45 +97,30 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Publish crate.io package
+        if: ${{ inputs.push_to_crates }}
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-cuda-backend --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-cuda-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (publish-release)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
-    needs: [setup-instance, publish-cuda-release]
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    needs: [ setup-instance, publish-cuda-release ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
--- a/.github/workflows/make_release_tfhe_csprng.yml
+++ b/.github/workflows/make_release_tfhe_csprng.yml
@@ -1,103 +0,0 @@
-name: Publish tfhe-csprng release
-
-on:
-  workflow_dispatch:
-    inputs:
-      dry_run:
-        description: "Dry-run"
-        type: boolean
-        default: true
-
-env:
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-
-jobs:
-  verify_tag:
-    uses: ./.github/workflows/verify_tagged_commit.yml
-    secrets:
-      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
-      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
-
-  package:
-    runs-on: ubuntu-latest
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-csprng
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        with:
-          name: crate-tfhe-csprng
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
-
-  publish_release:
-    name: Publish tfhe-csprng Release
-    needs: [verify_tag, package]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-      - name: Download artifact
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: crate-tfhe-csprng
-          path: target/package
-      - name: Publish crate.io package
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
-        run: |
-          cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-csprng - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_tfhe_fft.yml
+++ b/.github/workflows/make_release_tfhe_fft.yml
@@ -19,53 +19,15 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

-  package:
-    runs-on: ubuntu-latest
-    needs: verify_tag
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-fft
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        with:
-          name: crate
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  publish_release:
    name: Publish tfhe-fft Release
    runs-on: ubuntu-latest
-    needs: [verify_tag, package] # for comparing hashes
+    needs: verify_tag
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Publish crate.io package
        env:
@@ -74,26 +36,10 @@ jobs:
        run: |
          cargo publish -p tfhe-fft --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-fft crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_ntt.yml
+++ b/.github/workflows/make_release_tfhe_ntt.yml
@@ -19,50 +19,13 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

-  package:
-    runs-on: ubuntu-latest
-    needs: verify_tag
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-ntt
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        with:
-          name: crate
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  publish_release:
    name: Publish tfhe-ntt Release
    runs-on: ubuntu-latest
-    needs: [verify_tag, package] # for comparing hashes
+    needs: verify_tag
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0

@@ -73,26 +36,10 @@ jobs:
        run: |
          cargo publish -p tfhe-ntt --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-ntt crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_versionable.yml
+++ b/.github/workflows/make_release_tfhe_versionable.yml
@@ -18,161 +18,35 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

-  package-derive:
-    runs-on: ubuntu-latest
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-versionable-derive
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        with:
-          name: crate-tfhe-versionable-derive
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance-derive:
-    needs: [package-derive]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package-derive.outputs.hash }}
-
-  publish_release-derive:
-    name: Publish tfhe-versionable Release
-    needs: [verify_tag, package-derive] # for comparing hashes
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-      - name: Download artifact
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: crate-tfhe-versionable-derive
-          path: target/package
-      - name: Publish crate.io package
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-        run: |
-          cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package-derive.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-versionable-derive - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-  package:
-    runs-on: ubuntu-latest
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          fetch-depth: 0
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-versionable
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        with:
-          name: crate-tfhe-versionable
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  publish_release:
    name: Publish tfhe-versionable Release
-    needs: [package] # for comparing hashes
+    needs: verify_tag
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-      - name: Download artifact
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: crate-tfhe-versionable
-          path: target/package
-      - name: Publish crate.io package
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
+      - name: Publish proc-macro crate
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p tfhe-versionable-derive --token "${CRATES_TOKEN}" ${{ env.DRY_RUN }}
+
+      - name: Publish main crate
+        if: ${{ ! inputs.dry_run }}
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: |
-          cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-
-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-versionable - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+          cargo publish -p tfhe-versionable --token "${CRATES_TOKEN}"

      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_zk_pok.yml
+++ b/.github/workflows/make_release_zk_pok.yml
@@ -1,3 +1,4 @@
+# Publish new release of tfhe-zk-pok on crates.io.
 name: Publish tfhe-zk-pok release

 on:
@@ -12,40 +13,6 @@ env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

 jobs:
-  package:
-      runs-on: ubuntu-latest
-      outputs:
-        hash: ${{ steps.hash.outputs.hash }}
-      steps:
-        - name: Checkout
-          uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-          with:
-            fetch-depth: 0
-        - name: Prepare package
-          run: |
-            cargo package -p tfhe-zk-pok
-        - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-          with:
-            name: crate-zk-pok
-            path: target/package/*.crate
-        - name: generate hash
-          id: hash
-          run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  verify_tag:
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
@@ -54,44 +21,26 @@ jobs:

  publish_release:
    name: Publish tfhe-zk-pok Release
-    needs: [verify_tag, package] # for comparing hashes
+    needs: verify_tag
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-      - name: Download artifact
-        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
-        with:
-          name: crate-zk-pok
-          path: target/package
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
      - name: Publish crate.io package
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-zk-pok --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-      - name: Verify hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-zk-pok crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -14,7 +14,7 @@ on:

 jobs:
  params-curves-security-check:
-    runs-on: large_ubuntu_16-22.04
+    runs-on: large_ubuntu_16
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -16,8 +16,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
      - name: git-sync
        uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
        with:
--- a/.gitignore
+++ b/.gitignore
@@ -32,8 +32,5 @@ web-test-runner/
 node_modules/
 package-lock.json

-# Python .env
-.env
-
 # Dir used for backward compatibility test data
-tests/tfhe-backward-compat-data/
+tfhe/tfhe-backward-compat-data/
--- a/.linelint.yml
+++ b/.linelint.yml
@@ -1,15 +1,11 @@
 ignore:
  - .git
  - target
-  - tfhe/build
-  - venv
-  - web-test-runner
  - tfhe/benchmarks_parameters
  - tfhe/web_wasm_parallel_tests/node_modules
  - tfhe/web_wasm_parallel_tests/dist
  - keys
  - coverage
-  - utils/tfhe-lints/ui/main.stderr

 rules:
  # checks if file ends in a newline character
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,24 +9,26 @@ members = [
    "apps/trivium",
    "tfhe-csprng",
    "backends/tfhe-cuda-backend",
-    "backends/tfhe-hpu-backend",
    "utils/tfhe-versionable",
    "utils/tfhe-versionable-derive",
-    "mockups/tfhe-hpu-mockup",
 ]

-exclude = ["tests/backward_compatibility_tests", "utils/tfhe-lints"]
+exclude = [
+    "tfhe/backward_compatibility_tests",
+    "utils/cargo-tfhe-lints-inner",
+    "utils/cargo-tfhe-lints"
+]
 [workspace.dependencies]
 aligned-vec = { version = "0.6", default-features = false }
 bytemuck = "1.14.3"
 dyn-stack = { version = "0.11", default-features = false }
-itertools = "0.14"
+itertools = "0.13"
 num-complex = "0.4"
-pulp = { version = "0.20", default-features = false }
+pulp = { version = "0.20.0", default-features = false }
 rand = "0.8"
 rayon = "1"
 serde = { version = "1.0", default-features = false }
-wasm-bindgen = "0.2.100"
+wasm-bindgen = ">=0.2.86,<0.2.94"

 [profile.bench]
 lto = "fat"
@@ -44,6 +46,3 @@ inherits = "dev"
 opt-level = 3
 lto = "off"
 debug-assertions = false
-
-[workspace.metadata.dylint]
-libraries = [{ path = "utils/tfhe-lints" }]
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2025 ZAMA.
+Copyright © 2024 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ SHELL:=$(shell /usr/bin/env which bash)
 OS:=$(shell uname)
 RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
 CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
+TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
 CPU_COUNT=$(shell ./scripts/cpu_count.sh)
 RS_BUILD_TOOLCHAIN:=stable
 CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
@@ -20,7 +21,7 @@ BENCH_OP_FLAVOR?=DEFAULT
 BENCH_TYPE?=latency
 NODE_VERSION=22.6
 BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
-BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
+BACKWARD_COMPAT_DATA_BRANCH?=v0.4
 BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
 BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
 TFHE_SPEC:=tfhe
@@ -117,7 +118,7 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
 .PHONY: install_wasm_pack # Install wasm-pack to build JS packages
 install_wasm_pack: install_rs_build_toolchain
 	@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
-	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@$(WASM_PACK_VERSION) || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
 	( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )

 .PHONY: install_node # Install last version of NodeJS via nvm
@@ -151,9 +152,10 @@ install_tarpaulin: install_rs_build_toolchain
 	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
 	( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )

-.PHONY: install_cargo_dylint # Install custom tfhe-rs lints
-install_cargo_dylint:
-	cargo install cargo-dylint dylint-link
+.PHONY: install_tfhe_lints # Install custom tfhe-rs lints
+install_tfhe_lints:
+	(cd utils/cargo-tfhe-lints-inner && cargo install --path .) && \
+	cd utils/cargo-tfhe-lints && cargo install --path .

 .PHONY: install_typos_checker # Install typos checker
 install_typos_checker: install_rs_build_toolchain
@@ -242,8 +244,7 @@ fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt && \
-	$(MAKE) -C tfhe/js_on_wasm_tests fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt

 .PHONY: fmt_gpu # Format rust and cuda code
 fmt_gpu: install_rs_check_toolchain
@@ -272,8 +273,7 @@ check_fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt && \
-	$(MAKE) -C tfhe/js_on_wasm_tests check_fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt

 .PHONY: check_typos # Check for typos in codebase
 check_typos: install_typos_checker
@@ -282,14 +282,14 @@ check_typos: install_typos_checker
 .PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
 clippy_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=boolean,shortint,integer,internal-keycache,gpu \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
 		--all-targets \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: check_gpu # Run check on tfhe with "gpu" enabled
 check_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
-		--features=boolean,shortint,integer,internal-keycache,gpu \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
 		--all-targets \
 		-p $(TFHE_SPEC)

@@ -308,51 +308,52 @@ lint_workflow: check_actionlint_installed
 .PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
 clippy_core: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		--features=$(TARGET_ARCH_FEATURE) \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=experimental \
+		--features=$(TARGET_ARCH_FEATURE),experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=nightly-avx512 \
+		--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=experimental,nightly-avx512 \
+		--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=zk-pok \
+		--features=$(TARGET_ARCH_FEATURE),zk-pok \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_boolean # Run clippy lints enabling the boolean features
 clippy_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=boolean \
+		--features=$(TARGET_ARCH_FEATURE),boolean \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_shortint # Run clippy lints enabling the shortint features
 clippy_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=shortint \
+		--features=$(TARGET_ARCH_FEATURE),shortint \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=shortint,experimental \
+		--features=$(TARGET_ARCH_FEATURE),shortint,experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=zk-pok,shortint \
+		--features=$(TARGET_ARCH_FEATURE),zk-pok,shortint \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_integer # Run clippy lints enabling the integer features
 clippy_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=integer \
+		--features=$(TARGET_ARCH_FEATURE),integer \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=integer,experimental \
+		--features=$(TARGET_ARCH_FEATURE),integer,experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
 clippy: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=boolean,shortint,integer \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
@@ -363,24 +364,13 @@ clippy_rustdoc: install_rs_check_toolchain
 	fi && \
 	CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
 		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
-		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental \
-		-p $(TFHE_SPEC)
-
-.PHONY: clippy_rustdoc_gpu # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
-clippy_rustdoc_gpu: install_rs_check_toolchain
-	if [[ "$(OS)" != "Linux" ]]; then \
-		echo "WARNING: skipped clippy_rustdoc_gpu, unsupported OS $(OS)"; \
-		exit 0; \
-	fi && \
-	CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
-		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
-		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok,pbs-stats,strings \
 		-p $(TFHE_SPEC)

 .PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
 clippy_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=boolean-c-api,shortint-c-api,high-level-c-api \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
@@ -405,16 +395,17 @@ clippy_trivium: install_rs_check_toolchain
 .PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
 clippy_all_targets: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
 clippy_tfhe_csprng: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=parallel,software-prng -p tfhe-csprng -- --no-deps -D warnings
+		--features=$(TARGET_ARCH_FEATURE) \
+		-p tfhe-csprng -- --no-deps -D warnings

 .PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
 clippy_zk_pok: install_rs_check_toolchain
@@ -428,15 +419,10 @@ clippy_versionable: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		-p tfhe-versionable -- --no-deps -D warnings

-.PHONY: clippy_tfhe_lints # Run clippy lints on tfhe-lints
-clippy_tfhe_lints: install_cargo_dylint # the toolchain is selected with toolchain.toml
-	cd utils/tfhe-lints && \
-	cargo clippy --all-targets -- --no-deps -D warnings
-
 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
 clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
-clippy_versionable clippy_tfhe_lints
+clippy_versionable

 .PHONY: clippy_fast # Run main clippy targets
 clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
@@ -452,73 +438,73 @@ check_rust_bindings_did_not_change:
 	cargo build -p tfhe-cuda-backend && "$(MAKE)" fmt_gpu && \
 	git diff --quiet HEAD -- backends/tfhe-cuda-backend/src/bindings.rs || \
 	( echo "Generated bindings have changed! Please run 'git add backends/tfhe-cuda-backend/src/bindings.rs' \
-	and commit the changes." && exit 1 )
+	and commit the changes." && exit 1 ) 


 .PHONY: tfhe_lints # Run custom tfhe-rs lints
-tfhe_lints: install_cargo_dylint
-	RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe --no-deps -- \
-		--features=boolean,shortint,integer,strings,zk-pok
+tfhe_lints: install_tfhe_lints
+	cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok -- -D warnings

 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		-p $(TFHE_SPEC)
+		--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=nightly-avx512 -p $(TFHE_SPEC); \
+			--features=$(TARGET_ARCH_FEATURE),nightly-avx512 -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_core_experimental # Build core_crypto with experimental features
 build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=experimental -p $(TFHE_SPEC)
+		--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
+			--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_boolean # Build with boolean enabled
 build_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean -p $(TFHE_SPEC) --all-targets
+		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets

 .PHONY: build_shortint # Build with shortint enabled
 build_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=shortint -p $(TFHE_SPEC) --all-targets
+		--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets

 .PHONY: build_integer # Build with integer enabled
 build_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=integer -p $(TFHE_SPEC) --all-targets
+		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets

 .PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
 build_tfhe_full: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets

 .PHONY: build_tfhe_coverage # Build with test coverage enabled
 build_tfhe_coverage: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests

 .PHONY: build_c_api # Build the C API for boolean, shortint and integer
 build_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
 		-p $(TFHE_SPEC)

 .PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
 build_c_api_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
 		-p $(TFHE_SPEC)

 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
 build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
 		-p $(TFHE_SPEC)

 .PHONY: build_web_js_api # Build the js API targeting the web browser
@@ -532,11 +518,11 @@ build_web_js_api: install_rs_build_toolchain install_wasm_pack
 build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
 	cd tfhe && \
 	rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
-	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory" rustup run $(RS_CHECK_TOOLCHAIN) \
+	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
 		wasm-pack build --release --target=web \
 		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok \
 		-Z build-std=panic_abort,std && \
-	find pkg/snippets -type f -iname workerHelpers.js -exec sed -i "s|const pkg = await import('..\/..\/..');|const pkg = await import('..\/..\/..\/tfhe.js');|" {} \;
+	find pkg/snippets -type f -iname workerHelpers.worker.js -exec sed -i "s|from '..\/..\/..\/';|from '..\/..\/..\/tfhe.js';|" {} \;
 	jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json

 .PHONY: build_node_js_api # Build the js API targeting nodejs
@@ -549,15 +535,15 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
 .PHONY: build_tfhe_csprng # Build tfhe_csprng
 build_tfhe_csprng: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		-p tfhe-csprng --all-targets
+		--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng --all-targets

 .PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
 test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
+		--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-			--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
+			--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
 	fi

 .PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
@@ -565,13 +551,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
 		--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
-		--features=experimental,internal-keycache \
+		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache \
 		-p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 			--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
 			--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
-			--features=experimental,internal-keycache,nightly-avx512 \
+			--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,nightly-avx512 \
 			-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
 	fi

@@ -589,38 +575,35 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
 .PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
 test_core_crypto_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::

 .PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
 test_integer_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::

-.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
-test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
-	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
-	LONG_TESTS=TRUE \
-		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)" --backend "gpu"
+.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
+test_integer_long_run_gpu: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6

 .PHONY: test_integer_compression
 test_integer_compression: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
+		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
+		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compress

 .PHONY: test_integer_compression_gpu
 test_integer_compression_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress

 .PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
 test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -679,20 +662,20 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
 .PHONY: test_boolean # Run the tests of the boolean module
 test_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=boolean -p $(TFHE_SPEC) -- boolean::
+		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::

 .PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
 test_boolean_cov: install_rs_check_toolchain install_tarpaulin
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
-		--features=boolean,internal-keycache \
+		--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache \
 		-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::

 .PHONY: test_c_api_rs # Run the rust tests for the C API
 test_c_api_rs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=boolean-c-api,shortint-c-api,high-level-c-api \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
 		-p $(TFHE_SPEC) \
 		c_api

@@ -724,14 +707,14 @@ test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
 .PHONY: test_shortint # Run all the tests for shortint
 test_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::

 .PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
 test_shortint_cov: install_rs_check_toolchain install_tarpaulin
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
-		--features=shortint,internal-keycache \
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
 		-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::

 .PHONY: test_integer_ci # Run the tests for integer ci
@@ -788,28 +771,26 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
 		--signed-only --tfhe-package "$(TFHE_SPEC)"

-.PHONY: test_integer_long_run # Run the long run integer tests
-test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
-	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
-	LONG_TESTS=TRUE \
-		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)"
+.PHONY: test_integer_long_run # Run the long run tests for integer
+test_integer_long_run: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+						--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
+

 .PHONY: test_safe_serialization # Run the tests for safe serialization
 test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::

 .PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
 test_zk: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::
+		--features=$(TARGET_ARCH_FEATURE),shortint,zk-pok -p $(TFHE_SPEC) -- zk::

 .PHONY: test_integer # Run all the tests for integer
 test_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::
+		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::

 .PHONY: test_integer_cov # Run the tests of the integer module with code coverage
 test_integer_cov: install_rs_check_toolchain install_tarpaulin
@@ -817,44 +798,38 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
 		--out xml --output-dir coverage/integer --line --engine llvm --timeout 500 \
 		--implicit-test-threads \
 		--exclude-files $(COVERAGE_EXCLUDED_FILES) \
-		--features=integer,internal-keycache \
+		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
 		-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::

 .PHONY: test_high_level_api # Run all the tests for high_level_api
 test_high_level_api: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p $(TFHE_SPEC) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
 		-- high_level_api::

 test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
-		--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
+		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) \
 		-E "test(/high_level_api::.*gpu.*/)"

-test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
-		--features=integer,internal-keycache,hpu,hpu-xfer -p $(TFHE_SPEC) \
-		-E "test(/high_level_api::.*hpu.*/)"
-
-
 .PHONY: test_strings # Run the tests for strings ci
 test_strings: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=shortint,integer,strings -p $(TFHE_SPEC) \
+		--features=$(TARGET_ARCH_FEATURE),shortint,integer,strings -p $(TFHE_SPEC) \
 		-- strings::


 .PHONY: test_user_doc # Run tests from the .md documentation
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok,strings \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
 		-p $(TFHE_SPEC) \
 		-- test_user_docs::

 .PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
 test_user_doc_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
 		-- test_user_docs::


@@ -862,12 +837,14 @@ test_user_doc_gpu: install_rs_build_toolchain
 .PHONY: test_regex_engine # Run tests for regex_engine example
 test_regex_engine: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--example regex_engine --features=integer
+		--example regex_engine \
+		--features=$(TARGET_ARCH_FEATURE),integer

 .PHONY: test_sha256_bool # Run tests for sha256_bool example
 test_sha256_bool: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--example sha256_bool --features=boolean
+		--example sha256_bool \
+		--features=$(TARGET_ARCH_FEATURE),boolean

 .PHONY: test_examples # Run tests for examples
 test_examples: test_sha256_bool test_regex_engine
@@ -885,7 +862,7 @@ test_kreyvium: install_rs_build_toolchain
 .PHONY: test_tfhe_csprng # Run tfhe-csprng tests
 test_tfhe_csprng: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		-p tfhe-csprng
+		--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng

 .PHONY: test_zk_pok # Run tfhe-zk-pok tests
 test_zk_pok: install_rs_build_toolchain
@@ -903,28 +880,23 @@ test_zk_wasm_x86_compat_ci: check_nvm_installed
 test_zk_wasm_x86_compat: install_rs_build_toolchain build_node_js_api
 	cd tfhe/tests/zk_wasm_x86_test && npm install
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		-p tfhe --test zk_wasm_x86_test --features=integer,zk-pok
+		-p tfhe --test zk_wasm_x86_test --features=$(TARGET_ARCH_FEATURE),integer,zk-pok

 .PHONY: test_versionable # Run tests for tfhe-versionable subcrate
 test_versionable: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--all-targets -p tfhe-versionable

-.PHONY: test_tfhe_lints # Run test on tfhe-lints
-test_tfhe_lints: install_cargo_dylint
-	cd utils/tfhe-lints && \
-	cargo test
-
 # The backward compat data repo holds historical binary data but also rust code to generate and load them.
 # Here we use the "patch" functionality of Cargo to make sure the repo used for the data is the same as the one used for the code.
 .PHONY: test_backward_compatibility_ci
 test_backward_compatibility_ci: install_rs_build_toolchain
 	TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tests/$(BACKWARD_COMPAT_DATA_DIR)\"" \
-		--features=shortint,integer,zk-pok -p tests test_backward_compatibility -- --nocapture
+		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
+		--features=$(TARGET_ARCH_FEATURE),shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture

 .PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
-test_backward_compatibility: tests/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
+test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci

 .PHONY: backward_compat_branch # Prints the required backward compatibility branch
 backward_compat_branch:
@@ -936,7 +908,7 @@ doc: install_rs_check_toolchain
 	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)

 .PHONY: docs # Build rust doc alias for doc
 docs: doc
@@ -947,7 +919,7 @@ lint_doc: install_rs_check_toolchain
 	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps

 .PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
 lint_docs: lint_doc
@@ -973,14 +945,10 @@ check_intra_md_links: install_mlc
 check_md_links: install_mlc
 	mlc --match-file-extension tfhe/docs

-.PHONY: check_parameter_export_ok # Checks exported "current" shortint parameter module is correct
-check_parameter_export_ok:
-	python3 ./scripts/check_current_param_export.py
-
 .PHONY: check_compile_tests # Build tests in debug without running them
 check_compile_tests: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
-		--features=experimental,boolean,shortint,integer,internal-keycache \
+		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
 		-p $(TFHE_SPEC)

 	@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
@@ -991,7 +959,7 @@ check_compile_tests: install_rs_build_toolchain
 .PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
 check_compile_tests_benches_gpu: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
-		--features=experimental,boolean,shortint,integer,internal-keycache,gpu \
+		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,gpu \
 		-p $(TFHE_SPEC)
 	mkdir -p "$(TFHECUDA_BUILD)" && \
 		cd "$(TFHECUDA_BUILD)" && \
@@ -1070,42 +1038,42 @@ dieharder_csprng: install_dieharder build_tfhe_csprng
 .PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
 print_doc_bench_parameters:
 	RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
-	--features=shortint,internal-keycache -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe

 .PHONY: bench_integer # Run benchmarks for unsigned integer
 bench_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_signed_integer # Run benchmarks for signed integer
 bench_signed_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-signed-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
 bench_integer_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
 bench_integer_compression: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench	glwe_packing_compression-integer-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_integer_compression_gpu
 bench_integer_compression_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench	glwe_packing_compression-integer-bench \
-	--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) --

 .PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
 bench_integer_multi_bit: install_rs_check_toolchain
@@ -1113,7 +1081,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
 bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -1121,7 +1089,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-signed-bench \
-	--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
 bench_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1129,7 +1097,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
 bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1137,14 +1105,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned

 .PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
 bench_integer_zk: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench zk-pke-bench \
-	--features=integer,internal-keycache,zk-pok,nightly-avx512 \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,zk-pok,nightly-avx512 \
 	-p $(TFHE_SPEC) --

 .PHONY: bench_shortint # Run benchmarks for shortint
@@ -1152,14 +1120,14 @@ bench_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_shortint_oprf # Run benchmarks for shortint
 bench_shortint_oprf: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench oprf-shortint-bench \
-	--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
 bench_shortint_multi_bit: install_rs_check_toolchain
@@ -1167,43 +1135,43 @@ bench_shortint_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_boolean # Run benchmarks for boolean
 bench_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench boolean-bench \
-	--features=boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_pbs # Run benchmarks for PBS
 bench_pbs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
-	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
 bench_pbs128: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs128-bench \
-	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
 bench_pbs_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
-	--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_ks # Run benchmarks for keyswitch
 bench_ks: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench ks-bench \
-	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_ks_gpu # Run benchmarks for PBS on GPU backend
 bench_ks_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench ks-bench \
-	--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
 bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
@@ -1239,13 +1207,13 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
 bench_hlapi_erc20: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
-	--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ECR20 operations on GPU
 bench_hlapi_erc20_gpu: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench hlapi-erc20 \
-	--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
 bench_tfhe_zk_pok: install_rs_check_toolchain
@@ -1260,32 +1228,32 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
 gen_key_cache: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 		--example generates_test_keys \
-		--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
 		-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)

 .PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
 gen_key_cache_core_crypto: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
-		--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
+		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
 		core_crypto::keycache::generate_keys

 .PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
 measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example hlapi_compact_pk_ct_sizes \
-	--features=integer,internal-keycache
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache

 .PHONY: measure_shortint_key_sizes # Measure sizes of bootstrapping and key switching keys for shortint
 measure_shortint_key_sizes: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example shortint_key_sizes \
-	--features=shortint,internal-keycache
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache

 .PHONY: measure_boolean_key_sizes # Measure sizes of bootstrapping and key switching keys for boolean
 measure_boolean_key_sizes: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example boolean_key_sizes \
-	--features=boolean,internal-keycache
+	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache

 .PHONY: parse_integer_benches # Run python parser to output a csv containing integer benches data
 parse_integer_benches:
@@ -1297,19 +1265,20 @@ parse_integer_benches:
 parse_wasm_benchmarks: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example wasm_benchmarks_parser \
-	--features=shortint,internal-keycache \
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
 	-- wasm_benchmark_results.json

 .PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
 write_params_to_file: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run \
-	--example write_params_to_file --features=boolean,shortint,internal-keycache
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example write_params_to_file \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache

 .PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
 clone_backward_compat_data:
-	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tests/$(BACKWARD_COMPAT_DATA_DIR)
+	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tfhe/$(BACKWARD_COMPAT_DATA_DIR)

-tests/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
+tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data

 #
 # Real use case examples
@@ -1318,36 +1287,36 @@ tests/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
 .PHONY: regex_engine # Run regex_engine example
 regex_engine: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
-	--example regex_engine --features=integer \
+	--example regex_engine \
+	--features=$(TARGET_ARCH_FEATURE),integer \
 	-- $(REGEX_STRING) $(REGEX_PATTERN)

 .PHONY: dark_market # Run dark market example
 dark_market: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example dark_market \
-	--features=integer,internal-keycache \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
 	-- fhe-modified fhe-parallel plain fhe

 .PHONY: sha256_bool # Run sha256_bool example
 sha256_bool: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
-	--example sha256_bool --features=boolean
+	--example sha256_bool \
+	--features=$(TARGET_ARCH_FEATURE),boolean

 .PHONY: pcc # pcc stands for pre commit checks (except GPU)
-pcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
-check_md_docs_are_tested check_intra_md_links clippy_all check_compile_tests test_tfhe_lints \
-tfhe_lints
+pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
+clippy_all tfhe_lints check_compile_tests

 .PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
-pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
-clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu
+pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change

 .PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
-fpcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
-check_md_docs_are_tested clippy_fast check_compile_tests
+fpcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested clippy_fast \
+check_compile_tests

 .PHONY: conformance # Automatically fix problems that can be fixed
-conformance: fix_newline fmt fmt_js
+conformance: fix_newline fmt

 #=============================== FFT Section ==================================
 .PHONY: doc_fft # Build rust doc for tfhe-fft
@@ -1419,7 +1388,7 @@ test_fft_nightly: install_rs_check_toolchain
 .PHONY: test_fft_no_std
 test_fft_no_std: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-fft \
-		--no-default-features
+		--no-default-features 
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-fft \
 		--no-default-features \
 		--features=fft128
@@ -1513,7 +1482,7 @@ test_ntt_nightly: install_rs_check_toolchain
 .PHONY: test_ntt_no_std
 test_ntt_no_std: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-ntt \
-		--no-default-features
+		--no-default-features 

 .PHONY: test_ntt_no_std_nightly
 test_ntt_no_std_nightly: install_rs_check_toolchain
--- a/README.md
+++ b/README.md
@@ -70,8 +70,22 @@ production-ready library for all the advanced features of TFHE.
 ### Cargo.toml configuration
 To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:

+ For x86_64-based machines running Unix-like OSes:
+
 ```toml
-tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
+tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-unix"] }
+```
+
+ For Apple Silicon or aarch64-based machines running Unix-like OSes:
+
+```toml
+tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
+```
+
+ For x86_64-based machines with the [`rdseed` instruction](https://en.wikipedia.org/wiki/RDRAND) running Windows:
+
+```toml
+tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
 ```

 > [!Note]
--- a/apps/trivium/Cargo.toml
+++ b/apps/trivium/Cargo.toml
@@ -7,7 +7,14 @@ edition = "2021"

 [dependencies]
 rayon = { workspace = true }
-tfhe = { path = "../../tfhe", features = [ "boolean", "shortint", "integer" ] }
+
+[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
+path = "../../tfhe"
+features = [ "boolean", "shortint", "integer", "x86_64" ]
+
+[target.'cfg(target_arch = "aarch64")'.dependencies.tfhe]
+path = "../../tfhe"
+features = [ "boolean", "shortint", "integer", "aarch64-unix" ]

 [dev-dependencies]
 criterion = { version = "0.5.1", features = [ "html_reports" ]}
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -18,102 +18,102 @@ use tfhe::prelude::*;
 use tfhe_trivium::TriviumStream;

 fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
-    assert!(a.len() % 8 == 0);
-    let mut hexadecimal: String = "".to_string();
-    for test in a.chunks(8) {
-        // Encoding is bytes in LSB order
-        match test[4..8] {
-            [false, false, false, false] => hexadecimal.push('0'),
-            [true, false, false, false] => hexadecimal.push('1'),
-            [false, true, false, false] => hexadecimal.push('2'),
-            [true, true, false, false] => hexadecimal.push('3'),
+	assert!(a.len() % 8 == 0);
+	let mut hexadecimal: String = "".to_string();
+	for test in a.chunks(8) {
+		// Encoding is bytes in LSB order
+		match test[4..8] {
+			[false, false, false, false] => hexadecimal.push('0'),
+			[true, false, false, false] => hexadecimal.push('1'),
+			[false, true, false, false] => hexadecimal.push('2'),
+			[true, true, false, false] => hexadecimal.push('3'),

-            [false, false, true, false] => hexadecimal.push('4'),
-            [true, false, true, false] => hexadecimal.push('5'),
-            [false, true, true, false] => hexadecimal.push('6'),
-            [true, true, true, false] => hexadecimal.push('7'),
+			[false, false, true, false] => hexadecimal.push('4'),
+			[true, false, true, false] => hexadecimal.push('5'),
+			[false, true, true, false] => hexadecimal.push('6'),
+			[true, true, true, false] => hexadecimal.push('7'),

-            [false, false, false, true] => hexadecimal.push('8'),
-            [true, false, false, true] => hexadecimal.push('9'),
-            [false, true, false, true] => hexadecimal.push('A'),
-            [true, true, false, true] => hexadecimal.push('B'),
+			[false, false, false, true] => hexadecimal.push('8'),
+			[true, false, false, true] => hexadecimal.push('9'),
+			[false, true, false, true] => hexadecimal.push('A'),
+			[true, true, false, true] => hexadecimal.push('B'),

-            [false, false, true, true] => hexadecimal.push('C'),
-            [true, false, true, true] => hexadecimal.push('D'),
-            [false, true, true, true] => hexadecimal.push('E'),
-            [true, true, true, true] => hexadecimal.push('F'),
-            _ => ()
-        };
-        match test[0..4] {
-            [false, false, false, false] => hexadecimal.push('0'),
-            [true, false, false, false] => hexadecimal.push('1'),
-            [false, true, false, false] => hexadecimal.push('2'),
-            [true, true, false, false] => hexadecimal.push('3'),
+			[false, false, true, true] => hexadecimal.push('C'),
+			[true, false, true, true] => hexadecimal.push('D'),
+			[false, true, true, true] => hexadecimal.push('E'),
+			[true, true, true, true] => hexadecimal.push('F'),
+			_ => ()
+		};
+		match test[0..4] {
+			[false, false, false, false] => hexadecimal.push('0'),
+			[true, false, false, false] => hexadecimal.push('1'),
+			[false, true, false, false] => hexadecimal.push('2'),
+			[true, true, false, false] => hexadecimal.push('3'),

-            [false, false, true, false] => hexadecimal.push('4'),
-            [true, false, true, false] => hexadecimal.push('5'),
-            [false, true, true, false] => hexadecimal.push('6'),
-            [true, true, true, false] => hexadecimal.push('7'),
+			[false, false, true, false] => hexadecimal.push('4'),
+			[true, false, true, false] => hexadecimal.push('5'),
+			[false, true, true, false] => hexadecimal.push('6'),
+			[true, true, true, false] => hexadecimal.push('7'),

-            [false, false, false, true] => hexadecimal.push('8'),
-            [true, false, false, true] => hexadecimal.push('9'),
-            [false, true, false, true] => hexadecimal.push('A'),
-            [true, true, false, true] => hexadecimal.push('B'),
+			[false, false, false, true] => hexadecimal.push('8'),
+			[true, false, false, true] => hexadecimal.push('9'),
+			[false, true, false, true] => hexadecimal.push('A'),
+			[true, true, false, true] => hexadecimal.push('B'),

-            [false, false, true, true] => hexadecimal.push('C'),
-            [true, false, true, true] => hexadecimal.push('D'),
-            [false, true, true, true] => hexadecimal.push('E'),
-            [true, true, true, true] => hexadecimal.push('F'),
-            _ => ()
-        };
-    }
-    return hexadecimal;
+			[false, false, true, true] => hexadecimal.push('C'),
+			[true, false, true, true] => hexadecimal.push('D'),
+			[false, true, true, true] => hexadecimal.push('E'),
+			[true, true, true, true] => hexadecimal.push('F'),
+			_ => ()
+		};
+	}
+	return hexadecimal;
 }

 fn main() {
-    let config = ConfigBuilder::default().build();
-    let (client_key, server_key) = generate_keys(config);
+	let config = ConfigBuilder::default().build();
+	let (client_key, server_key) = generate_keys(config);

-    let key_string = "0053A6F94C9FF24598EB".to_string();
-    let mut key = [false; 80];
+	let key_string = "0053A6F94C9FF24598EB".to_string();
+	let mut key = [false; 80];

-    for i in (0..key_string.len()).step_by(2) {
-        let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            key[8*(i>>1) + j] = val % 2 == 1;
-            val >>= 1;
-        }
-    }
+	for i in (0..key_string.len()).step_by(2) {
+		let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			key[8*(i>>1) + j] = val % 2 == 1;
+			val >>= 1;
+		}
+	}

-    let iv_string = "0D74DB42A91077DE45AC".to_string();
-    let mut iv = [false; 80];
+	let iv_string = "0D74DB42A91077DE45AC".to_string();
+	let mut iv = [false; 80];

-    for i in (0..iv_string.len()).step_by(2) {
-        let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            iv[8*(i>>1) + j] = val % 2 == 1;
-            val >>= 1;
-        }
-    }
+	for i in (0..iv_string.len()).step_by(2) {
+		let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			iv[8*(i>>1) + j] = val % 2 == 1;
+			val >>= 1;
+		}
+	}
+	
+	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();

-    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
-
-    let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
-    let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));
+	let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
+	let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));


-    let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);
+	let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);

-    let mut vec = Vec::<bool>::with_capacity(64*8);
-    while vec.len() < 64*8 {
-        let cipher_outputs = trivium.next_64();
-        for c in cipher_outputs {
-            vec.push(c.decrypt(&client_key))
-        }
-    }
+	let mut vec = Vec::<bool>::with_capacity(64*8);
+	while vec.len() < 64*8 {
+		let cipher_outputs = trivium.next_64();
+		for c in cipher_outputs {
+			vec.push(c.decrypt(&client_key))
+		}
+	}

-    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
-    assert_eq!(output_0_63, hexadecimal[0..64*2]);
+	let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
+	assert_eq!(output_0_63, hexadecimal[0..64*2]);
 }
 ```

@@ -129,76 +129,63 @@ Other sizes than 64 bit are expected to be available in the future.

 # FHE shortint Trivium implementation

-The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
-It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
-on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
+The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `PARAM_MESSAGE_1_CARRY_1_KS_PBS`). It uses a lower level API 
+of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run on the same 
+cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes 
 its setup a little more intricate.

 Example code:
 ```rust
 use tfhe::shortint::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::CastingKey;
 use tfhe::{ConfigBuilder, generate_keys, FheUint64};
 use tfhe::prelude::*;
 use tfhe_trivium::TriviumStreamShortint;

 fn test_shortint() {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
-    let (hl_client_key, hl_server_key) = generate_keys(config);
-    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
-    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+	let config = ConfigBuilder::default().build();
+	let (hl_client_key, hl_server_key) = generate_keys(config);
+	let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+	let ksk = CastingKey::new((&client_key, &server_key), (&hl_client_key, &hl_server_key));

-    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
-    let ksk = KeySwitchingKey::new(
-        (&client_key, Some(&server_key)),
-        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
-    );
+	let key_string = "0053A6F94C9FF24598EB".to_string();
+	let mut key = [0; 80];

-    let key_string = "0053A6F94C9FF24598EB".to_string();
-    let mut key = [0; 80];
+	for i in (0..key_string.len()).step_by(2) {
+		let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			key[8*(i>>1) + j] = val % 2;
+			val >>= 1;
+		}
+	}

-    for i in (0..key_string.len()).step_by(2) {
-        let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            key[8*(i>>1) + j] = val % 2;
-            val >>= 1;
-        }
-    }
+	let iv_string = "0D74DB42A91077DE45AC".to_string();
+	let mut iv = [0; 80];

-    let iv_string = "0D74DB42A91077DE45AC".to_string();
-    let mut iv = [0; 80];
+	for i in (0..iv_string.len()).step_by(2) {
+		let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			iv[8*(i>>1) + j] = val % 2;
+			val >>= 1;
+		}
+	}
+	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();

-    for i in (0..iv_string.len()).step_by(2) {
-        let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            iv[8*(i>>1) + j] = val % 2;
-            val >>= 1;
-        }
-    }
-    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+	let cipher_key = key.map(|x| client_key.encrypt(x));
+	let cipher_iv = iv.map(|x| client_key.encrypt(x));

-    let cipher_key = key.map(|x| client_key.encrypt(x));
-    let cipher_iv = iv.map(|x| client_key.encrypt(x));
+	let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];

-    let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];
+	let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);

-    let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);
+	let mut vec = Vec::<u64>::with_capacity(8);
+	while vec.len() < 8 {
+		let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
+		vec.push(trans_ciphered_message.decrypt(&hl_client_key));
+	}

-    let mut vec = Vec::<u64>::with_capacity(8);
-    while vec.len() < 8 {
-        let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
-        vec.push(trans_ciphered_message.decrypt(&hl_client_key));
-    }
-
-    let hexadecimal = get_hexagonal_string_from_u64(vec);
-    assert_eq!(output_0_63, hexadecimal[0..64*2]);
+	let hexadecimal = get_hexagonal_string_from_u64(vec);
+	assert_eq!(output_0_63, hexadecimal[0..64*2]);
 }
 ```

--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -1,29 +1,23 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};

 pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -63,20 +57,18 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -111,20 +103,18 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -1,29 +1,23 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{TransCiphering, TriviumStreamShortint};

 pub fn trivium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -63,20 +57,18 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn trivium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -111,20 +103,18 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
 }

 pub fn trivium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -1,10 +1,6 @@
 use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo renaud1239/Kreyvium,
 // commit fd6828f68711276c25f55e605935028f5e843f43
@@ -220,20 +216,18 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn kreyvium_test_shortint_long() {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -1,10 +1,6 @@
 use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
 // file testvectors/trivium-80.80.test-vectors
@@ -356,20 +352,18 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn trivium_test_shortint_long() {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tfhe-cuda-backend"
-version = "0.8.0"
+version = "0.6.0"
 edition = "2021"
 authors = ["Zama team"]
 license = "BSD-3-Clause-Clear"
@@ -14,4 +14,4 @@ keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
 [build-dependencies]
 cmake = { version = "0.1" }
 pkg-config = { version = "0.3" }
-bindgen = "0.71"
+bindgen = "0.70.1"
--- a/backends/tfhe-cuda-backend/LICENSE
+++ b/backends/tfhe-cuda-backend/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2025 ZAMA.
+Copyright © 2024 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/backends/tfhe-cuda-backend/cuda/include/device.h
+++ b/backends/tfhe-cuda-backend/cuda/include/device.h
@@ -27,8 +27,6 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
    std::abort();                                                              \
  }

-void cuda_set_device(uint32_t gpu_index);
-
 cudaEvent_t cuda_create_event(uint32_t gpu_index);

 void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
--- a/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
@@ -38,7 +38,6 @@ template <typename Torus> struct int_compression {

      scratch_packing_keyswitch_lwe_list_to_glwe_64(
          streams[0], gpu_indexes[0], &fp_ks_buffer,
-          compression_params.small_lwe_dimension,
          compression_params.glwe_dimension, compression_params.polynomial_size,
          num_radix_blocks, true);
    }
@@ -65,7 +64,7 @@ template <typename Torus> struct int_decompression {
  Torus *tmp_extracted_lwe;
  uint32_t *tmp_indexes_array;

-  int_radix_lut<Torus> *decompression_rescale_lut;
+  int_radix_lut<Torus> *carry_extract_lut;

  int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
                    uint32_t gpu_count, int_radix_params encryption_params,
@@ -84,7 +83,7 @@ template <typename Torus> struct int_decompression {
      Torus lwe_accumulator_size = (compression_params.glwe_dimension *
                                        compression_params.polynomial_size +
                                    1);
-      decompression_rescale_lut = new int_radix_lut<Torus>(
+      carry_extract_lut = new int_radix_lut<Torus>(
          streams, gpu_indexes, gpu_count, encryption_params, 1,
          num_radix_blocks, allocate_gpu_memory);

@@ -97,30 +96,19 @@ template <typename Torus> struct int_decompression {
          num_radix_blocks * lwe_accumulator_size * sizeof(Torus), streams[0],
          gpu_indexes[0]);

-      // Rescale is done using an identity LUT
-      // Here we do not divide by message_modulus
-      // Example: in the 2_2 case we are mapping a 2 bits message onto a 4 bits
-      // space, we want to keep the original 2 bits value in the 4 bits space,
-      // so we apply the identity and the encoding will rescale it for us.
-      auto decompression_rescale_f = [encryption_params](Torus x) -> Torus {
-        return x;
+      // Carry extract LUT
+      auto carry_extract_f = [encryption_params](Torus x) -> Torus {
+        return x / encryption_params.message_modulus;
      };

-      auto effective_compression_message_modulus =
-          encryption_params.carry_modulus;
-      auto effective_compression_carry_modulus = 1;
-
-      generate_device_accumulator_with_encoding<Torus>(
-          streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
-          decompression_rescale_lut->get_degree(0),
-          decompression_rescale_lut->get_max_degree(0),
+      generate_device_accumulator<Torus>(
+          streams[0], gpu_indexes[0],
+          carry_extract_lut->get_lut(gpu_indexes[0], 0),
          encryption_params.glwe_dimension, encryption_params.polynomial_size,
-          effective_compression_message_modulus,
-          effective_compression_carry_modulus,
          encryption_params.message_modulus, encryption_params.carry_modulus,
-          decompression_rescale_f);
+          carry_extract_f);

-      decompression_rescale_lut->broadcast_lut(streams, gpu_indexes, 0);
+      carry_extract_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
    }
  }
  void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -129,8 +117,8 @@ template <typename Torus> struct int_decompression {
    cuda_drop_async(tmp_extracted_lwe, streams[0], gpu_indexes[0]);
    cuda_drop_async(tmp_indexes_array, streams[0], gpu_indexes[0]);

-    decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
-    delete decompression_rescale_lut;
+    carry_extract_lut->release(streams, gpu_indexes, gpu_count);
+    delete carry_extract_lut;
  }
 };
 #endif
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -38,15 +38,6 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
 enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };

 extern "C" {
-
-typedef struct {
-  void *ptr;
-  uint64_t *degrees;
-  uint64_t *noise_levels;
-  uint32_t num_radix_blocks;
-  uint32_t lwe_dimension;
-} CudaRadixCiphertextFFI;
-
 void scratch_cuda_apply_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -54,20 +45,14 @@ void scratch_cuda_apply_univariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory);
-void scratch_cuda_apply_many_univariate_lut_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
-    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
-    uint32_t grouping_factor, uint32_t num_radix_blocks,
-    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory);
-void cuda_apply_univariate_lut_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks);
+    bool allocate_gpu_memory);
+
+void cuda_apply_univariate_lut_kb_64(void *const *streams,
+                                     uint32_t const *gpu_indexes,
+                                     uint32_t gpu_count, void *output_radix_lwe,
+                                     void const *input_radix_lwe,
+                                     int8_t *mem_ptr, void *const *ksks,
+                                     void *const *bsks, uint32_t num_blocks);

 void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
                                             uint32_t const *gpu_indexes,
@@ -81,15 +66,13 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory);
+    bool allocate_gpu_memory);

 void cuda_apply_bivariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI const *input_radix_lwe_1,
-    CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
-    uint32_t shift);
+    void *output_radix_lwe, void const *input_radix_lwe_1,
+    void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
+    void *const *bsks, uint32_t num_blocks, uint32_t shift);

 void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
                                            uint32_t const *gpu_indexes,
@@ -98,10 +81,9 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,

 void cuda_apply_many_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_luts,
-    uint32_t lut_stride);
+    void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks, uint32_t num_blocks,
+    uint32_t num_luts, uint32_t lut_stride);

 void scratch_cuda_full_propagation_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -143,14 +125,15 @@ void cleanup_cuda_integer_mult(void *const *streams,

 void cuda_negate_integer_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
+    void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
    uint32_t carry_modulus);

 void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
-    uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus);
+    void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    uint32_t carry_modulus);

 void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -163,8 +146,8 @@ void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(

 void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -177,8 +160,8 @@ void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(

 void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_radix_logical_scalar_shift(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -199,8 +182,8 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(

 void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks);
+    void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
                                                 uint32_t const *gpu_indexes,
@@ -243,17 +226,15 @@ void scratch_cuda_integer_radix_bitop_kb_64(

 void cuda_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_1,
-    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t lwe_ciphertext_count);

 void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
+    void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
    uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks);
+    void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op);

 void cleanup_cuda_integer_bitop(void *const *streams,
                                uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -270,11 +251,9 @@ void scratch_cuda_integer_radix_cmux_kb_64(

 void cuda_cmux_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_condition,
-    CudaRadixCiphertextFFI const *lwe_array_true,
-    CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
+    void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t lwe_ciphertext_count);

 void cleanup_cuda_integer_radix_cmux(void *const *streams,
                                     uint32_t const *gpu_indexes,
@@ -291,8 +270,8 @@ void scratch_cuda_integer_radix_scalar_rotate_kb_64(

 void cuda_integer_radix_scalar_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, uint32_t n, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array, uint32_t n, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_radix_scalar_rotate(void *const *streams,
                                              uint32_t const *gpu_indexes,
@@ -319,16 +298,15 @@ void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(

 void cuda_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
-    const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);
+    void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks, uint32_t num_blocks,
+    uint32_t requested_flag, uint32_t uses_carry);

 void cuda_add_and_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
-    CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    uint32_t requested_flag, uint32_t uses_carry);
+    void *lhs_array, const void *rhs_array, void *carry_out,
+    const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry);

 void cleanup_cuda_propagate_single_carry(void *const *streams,
                                         uint32_t const *gpu_indexes,
@@ -351,10 +329,9 @@ void scratch_cuda_integer_overflowing_sub_kb_64_inplace(

 void cuda_integer_overflowing_sub_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
-    CudaRadixCiphertextFFI *overflow_block,
-    const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks, uint32_t compute_overflow,
+    void *lhs_array, const void *rhs_array, void *overflow_block,
+    const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
    uint32_t uses_input_borrow);

 void cleanup_cuda_integer_overflowing_sub(void *const *streams,
@@ -427,13 +404,12 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory);
+    bool allocate_gpu_memory);

 void cuda_integer_compute_prefix_sum_hillis_steele_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_blocks);
+    void *output_radix_lwe, void *generates_or_propagates, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks, uint32_t num_blocks, uint32_t shift);

 void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -441,8 +417,9 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(

 void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
                                            uint32_t const *gpu_indexes,
-                                            uint32_t gpu_count,
-                                            CudaRadixCiphertextFFI *lwe_array);
+                                            uint32_t gpu_count, void *lwe_array,
+                                            uint32_t num_blocks,
+                                            uint32_t lwe_size);

 void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -455,49 +432,13 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
-    void *const *bsks, void *const *ksks);
+    void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_abs_inplace(void *const *streams,
                                      uint32_t const *gpu_indexes,
                                      uint32_t gpu_count,
                                      int8_t **mem_ptr_void);

-void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
-    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
-    uint32_t grouping_factor, uint32_t num_radix_blocks,
-    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory);
-
-void cuda_integer_are_all_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
-
-void cleanup_cuda_integer_are_all_comparisons_block_true(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr_void);
-
-void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
-    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
-    uint32_t grouping_factor, uint32_t num_radix_blocks,
-    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory);
-
-void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
-
-void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr_void);
-
 } // extern C
 #endif // CUDA_INTEGER_H
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
--- a/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
@@ -1,8 +0,0 @@
-#ifndef CUDA_RADIX_CIPHERTEXT_H
-#define CUDA_RADIX_CIPHERTEXT_H
-
-void release_radix_ciphertext(cudaStream_t const stream,
-                              uint32_t const gpu_index,
-                              CudaRadixCiphertextFFI *data);
-
-#endif
--- a/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
+++ b/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
@@ -21,8 +21,8 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(

 void scratch_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t num_lwes, bool allocate_gpu_memory);
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
+    bool allocate_gpu_memory);

 void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, void *glwe_array_out,
--- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
+++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
@@ -1,52 +1,49 @@
 #ifndef CUDA_LINALG_H_
 #define CUDA_LINALG_H_

-#include "integer/integer.h"
 #include <stdint.h>

 extern "C" {

-void cuda_negate_lwe_ciphertext_vector_32(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
-void cuda_negate_lwe_ciphertext_vector_64(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
+                                          void *lwe_array_out,
+                                          void const *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
+                                          void *lwe_array_out,
+                                          void const *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                       CudaRadixCiphertextFFI *output,
-                                       CudaRadixCiphertextFFI const *input_1,
-                                       CudaRadixCiphertextFFI const *input_2);
+                                       void *lwe_array_out,
+                                       void const *lwe_array_in_1,
+                                       void const *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                       CudaRadixCiphertextFFI *output,
-                                       CudaRadixCiphertextFFI const *input_1,
-                                       CudaRadixCiphertextFFI const *input_2);
+                                       void *lwe_array_out,
+                                       void const *lwe_array_in_1,
+                                       void const *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
+
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_plaintext_64(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, const uint64_t plaintext_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 }

 #endif // CUDA_LINALG_H_
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_multibit_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_multibit_utilities.h
@@ -5,12 +5,12 @@

 template <typename Torus>
 bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
-    uint32_t polynomial_size, int max_shared_memory);
+    uint32_t polynomial_size);

 template <typename Torus>
 bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
    uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t level_count, int max_shared_memory);
+    uint32_t level_count);

 #if CUDA_ARCH >= 900
 template <typename Torus>
@@ -114,8 +114,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
             uint32_t polynomial_size, uint32_t level_count,
             uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
             PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
-    cuda_set_device(gpu_index);
-
    this->pbs_variant = pbs_variant;
    this->lwe_chunk_size = lwe_chunk_size;
    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
@@ -61,7 +61,7 @@ get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {

 template <typename Torus>
 bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
-    uint32_t polynomial_size, int max_shared_memory);
+    uint32_t polynomial_size);

 template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;

@@ -77,10 +77,10 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
             uint32_t polynomial_size, uint32_t level_count,
             uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
             bool allocate_gpu_memory) {
-    cuda_set_device(gpu_index);
+
    this->pbs_variant = pbs_variant;

-    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
+    auto max_shared_memory = cuda_get_max_shared_memory(0);

    if (allocate_gpu_memory) {
      switch (pbs_variant) {
@@ -157,7 +157,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {

        bool supports_dsm =
            supports_distributed_shared_memory_on_classic_programmable_bootstrap<
-                Torus>(polynomial_size, max_shared_memory);
+                Torus>(polynomial_size);

        uint64_t full_sm =
            get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(
@@ -218,7 +218,8 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
 template <typename Torus>
 uint64_t get_buffer_size_programmable_bootstrap_cg(
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
+    uint32_t input_lwe_ciphertext_count) {
+  int max_shared_memory = cuda_get_max_shared_memory(0);
  uint64_t full_sm =
      get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
  uint64_t partial_sm =
@@ -244,8 +245,7 @@ template <typename Torus>
 bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
                                                   uint32_t polynomial_size,
                                                   uint32_t level_count,
-                                                   uint32_t num_samples,
-                                                   int max_shared_memory);
+                                                   uint32_t num_samples);

 template <typename Torus>
 void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
@@ -8,7 +8,7 @@ extern "C" {

 bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t num_samples, int max_shared_memory);
+    uint32_t num_samples);

 void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
    void *stream, uint32_t gpu_index, void *dest, void const *src,
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
@@ -11,7 +11,7 @@ void cuda_convert_lwe_ciphertext_vector_to_gpu(cudaStream_t stream,
                                               uint32_t gpu_index, T *dest,
                                               T *src, uint32_t number_of_cts,
                                               uint32_t lwe_dimension) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);
  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
  cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
 }
@@ -21,7 +21,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
                                               uint32_t gpu_index, T *dest,
                                               T *src, uint32_t number_of_cts,
                                               uint32_t lwe_dimension) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);
  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
  cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
 }
@@ -55,7 +55,7 @@ __host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
                                  Torus const *glwe_array_in,
                                  uint32_t const *nth_array, uint32_t num_nths,
                                  uint32_t glwe_dimension) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);

  dim3 grid(num_nths);
  dim3 thds(params::degree / params::opt);
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
@@ -1,356 +0,0 @@
-#ifndef CNCRT_FAST_KS_CUH
-#define CNCRT_FAST_KS_CUH
-
-#undef NDEBUG
-#include <assert.h>
-
-#include "device.h"
-#include "gadget.cuh"
-#include "helper_multi_gpu.h"
-#include "keyswitch.cuh"
-#include "polynomial/functions.cuh"
-#include "polynomial/polynomial_math.cuh"
-#include "torus.cuh"
-#include "utils/helper.cuh"
-#include "utils/kernel_dimensions.cuh"
-#include <thread>
-#include <vector>
-
-#define CEIL_DIV(M, N) ((M) + (N)-1) / (N)
-
-const int BLOCK_SIZE_GEMM = 64;
-const int THREADS_GEMM = 8;
-const int BLOCK_SIZE_DECOMP = 8;
-
-template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
-  return BLOCK_SIZE_GEMM * THREADS_GEMM * 2 * sizeof(Torus);
-}
-
-// Initialize decomposition by performing rounding
-// and decomposing one level of an array of Torus LWEs. Only
-// decomposes the mask elements of the incoming LWEs.
-template <typename Torus, typename TorusVec>
-__global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
-                                         uint32_t lwe_dimension,
-                                         uint32_t num_lwe, uint32_t base_log,
-                                         uint32_t level_count) {
-
-  // index of this LWE ct in the buffer
-  auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
-  // index of the LWE sample in the LWE ct
-  auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
-
-  if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
-    return;
-
-  // Input LWE array is [mask_0, .., mask_lwe_dim, message] and
-  // we only decompose the mask. Thus the stride for reading
-  // is lwe_dimension + 1, while for writing it is lwe_dimension
-  auto read_val_idx = lwe_idx * (lwe_dimension + 1) + lwe_sample_idx;
-  auto write_val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
-  auto write_state_idx =
-      num_lwe * lwe_dimension + lwe_idx * lwe_dimension + lwe_sample_idx;
-
-  Torus a_i = lwe_in[read_val_idx];
-
-  Torus state = init_decomposer_state(a_i, base_log, level_count);
-
-  Torus mod_b_mask = (1ll << base_log) - 1ll;
-  lwe_out[write_val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
-  synchronize_threads_in_block();
-  lwe_out[write_state_idx] = state;
-}
-
-// Continue decomposiion of an array of Torus elements in place. Supposes
-// that the array contains already decomposed elements and
-// computes the new decomposed level in place.
-template <typename Torus, typename TorusVec>
-__global__ void
-decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
-                                 uint32_t num_lwe, uint32_t base_log,
-                                 uint32_t level_count) {
-
-  // index of this LWE ct in the buffer
-  auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
-  // index of the LWE sample in the LWE ct
-  auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
-
-  if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
-    return;
-
-  auto val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
-  auto state_idx = num_lwe * lwe_dimension + val_idx;
-
-  Torus state = buffer_in[state_idx];
-  synchronize_threads_in_block();
-
-  Torus mod_b_mask = (1ll << base_log) - 1ll;
-
-  buffer_in[val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
-  synchronize_threads_in_block();
-  buffer_in[state_idx] = state;
-}
-
-// Multiply matrices A, B of size (M, K), (K, N) respectively
-// with K as the inner dimension.
-//
-// A block of threads processeds blocks of size (BLOCK_SIZE_GEMM,
-// BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
-// THREADS_GEMM)-shaped tiles of values from A, and a (THREADS_GEMM,
-// BLOCK_SIZE_GEMM)-shaped tiles of values from B.
-//
-// This code is adapted by generalizing the 1d block-tiling
-// kernel from https://github.com/siboehm/SGEMM_CUDA
-// to any matrix dimension
-template <typename Torus, typename TorusVec>
-__global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
-                      int stride_B, Torus *C) {
-
-  const int BM = BLOCK_SIZE_GEMM;
-  const int BN = BLOCK_SIZE_GEMM;
-  const int BK = THREADS_GEMM;
-  const int TM = THREADS_GEMM;
-
-  const uint cRow = blockIdx.y;
-  const uint cCol = blockIdx.x;
-
-  const int threadCol = threadIdx.x % BN;
-  const int threadRow = threadIdx.x / BN;
-
-  // Allocate space for the current block tile in shared memory
-  __shared__ Torus As[BM * BK];
-  __shared__ Torus Bs[BK * BN];
-
-  // Initialize the pointers to the input blocks from A, B
-  // Tiles from these blocks are loaded to shared memory
-  A += cRow * BM * K;
-  B += cCol * BN;
-
-  // Each thread will handle multiple sub-blocks
-  const uint innerColA = threadIdx.x % BK;
-  const uint innerRowA = threadIdx.x / BK;
-  const uint innerColB = threadIdx.x % BN;
-  const uint innerRowB = threadIdx.x / BN;
-
-  // allocate thread-local cache for results in registerfile
-  Torus threadResults[TM] = {0};
-
-  auto row_A = cRow * BM + innerRowA;
-  auto col_B = cCol * BN + innerColB;
-
-  // For each thread, loop over block tiles
-  for (uint bkIdx = 0; bkIdx < K; bkIdx += BK) {
-    auto col_A = bkIdx + innerColA;
-    auto row_B = bkIdx + innerRowB;
-
-    if (row_A < M && col_A < K) {
-      As[innerRowA * BK + innerColA] = A[innerRowA * K + innerColA];
-    } else {
-      As[innerRowA * BK + innerColA] = 0;
-    }
-
-    if (col_B < N && row_B < K) {
-      Bs[innerRowB * BN + innerColB] = B[innerRowB * stride_B + innerColB];
-    } else {
-      Bs[innerRowB * BN + innerColB] = 0;
-    }
-    synchronize_threads_in_block();
-
-    // Advance blocktile for the next iteration of this loop
-    A += BK;
-    B += BK * stride_B;
-
-    // calculate per-thread results
-    for (uint dotIdx = 0; dotIdx < BK; ++dotIdx) {
-      // we make the dotproduct loop the outside loop, which facilitates
-      // reuse of the Bs entry, which we can cache in a tmp var.
-      Torus tmp = Bs[dotIdx * BN + threadCol];
-      for (uint resIdx = 0; resIdx < TM; ++resIdx) {
-        threadResults[resIdx] +=
-            As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
-      }
-    }
-    synchronize_threads_in_block();
-  }
-
-  // Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
-  // BLOCK_SIZE_GEMM)
-  C += cRow * BM * N + cCol * BN;
-
-  // write out the results
-  for (uint resIdx = 0; resIdx < TM; ++resIdx) {
-    int outRow = cRow * BM + threadRow * TM + resIdx;
-    int outCol = cCol * BN + threadCol;
-
-    if (outRow >= M)
-      continue;
-    if (outCol >= N)
-      continue;
-
-    C[(threadRow * TM + resIdx) * N + threadCol] += threadResults[resIdx];
-  }
-}
-
-// Finish the keyswitching operation and prepare GLWEs for accumulation.
-// 1. Finish the keyswitching computation partially performed with a GEMM:
-//  - negate the dot product between the GLWE and KSK polynomial
-//  - add the GLWE message for the N-th polynomial coeff in the message poly
-// 2. Rotate each of the GLWE . KSK poly dot products to
-//    prepare them for accumulation into a single GLWE
-template <typename Torus>
-__global__ void polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C(
-    Torus *in_glwe_buffer, Torus *out_glwe_buffer, Torus const *lwe_array,
-    uint32_t lwe_dimension, uint32_t num_glwes, uint32_t polynomial_size,
-    uint32_t glwe_dimension) {
-
-  uint32_t glwe_id = blockIdx.x * blockDim.x + threadIdx.x;
-  uint32_t degree = glwe_id; // lwe 0 rotate 0, lwe 1 rotate 1, .. , lwe
-                             // poly_size-1 rotate poly_size-1
-  uint32_t coeffIdx = blockIdx.y * blockDim.y + threadIdx.y;
-
-  if (glwe_id >= num_glwes)
-    return;
-  if (coeffIdx >= polynomial_size)
-    return;
-
-  auto in_poly =
-      in_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
-  auto out_result =
-      out_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
-  if (coeffIdx == 0) {
-    // Add the message value of the input LWE (`C`) to the N-th coefficient
-    // in the GLWE . KSK dot product
-
-    // The C is added to the first position of the last polynomial in the GLWE
-    // which has (glwe_dimension+1) polynomials
-    // The C value is extracted as the last value of the LWE ct. (of index
-    // glwe_id) the LWEs have (polynomial_size + 1) values
-    in_poly[polynomial_size * glwe_dimension] =
-        lwe_array[glwe_id * (lwe_dimension + 1) + lwe_dimension] -
-        in_poly[polynomial_size * glwe_dimension];
-
-    for (int gi = 1; gi < glwe_dimension; ++gi)
-      in_poly[coeffIdx + gi * polynomial_size] =
-          -in_poly[coeffIdx + gi * polynomial_size];
-
-  } else {
-    // Otherwise simply negate the input coefficient
-    for (int gi = 1; gi < glwe_dimension + 1; ++gi)
-      in_poly[coeffIdx + gi * polynomial_size] =
-          -in_poly[coeffIdx + gi * polynomial_size];
-  }
-  // Negate all the coefficients for rotation for the first poly
-  in_poly[coeffIdx] = -in_poly[coeffIdx];
-
-  // rotate the body
-  polynomial_accumulate_monic_monomial_mul<Torus>(
-      out_result, in_poly, degree, coeffIdx, polynomial_size, 1, true);
-  // rotate the mask too
-  for (int gi = 1; gi < glwe_dimension + 1; ++gi)
-    polynomial_accumulate_monic_monomial_mul<Torus>(
-        out_result + gi * polynomial_size, in_poly + gi * polynomial_size,
-        degree, coeffIdx, polynomial_size, 1, true);
-}
-
-template <typename Torus, typename TorusVec>
-__host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
-    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
-    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t base_log, uint32_t level_count, uint32_t num_lwes) {
-
-  // Optimization of packing keyswitch when packing many LWEs
-
-  cuda_set_device(gpu_index);
-  check_cuda_error(cudaGetLastError());
-
-  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
-
-  // The fast path of PKS uses the scratch buffer (d_mem) differently than the
-  // old path: it needs to store the decomposed masks in the first half of this
-  // buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
-  // scratch buffer for the fast path must determine the half-size of the
-  // scratch buffer as the max between the size of the GLWE and the size of the
-  // LWE-mask times two (to keep both decomposition state and decomposed
-  // intermediate value)
-  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
-                        ? glwe_accumulator_size
-                        : lwe_dimension * 2;
-
-  // ping pong the buffer between successive calls
-  // split the buffer in two parts of this size
-  auto d_mem_0 = (Torus *)fp_ks_buffer;
-  auto d_mem_1 = d_mem_0 + num_lwes * memory_unit;
-
-  // Set the scratch buffer to 0 as it is used to accumulate
-  // decomposition temporary results
-  cuda_memset_async(d_mem_1, 0, num_lwes * memory_unit * sizeof(Torus), stream,
-                    gpu_index);
-  check_cuda_error(cudaGetLastError());
-
-  // decompose LWEs
-  // don't decompose LWE body - the LWE has lwe_size + 1 elements. The last
-  // element, the body is ignored by rounding down the number of blocks assuming
-  // here that the LWE dimension is a multiple of the block size
-  dim3 grid_decomp(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
-                   CEIL_DIV(lwe_dimension, BLOCK_SIZE_DECOMP));
-  dim3 threads_decomp(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
-
-  // decompose first level
-  decompose_vectorize_init<Torus, TorusVec>
-      <<<grid_decomp, threads_decomp, 0, stream>>>(lwe_array_in, d_mem_0,
-                                                   lwe_dimension, num_lwes,
-                                                   base_log, level_count);
-  check_cuda_error(cudaGetLastError());
-
-  // gemm to ks the individual LWEs to GLWEs
-  dim3 grid_gemm(CEIL_DIV(glwe_accumulator_size, BLOCK_SIZE_GEMM),
-                 CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
-  dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);
-
-  auto stride_KSK_buffer = glwe_accumulator_size * level_count;
-
-  uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
-  tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
-      num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0, fp_ksk_array,
-      stride_KSK_buffer, d_mem_1);
-  check_cuda_error(cudaGetLastError());
-
-  auto ksk_block_size = glwe_accumulator_size;
-
-  for (int li = 1; li < level_count; ++li) {
-    decompose_vectorize_step_inplace<Torus, TorusVec>
-        <<<grid_decomp, threads_decomp, 0, stream>>>(
-            d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
-    check_cuda_error(cudaGetLastError());
-
-    tgemm<Torus, TorusVec>
-        <<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
-            num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
-            fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
-    check_cuda_error(cudaGetLastError());
-  }
-
-  // should we include the mask in the rotation ??
-  dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
-                   CEIL_DIV(polynomial_size, BLOCK_SIZE_DECOMP));
-  dim3 threads_rotate(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
-  // rotate the GLWEs
-  polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C<Torus>
-      <<<grid_rotate, threads_rotate, 0, stream>>>(
-          d_mem_1, d_mem_0, lwe_array_in, lwe_dimension, num_lwes,
-          polynomial_size, glwe_dimension);
-  check_cuda_error(cudaGetLastError());
-
-  dim3 grid_accumulate(
-      CEIL_DIV(polynomial_size * (glwe_dimension + 1), BLOCK_SIZE_DECOMP));
-  dim3 threads_accum(BLOCK_SIZE_DECOMP);
-
-  // accumulate to a single glwe
-  accumulate_glwes<Torus><<<grid_accumulate, threads_accum, 0, stream>>>(
-      glwe_out, d_mem_0, glwe_dimension, polynomial_size, num_lwes);
-
-  check_cuda_error(cudaGetLastError());
-}
-
-#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
@@ -57,7 +57,7 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
  if (gpu_count != 1)
    PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
          "supported yet.")
-  cuda_set_device(gpu_indexes[0]);
+  cudaSetDevice(gpu_indexes[0]);

  int shared_memory_size = sizeof(double) * polynomial_size;

--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
@@ -1,8 +1,6 @@
-#include "fast_packing_keyswitch.cuh"
 #include "keyswitch.cuh"
 #include "keyswitch.h"
 #include <cstdint>
-#include <stdio.h>

 /* Perform keyswitch on a batch of 32 bits input LWE ciphertexts.
 * Head out to the equivalent operation on 64 bits for more details.
@@ -55,17 +53,15 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(

 void scratch_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t num_lwes, bool allocate_gpu_memory) {
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
+    bool allocate_gpu_memory) {
  scratch_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
-      static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer, lwe_dimension,
+      static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer,
      glwe_dimension, polynomial_size, num_lwes, allocate_gpu_memory);
 }
-
 /* Perform functional packing keyswitch on a batch of 64 bits input LWE
 * ciphertexts.
 */
-
 void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    void *stream, uint32_t gpu_index, void *glwe_array_out,
    void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
@@ -73,7 +69,7 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
    uint32_t num_lwes) {

-  host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
+  host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
      static_cast<cudaStream_t>(stream), gpu_index,
      static_cast<uint64_t *>(glwe_array_out),
      static_cast<const uint64_t *>(lwe_array_in),
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
@@ -45,19 +45,19 @@ keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
          const Torus *__restrict__ lwe_input_indexes,
          const Torus *__restrict__ ksk, uint32_t lwe_dimension_in,
          uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count) {
-  const int tid = threadIdx.x + blockIdx.y * blockDim.x;
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
  const int shmem_index = threadIdx.x + threadIdx.y * blockDim.x;

  extern __shared__ int8_t sharedmem[];
  Torus *lwe_acc_out = (Torus *)sharedmem;
  auto block_lwe_array_out = get_chunk(
-      lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1);
+      lwe_array_out, lwe_output_indexes[blockIdx.y], lwe_dimension_out + 1);

  if (tid <= lwe_dimension_out) {

    Torus local_lwe_out = 0;
    auto block_lwe_array_in = get_chunk(
-        lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);
+        lwe_array_in, lwe_input_indexes[blockIdx.y], lwe_dimension_in + 1);

    if (tid == lwe_dimension_out && threadIdx.y == 0) {
      local_lwe_out = block_lwe_array_in[lwe_dimension_in];
@@ -105,22 +105,16 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
    uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
    uint32_t num_samples) {

-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);

  constexpr int num_threads_y = 32;
-  int num_blocks_per_sample, num_threads_x;
+  int num_blocks, num_threads_x;

  getNumBlocksAndThreads2D(lwe_dimension_out + 1, 512, num_threads_y,
-                           num_blocks_per_sample, num_threads_x);
+                           num_blocks, num_threads_x);

  int shared_mem = sizeof(Torus) * num_threads_y * num_threads_x;
-  if (num_blocks_per_sample > 65536)
-    PANIC("Cuda error (Keyswith): number of blocks per sample is too large");
-
-  // In multiplication of large integers (512, 1024, 2048), the number of
-  // samples can be larger than 65536, so we need to set it in the first
-  // dimension of the grid
-  dim3 grid(num_samples, num_blocks_per_sample, 1);
+  dim3 grid(num_blocks, num_samples, 1);
  dim3 threads(num_threads_x, num_threads_y, 1);

  keyswitch<Torus><<<grid, threads, shared_mem, stream>>>(
@@ -164,22 +158,16 @@ void execute_keyswitch_async(cudaStream_t const *streams,
 template <typename Torus>
 __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
    cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t num_lwes, bool allocate_gpu_memory) {
-  cuda_set_device(gpu_index);
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
+    bool allocate_gpu_memory) {
+  cudaSetDevice(gpu_index);

  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;

-  // allocate at least LWE-mask times two: to keep both decomposition state and
-  // decomposed intermediate value
-  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
-                        ? glwe_accumulator_size
-                        : lwe_dimension * 2;
-
-  if (allocate_gpu_memory) {
+  if (allocate_gpu_memory)
    *fp_ks_buffer = (int8_t *)cuda_malloc_async(
-        2 * num_lwes * memory_unit * sizeof(Torus), stream, gpu_index);
-  }
+        2 * num_lwes * glwe_accumulator_size * sizeof(Torus), stream,
+        gpu_index);
 }

 // public functional packing keyswitch for a single LWE ciphertext
@@ -229,6 +217,43 @@ __device__ void packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext(
  }
 }

+// Batched functional packing keyswitch, one input LWE per y-block.
+//
+// blockIdx.y picks the LWE to process and is also used as the monomial degree
+// applied to its keyswitched GLWE. Along x, the launch is expected to supply
+// (glwe_dimension + 1) * polynomial_size threads per input, possibly spread
+// over several thread blocks.
+template <typename Torus>
+__global__ void packing_keyswitch_lwe_list_to_glwe(
+    Torus *glwe_array_out, Torus const *lwe_array_in, Torus const *fp_ksk,
+    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
+    Torus *d_mem) {
+  const int lwe_id = blockIdx.y;
+  const int thread_id = blockIdx.x * blockDim.x + threadIdx.x;
+
+  const int lwe_size = (lwe_dimension_in + 1);
+  const int glwe_size = (glwe_dimension + 1) * polynomial_size;
+
+  // Slots owned by this input: source LWE, scratch GLWE and output GLWE
+  Torus const *src_lwe = lwe_array_in + lwe_id * lwe_size;
+  Torus *scratch_glwe = d_mem + lwe_id * glwe_size;
+  Torus *dst_glwe = glwe_array_out + lwe_id * glwe_size;
+
+  // LWE -> GLWE keyswitch into the scratch slot
+  packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
+      scratch_glwe, src_lwe, fp_ksk, lwe_dimension_in, glwe_dimension,
+      polynomial_size, base_log, level_count);
+
+  // P * X^lwe_id on the polynomial containing this thread's coefficient; the
+  // result goes to the matching polynomial of the output slot
+  const uint32_t poly_start = (thread_id / polynomial_size) * polynomial_size;
+  const uint32_t coeff_id = thread_id % polynomial_size;
+  polynomial_accumulate_monic_monomial_mul<Torus>(
+      dst_glwe + poly_start, scratch_glwe + poly_start, lwe_id, coeff_id,
+      polynomial_size, 1, true);
+}
+
 /// To-do: Rewrite this kernel for efficiency
 template <typename Torus>
 __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
@@ -246,4 +271,42 @@ __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
  }
 }

+// Packs num_lwes LWE ciphertexts into the single GLWE glwe_out.
+// fp_ks_buffer is scratch space and must hold 2 * num_lwes GLWEs of Torus.
+template <typename Torus>
+__host__ void host_packing_keyswitch_lwe_list_to_glwe(
+    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
+    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
+    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
+    uint32_t num_lwes) {
+  if (num_lwes > polynomial_size)
+    PANIC("Cuda error: too many LWEs to pack. The number of LWEs should be "
+          "smaller than or equal to polynomial_size.")
+
+  check_cuda_error(cudaSetDevice(gpu_index));
+  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
+  // grid: x sized for one GLWE's coefficients, y = one row per input LWE
+  int num_blocks = 0, num_threads = 0;
+  getNumBlocksAndThreads(glwe_accumulator_size, 128, num_blocks, num_threads);
+  dim3 grid(num_blocks, num_lwes);
+  dim3 threads(num_threads);
+
+  // scratch halves: [keyswitch results | per-LWE GLWEs to be accumulated]
+  auto d_mem = (Torus *)fp_ks_buffer;
+  auto d_tmp_glwe_array_out = d_mem + num_lwes * glwe_accumulator_size;
+
+  // individually keyswitch each lwe
+  packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(
+      d_tmp_glwe_array_out, lwe_array_in, fp_ksk_array, lwe_dimension_in,
+      glwe_dimension, polynomial_size, base_log, level_count, d_mem);
+  check_cuda_error(cudaGetLastError());
+
+  // accumulate to a single glwe
+  accumulate_glwes<Torus><<<num_blocks, threads, 0, stream>>>(
+      glwe_out, d_tmp_glwe_array_out, glwe_dimension, polynomial_size,
+      num_lwes);
+  check_cuda_error(cudaGetLastError());
+}
+
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
@@ -110,7 +110,7 @@ template <typename Torus>
 __host__ void host_modulus_switch_inplace(cudaStream_t stream,
                                          uint32_t gpu_index, Torus *array,
                                          int size, uint32_t log_modulus) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);

  int num_threads = 0, num_blocks = 0;
  getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);
--- a/backends/tfhe-cuda-backend/cuda/src/device.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/device.cu
@@ -2,12 +2,8 @@
 #include <cstdint>
 #include <cuda_runtime.h>

-void cuda_set_device(uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-}
-
 cudaEvent_t cuda_create_event(uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  cudaEvent_t event;
  check_cuda_error(cudaEventCreate(&event));
  return event;
@@ -15,24 +11,24 @@ cudaEvent_t cuda_create_event(uint32_t gpu_index) {

 void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
                       uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaEventRecord(event, stream));
 }

 void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
                            uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
 }

 void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaEventDestroy(event));
 }

 /// Unsafe function to create a CUDA stream, must check first that GPU exists
 cudaStream_t cuda_create_stream(uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  cudaStream_t stream;
  check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  return stream;
@@ -40,22 +36,15 @@ cudaStream_t cuda_create_stream(uint32_t gpu_index) {

 /// Unsafe function to destroy CUDA stream, must check first the GPU exists
 void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaStreamDestroy(stream));
 }

 void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaStreamSynchronize(stream));
 }

-void synchronize_streams(cudaStream_t const *streams,
-                         uint32_t const *gpu_indexes, uint32_t gpu_count) {
-  for (uint i = 0; i < gpu_count; i++) {
-    cuda_synchronize_stream(streams[i], gpu_indexes[i]);
-  }
-}
-
 // Determine if a CUDA device is available at runtime
 uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }

@@ -63,7 +52,7 @@ uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
 /// or if there's not enough memory. A safe wrapper around it must call
 /// cuda_check_valid_malloc() first
 void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  void *ptr;
  check_cuda_error(cudaMalloc((void **)&ptr, size));

@@ -74,7 +63,7 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
 /// asynchronously.
 void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
                        uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  void *ptr;

 #ifndef CUDART_VERSION
@@ -97,7 +86,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream,

 /// Check that allocation is valid
 void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  size_t total_mem, free_mem;
  check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
  if (size > free_mem) {
@@ -145,7 +134,7 @@ void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
    PANIC("Cuda error: invalid device pointer in async copy to GPU.")
  }

-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(
      cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
 }
@@ -165,7 +154,7 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
  if (attr_src.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
  }
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  if (attr_src.device == attr_dest.device) {
    check_cuda_error(
        cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
@@ -190,7 +179,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
  if (attr_src.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
  }
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  if (attr_src.device == attr_dest.device) {
    check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice));
  } else {
@@ -201,7 +190,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,

 /// Synchronizes device
 void cuda_synchronize_device(uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaDeviceSynchronize());
 }

@@ -214,7 +203,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
  if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid dest device pointer in cuda memset.")
  }
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
 }

@@ -234,7 +223,7 @@ void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
    if (attr.type != cudaMemoryTypeDevice) {
      PANIC("Cuda error: invalid dest device pointer in cuda set value.")
    }
-    cuda_set_device(gpu_index);
+    check_cuda_error(cudaSetDevice(gpu_index));
    int block_size = 256;
    int num_blocks = (n + block_size - 1) / block_size;

@@ -264,7 +253,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
    PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
  }

-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(
      cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
 }
@@ -278,14 +267,14 @@ int cuda_get_number_of_gpus() {

 /// Drop a cuda array
 void cuda_drop(void *ptr, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaFree(ptr));
 }

 /// Drop a cuda array asynchronously, if supported on the device
 void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {

-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
 #ifndef CUDART_VERSION
 #error CUDART_VERSION Undefined!
 #elif (CUDART_VERSION >= 11020)
--- a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cu
@@ -22,14 +22,15 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
-    void *const *bsks, void *const *ksks) {
+    void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks) {

  auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;

  host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
-                                gpu_count, ct, bsks, (uint64_t **)(ksks), mem,
-                                is_signed);
+                                gpu_count, static_cast<uint64_t *>(ct), bsks,
+                                (uint64_t **)(ksks), mem, is_signed,
+                                num_blocks);
 }

 void cleanup_cuda_integer_abs_inplace(void *const *streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/abs.cuh
@@ -2,12 +2,15 @@
 #define TFHE_RS_ABS_CUH

 #include "crypto/keyswitch.cuh"
+#include "device.h"
 #include "integer/bitwise_ops.cuh"
 #include "integer/comparison.cuh"
 #include "integer/integer.cuh"
+#include "integer/integer_utilities.h"
 #include "integer/negation.cuh"
 #include "integer/scalar_shifts.cuh"
-#include "radix_ciphertext.cuh"
+#include "linear_algebra.h"
+#include "pbs/programmable_bootstrap.h"
 #include "utils/helper.cuh"
 #include "utils/kernel_dimensions.cuh"
 #include <fstream>
@@ -29,15 +32,16 @@ __host__ void scratch_cuda_integer_abs_kb(
 }

 template <typename Torus>
-__host__ void legacy_host_integer_abs_kb_async(
-    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *ct, void *const *bsks, uint64_t *const *ksks,
-    int_abs_buffer<uint64_t> *mem_ptr, bool is_signed, uint32_t num_blocks) {
+__host__ void
+host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
+                    uint32_t gpu_count, Torus *ct, void *const *bsks,
+                    uint64_t *const *ksks, int_abs_buffer<uint64_t> *mem_ptr,
+                    bool is_signed, uint32_t num_blocks) {
  if (!is_signed)
    return;

  auto radix_params = mem_ptr->params;
-  auto mask = (Torus *)(mem_ptr->mask->ptr);
+  auto mask = mem_ptr->mask;

  auto big_lwe_dimension = radix_params.big_lwe_dimension;
  auto big_lwe_size = big_lwe_dimension + 1;
@@ -48,55 +52,20 @@ __host__ void legacy_host_integer_abs_kb_async(
  cuda_memcpy_async_gpu_to_gpu(mask, ct, num_blocks * big_lwe_size_bytes,
                               streams[0], gpu_indexes[0]);

-  legacy_host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
+  host_integer_radix_arithmetic_scalar_shift_kb_inplace(
      streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
      mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, num_blocks);
-  legacy_host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
-                              radix_params.big_lwe_dimension, num_blocks);
+  host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
+                       radix_params.big_lwe_dimension, num_blocks);

  uint32_t requested_flag = outputFlag::FLAG_NONE;
  uint32_t uses_carry = 0;
-  legacy_host_propagate_single_carry<Torus>(
+  host_propagate_single_carry<Torus>(
      streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
      bsks, ksks, num_blocks, requested_flag, uses_carry);

-  // legacy bitop
-  legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-      streams, gpu_indexes, gpu_count, ct, mask, ct, bsks, ksks, num_blocks,
-      mem_ptr->bitxor_mem->lut, mem_ptr->bitxor_mem->params.message_modulus);
-}
-
-template <typename Torus>
-__host__ void
-host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
-                    uint32_t gpu_count, CudaRadixCiphertextFFI *ct,
-                    void *const *bsks, uint64_t *const *ksks,
-                    int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
-  if (!is_signed)
-    return;
-
-  auto mask = mem_ptr->mask;
-
-  uint32_t num_bits_in_ciphertext =
-      (31 - __builtin_clz(mem_ptr->params.message_modulus)) *
-      ct->num_radix_blocks;
-
-  copy_radix_ciphertext_async<Torus>(streams[0], gpu_indexes[0], mask, ct);
-
-  host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
-      streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
-      mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);
-  host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
-                       ct->num_radix_blocks);
-
-  uint32_t requested_flag = outputFlag::FLAG_NONE;
-  uint32_t uses_carry = 0;
-  host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
-                                     nullptr, nullptr, mem_ptr->scp_mem, bsks,
-                                     ksks, requested_flag, uses_carry);
-
-  host_integer_radix_bitop_kb<Torus>(streams, gpu_indexes, gpu_count, ct, mask,
-                                     ct, mem_ptr->bitxor_mem, bsks, ksks);
+  host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
+                              mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
 }

 #endif // TFHE_RS_ABS_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
@@ -22,15 +22,17 @@ void scratch_cuda_integer_radix_bitop_kb_64(

 void cuda_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_1,
-    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks) {
+    void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t lwe_ciphertext_count) {

  host_integer_radix_bitop_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
-      lwe_array_1, lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
-      (uint64_t **)(ksks));
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
+      static_cast<uint64_t *>(lwe_array_out),
+      static_cast<const uint64_t *>(lwe_array_1),
+      static_cast<const uint64_t *>(lwe_array_2),
+      (int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
+      lwe_ciphertext_count);
 }

 void cleanup_cuda_integer_bitop(void *const *streams,
@@ -41,50 +43,3 @@ void cleanup_cuda_integer_bitop(void *const *streams,
      (int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
 }
-
-void update_degrees_after_bitand(uint64_t *output_degrees,
-                                 uint64_t *lwe_array_1_degrees,
-                                 uint64_t *lwe_array_2_degrees,
-                                 uint32_t num_radix_blocks) {
-  for (uint i = 0; i < num_radix_blocks; i++) {
-    output_degrees[i] =
-        std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
-  }
-}
-
-void update_degrees_after_bitor(uint64_t *output_degrees,
-                                uint64_t *lwe_array_1_degrees,
-                                uint64_t *lwe_array_2_degrees,
-                                uint32_t num_radix_blocks) {
-  for (uint i = 0; i < num_radix_blocks; i++) {
-    auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
-    auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
-    auto result = max;
-
-    for (uint j = 0; j < min + 1; j++) {
-      if (max | j > result) {
-        result = max | j;
-      }
-    }
-    output_degrees[i] = result;
-  }
-}
-
-void update_degrees_after_bitxor(uint64_t *output_degrees,
-                                 uint64_t *lwe_array_1_degrees,
-                                 uint64_t *lwe_array_2_degrees,
-                                 uint32_t num_radix_blocks) {
-  for (uint i = 0; i < num_radix_blocks; i++) {
-    auto max = std::max(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
-    auto min = std::min(lwe_array_1_degrees[i], lwe_array_2_degrees[i]);
-    auto result = max;
-
-    // Try every possibility to find the worst case
-    for (uint j = 0; j < min + 1; j++) {
-      if (max ^ j > result) {
-        result = max ^ j;
-      }
-    }
-    output_degrees[i] = result;
-  }
-}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
@@ -14,34 +14,15 @@
 template <typename Torus>
 __host__ void host_integer_radix_bitop_kb(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_1,
-    CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
-    void *const *bsks, Torus *const *ksks) {
+    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_1,
+    Torus const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
+    void *const *bsks, Torus *const *ksks, uint32_t num_radix_blocks) {

  auto lut = mem_ptr->lut;
-  uint64_t degrees[lwe_array_1->num_radix_blocks];
-  if (mem_ptr->op == BITOP_TYPE::BITAND) {
-    update_degrees_after_bitand(degrees, lwe_array_1->degrees,
-                                lwe_array_2->degrees,
-                                lwe_array_1->num_radix_blocks);
-  } else if (mem_ptr->op == BITOP_TYPE::BITOR) {
-    update_degrees_after_bitor(degrees, lwe_array_1->degrees,
-                               lwe_array_2->degrees,
-                               lwe_array_1->num_radix_blocks);
-  } else if (mem_ptr->op == BITXOR) {
-    update_degrees_after_bitxor(degrees, lwe_array_1->degrees,
-                                lwe_array_2->degrees,
-                                lwe_array_1->num_radix_blocks);
-  }

  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
-      bsks, ksks, lut, lwe_array_out->num_radix_blocks,
-      lut->params.message_modulus);
-
-  memcpy(lwe_array_out->degrees, degrees,
-         lwe_array_out->num_radix_blocks * sizeof(uint64_t));
+      bsks, ksks, num_radix_blocks, lut, lut->params.message_modulus);
 }

 template <typename Torus>
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
@@ -25,16 +25,19 @@ void scratch_cuda_integer_radix_cmux_kb_64(

 void cuda_cmux_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_condition,
-    CudaRadixCiphertextFFI const *lwe_array_true,
-    CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks) {
+    void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
+    void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t lwe_ciphertext_count) {

  host_integer_radix_cmux_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
-      lwe_condition, lwe_array_true, lwe_array_false,
-      (int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
+      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
+      static_cast<uint64_t *>(lwe_array_out),
+      static_cast<const uint64_t *>(lwe_condition),
+      static_cast<const uint64_t *>(lwe_array_true),
+      static_cast<const uint64_t *>(lwe_array_false),
+      (int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
+
+      lwe_ciphertext_count);
 }

 void cleanup_cuda_integer_radix_cmux(void *const *streams,
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
@@ -2,7 +2,6 @@
 #define CUDA_INTEGER_CMUX_CUH

 #include "integer.cuh"
-#include "radix_ciphertext.cuh"

 template <typename Torus>
 __host__ void zero_out_if(cudaStream_t const *streams,
@@ -12,25 +11,25 @@ __host__ void zero_out_if(cudaStream_t const *streams,
                          int_zero_out_if_buffer<Torus> *mem_ptr,
                          int_radix_lut<Torus> *predicate, void *const *bsks,
                          Torus *const *ksks, uint32_t num_radix_blocks) {
-  cuda_set_device(gpu_indexes[0]);
+  cudaSetDevice(gpu_indexes[0]);
  auto params = mem_ptr->params;

  // We can't use integer_radix_apply_bivariate_lookup_table_kb since the
  // second operand is not an array
  auto tmp_lwe_array_input = mem_ptr->tmp;
-  host_pack_bivariate_blocks_with_single_block<Torus>(
+  pack_bivariate_blocks_with_single_block<Torus>(
      streams, gpu_indexes, gpu_count, tmp_lwe_array_input,
      predicate->lwe_indexes_in, lwe_array_input, lwe_condition,
      predicate->lwe_indexes_in, params.big_lwe_dimension,
      params.message_modulus, num_radix_blocks);

-  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
      ksks, num_radix_blocks, predicate);
 }

 template <typename Torus>
-__host__ void legacy_host_integer_radix_cmux_kb(
+__host__ void host_integer_radix_cmux_kb(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_condition,
    Torus const *lwe_array_true, Torus const *lwe_array_false,
@@ -38,90 +37,43 @@ __host__ void legacy_host_integer_radix_cmux_kb(
    uint32_t num_radix_blocks) {

  auto params = mem_ptr->params;
-  Torus lwe_size = params.big_lwe_dimension + 1;
-  Torus radix_lwe_size = lwe_size * num_radix_blocks;
-  cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in->ptr, lwe_array_true,
-                               radix_lwe_size * sizeof(Torus), streams[0],
-                               gpu_indexes[0]);
-  cuda_memcpy_async_gpu_to_gpu(
-      (Torus *)(mem_ptr->buffer_in->ptr) + radix_lwe_size, lwe_array_false,
-      radix_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
-  for (uint i = 0; i < 2 * num_radix_blocks; i++) {
-    cuda_memcpy_async_gpu_to_gpu(
-        (Torus *)(mem_ptr->condition_array->ptr) + i * lwe_size, lwe_condition,
-        lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
+
+  // Since our CPU threads will be working on different streams we shall assert
+  // the work in the main stream is completed
+  auto true_streams = mem_ptr->zero_if_true_buffer->true_streams;
+  auto false_streams = mem_ptr->zero_if_false_buffer->false_streams;
+  for (uint j = 0; j < gpu_count; j++) {
+    cuda_synchronize_stream(streams[j], gpu_indexes[j]);
+  }
+
+  auto mem_true = mem_ptr->zero_if_true_buffer;
+  zero_out_if<Torus>(true_streams, gpu_indexes, gpu_count, mem_ptr->tmp_true_ct,
+                     lwe_array_true, lwe_condition, mem_true,
+                     mem_ptr->inverted_predicate_lut, bsks, ksks,
+                     num_radix_blocks);
+  auto mem_false = mem_ptr->zero_if_false_buffer;
+  zero_out_if<Torus>(false_streams, gpu_indexes, gpu_count,
+                     mem_ptr->tmp_false_ct, lwe_array_false, lwe_condition,
+                     mem_false, mem_ptr->predicate_lut, bsks, ksks,
+                     num_radix_blocks);
+  for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
+    cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
+  }
+  for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
+    cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
  }
-  legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-      streams, gpu_indexes, gpu_count, (Torus *)(mem_ptr->buffer_out->ptr),
-      (Torus *)(mem_ptr->buffer_in->ptr),
-      (Torus *)(mem_ptr->condition_array->ptr), bsks, ksks,
-      2 * num_radix_blocks, mem_ptr->predicate_lut, params.message_modulus);

  // If the condition was true, true_ct will have kept its value and false_ct
  // will be 0 If the condition was false, true_ct will be 0 and false_ct will
  // have kept its value
-  auto mem_true = (Torus *)(mem_ptr->buffer_out->ptr);
-  auto ptr = (Torus *)mem_ptr->buffer_out->ptr;
-  auto mem_false = &ptr[radix_lwe_size];
-  auto added_cts = mem_true;
-  legacy_host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
-                              mem_false, params.big_lwe_dimension,
-                              num_radix_blocks);
-
-  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
-      num_radix_blocks, mem_ptr->message_extract_lut);
-}
-
-template <typename Torus>
-__host__ void host_integer_radix_cmux_kb(
-    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_condition,
-    CudaRadixCiphertextFFI const *lwe_array_true,
-    CudaRadixCiphertextFFI const *lwe_array_false,
-    int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {
-
-  if (lwe_array_out->num_radix_blocks != lwe_array_true->num_radix_blocks)
-    PANIC("Cuda error: input and output num radix blocks must be the same")
-  if (lwe_array_out->num_radix_blocks != lwe_array_false->num_radix_blocks)
-    PANIC("Cuda error: input and output num radix blocks must be the same")
-
-  auto num_radix_blocks = lwe_array_out->num_radix_blocks;
-  auto params = mem_ptr->params;
-  Torus lwe_size = params.big_lwe_dimension + 1;
-  copy_radix_ciphertext_slice_async<Torus>(
-      streams[0], gpu_indexes[0], mem_ptr->buffer_in, 0, num_radix_blocks,
-      lwe_array_true, 0, num_radix_blocks);
-  copy_radix_ciphertext_slice_async<Torus>(
-      streams[0], gpu_indexes[0], mem_ptr->buffer_in, num_radix_blocks,
-      2 * num_radix_blocks, lwe_array_false, 0, num_radix_blocks);
-  for (uint i = 0; i < 2 * num_radix_blocks; i++) {
-    copy_radix_ciphertext_slice_async<Torus>(streams[0], gpu_indexes[0],
-                                             mem_ptr->condition_array, i, i + 1,
-                                             lwe_condition, 0, 1);
-  }
-  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-      streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
-      mem_ptr->condition_array, bsks, ksks, mem_ptr->predicate_lut,
-      2 * num_radix_blocks, params.message_modulus);
-
-  // If the condition was true, true_ct will have kept its value and false_ct
-  // will be 0 If the condition was false, true_ct will be 0 and false_ct will
-  // have kept its value
-  CudaRadixCiphertextFFI mem_true;
-  CudaRadixCiphertextFFI mem_false;
-  as_radix_ciphertext_slice<Torus>(&mem_true, mem_ptr->buffer_out, 0,
-                                   num_radix_blocks);
-  as_radix_ciphertext_slice<Torus>(&mem_false, mem_ptr->buffer_out,
-                                   num_radix_blocks, 2 * num_radix_blocks);
-
-  host_addition<Torus>(streams[0], gpu_indexes[0], &mem_true, &mem_true,
-                       &mem_false, num_radix_blocks);
+  auto added_cts = mem_ptr->tmp_true_ct;
+  host_addition<Torus>(streams[0], gpu_indexes[0], added_cts,
+                       mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
+                       params.big_lwe_dimension, num_radix_blocks);

  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      streams, gpu_indexes, gpu_count, lwe_array_out, &mem_true, bsks, ksks,
-      mem_ptr->message_extract_lut, num_radix_blocks);
+      streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
+      num_radix_blocks, mem_ptr->message_extract_lut);
 }

 template <typename Torus>
--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
@@ -58,9 +58,6 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
  case GE:
  case LT:
  case LE:
-    if (num_radix_blocks % 2 != 0)
-      PANIC("Cuda error (comparisons): the number of radix blocks has to be "
-            "even.")
    host_integer_radix_difference_check_kb<uint64_t>(
        (cudaStream_t *)(streams), gpu_indexes, gpu_count,
        static_cast<uint64_t *>(lwe_array_out),
@@ -71,8 +68,6 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
    break;
  case MAX:
  case MIN:
-    if (num_radix_blocks % 2 != 0)
-      PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
    host_integer_radix_maxmin_kb<uint64_t>(
        (cudaStream_t *)(streams), gpu_indexes, gpu_count,
        static_cast<uint64_t *>(lwe_array_out),
@@ -94,91 +89,3 @@ void cleanup_cuda_integer_comparison(void *const *streams,
      (int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
 }
-
-void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
-    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
-    uint32_t grouping_factor, uint32_t num_radix_blocks,
-    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory) {
-
-  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
-                          big_lwe_dimension, small_lwe_dimension, ks_level,
-                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
-                          message_modulus, carry_modulus);
-
-  scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
-      (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
-      false, allocate_gpu_memory);
-}
-
-void cuda_integer_are_all_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
-
-  int_comparison_buffer<uint64_t> *buffer =
-      (int_comparison_buffer<uint64_t> *)mem_ptr;
-
-  host_integer_are_all_comparisons_block_true_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
-      static_cast<uint64_t *>(lwe_array_out),
-      static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
-      (uint64_t **)(ksks), num_radix_blocks);
-}
-
-void cleanup_cuda_integer_are_all_comparisons_block_true(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr_void) {
-
-  int_comparison_buffer<uint64_t> *mem_ptr =
-      (int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
-  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
-}
-
-void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
-    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
-    uint32_t grouping_factor, uint32_t num_radix_blocks,
-    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    bool allocate_gpu_memory) {
-
-  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
-                          big_lwe_dimension, small_lwe_dimension, ks_level,
-                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
-                          message_modulus, carry_modulus);
-
-  scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
-      (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
-      false, allocate_gpu_memory);
-}
-
-void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
-
-  int_comparison_buffer<uint64_t> *buffer =
-      (int_comparison_buffer<uint64_t> *)mem_ptr;
-
-  host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
-      (cudaStream_t *)(streams), gpu_indexes, gpu_count,
-      static_cast<uint64_t *>(lwe_array_out),
-      static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
-      (uint64_t **)(ksks), num_radix_blocks);
-}
-
-void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    int8_t **mem_ptr_void) {
-
-  int_comparison_buffer<uint64_t> *mem_ptr =
-      (int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
-  mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
-}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
@@ -38,7 +38,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
                                    uint32_t lwe_dimension,
                                    uint32_t num_radix_blocks) {

-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);
  int num_blocks = 0, num_threads = 0;
  int num_entries = (lwe_dimension + 1);
  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
@@ -58,7 +58,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
 template <typename Torus>
 __host__ void are_all_comparisons_block_true(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
+    uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
    Torus *const *ksks, uint32_t num_radix_blocks) {

@@ -85,19 +85,16 @@ __host__ void are_all_comparisons_block_true(

  while (remaining_blocks > 0) {
    // Split in max_value chunks
-    int num_chunks = (remaining_blocks + max_value - 1) / max_value;
+    uint32_t chunk_length = std::min(max_value, remaining_blocks);
+    int num_chunks = remaining_blocks / chunk_length;

    // Since all blocks encrypt either 0 or 1, we can sum max_value of them
    // as in the worst case we will be adding `max_value` ones
    auto input_blocks = tmp_out;
    auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
-    auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
-    uint32_t chunk_lengths[num_chunks];
-    auto begin_remaining_blocks = remaining_blocks;
+    auto is_equal_to_num_blocks_map =
+        &are_all_block_true_buffer->is_equal_to_lut_map;
    for (int i = 0; i < num_chunks; i++) {
-      uint32_t chunk_length =
-          std::min(max_value, begin_remaining_blocks - i * max_value);
-      chunk_lengths[i] = chunk_length;
      accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
                                   input_blocks, big_lwe_dimension,
                                   chunk_length);
@@ -114,46 +111,40 @@ __host__ void are_all_comparisons_block_true(
      // is_non_zero_lut_buffer LUT
      lut = mem_ptr->eq_buffer->is_non_zero_lut;
    } else {
-      if (chunk_lengths[num_chunks - 1] != max_value) {
+      if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
+          (*is_equal_to_num_blocks_map).end()) {
+        // The LUT is already computed
+        lut = (*is_equal_to_num_blocks_map)[chunk_length];
+      } else {
        // LUT needs to be computed
-        uint32_t chunk_length = chunk_lengths[num_chunks - 1];
+        auto new_lut =
+            new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
+                                     max_value, num_radix_blocks, true);
+
        auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
          return x == chunk_length;
        };
        generate_device_accumulator<Torus>(
-            streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
-            is_max_value_lut->get_degree(1),
-            is_max_value_lut->get_max_degree(1), glwe_dimension,
-            polynomial_size, message_modulus, carry_modulus,
+            streams[0], gpu_indexes[0], new_lut->get_lut(gpu_indexes[0], 0),
+            glwe_dimension, polynomial_size, message_modulus, carry_modulus,
            is_equal_to_num_blocks_lut_f);

-        Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
-        for (int index = 0; index < num_chunks; index++) {
-          if (index == num_chunks - 1) {
-            h_lut_indexes[index] = 1;
-          } else {
-            h_lut_indexes[index] = 0;
-          }
-        }
-        cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
-                                 h_lut_indexes, num_chunks * sizeof(Torus),
-                                 streams[0], gpu_indexes[0]);
-        is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
-        cuda_synchronize_stream(streams[0], gpu_indexes[0]);
-        free(h_lut_indexes);
+        new_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
+
+        (*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
+        lut = new_lut;
      }
-      lut = is_max_value_lut;
    }

    // Applies the LUT
    if (remaining_blocks == 1) {
      // In the last iteration we copy the output to the final address
-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
          ksks, 1, lut);
      return;
    } else {
-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
          num_chunks, lut);
    }
@@ -169,7 +160,7 @@ __host__ void are_all_comparisons_block_true(
 template <typename Torus>
 __host__ void is_at_least_one_comparisons_block_true(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
+    uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
    Torus *const *ksks, uint32_t num_radix_blocks) {

@@ -191,18 +182,14 @@ __host__ void is_at_least_one_comparisons_block_true(
  uint32_t remaining_blocks = num_radix_blocks;
  while (remaining_blocks > 0) {
    // Split in max_value chunks
-    int num_chunks = (remaining_blocks + max_value - 1) / max_value;
+    uint32_t chunk_length = std::min(max_value, remaining_blocks);
+    int num_chunks = remaining_blocks / chunk_length;

    // Since all blocks encrypt either 0 or 1, we can sum max_value of them
    // as in the worst case we will be adding `max_value` ones
    auto input_blocks = mem_ptr->tmp_lwe_array_out;
    auto accumulator = buffer->tmp_block_accumulated;
-    uint32_t chunk_lengths[num_chunks];
-    auto begin_remaining_blocks = remaining_blocks;
    for (int i = 0; i < num_chunks; i++) {
-      uint32_t chunk_length =
-          std::min(max_value, begin_remaining_blocks - i * max_value);
-      chunk_lengths[i] = chunk_length;
      accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
                                   input_blocks, big_lwe_dimension,
                                   chunk_length);
@@ -219,12 +206,12 @@ __host__ void is_at_least_one_comparisons_block_true(
    // Applies the LUT
    if (remaining_blocks == 1) {
      // In the last iteration we copy the output to the final address
-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
          ksks, 1, lut);
      return;
    } else {
-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, mem_ptr->tmp_lwe_array_out,
          accumulator, bsks, ksks, num_chunks, lut);
    }
@@ -305,7 +292,7 @@ __host__ void host_compare_with_zero_equality(
    }
  }

-  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, sum, sum, bsks, ksks, num_sum_blocks,
      zero_comparison);
  are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
@@ -324,7 +311,7 @@ __host__ void host_integer_radix_equality_check_kb(

  // Applies the LUT for the comparison operation
  auto comparisons = mem_ptr->tmp_block_comparisons;
-  legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, comparisons, lwe_array_1, lwe_array_2,
      bsks, ksks, num_radix_blocks, eq_buffer->operator_lut,
      eq_buffer->operator_lut->params.message_modulus);
@@ -371,14 +358,14 @@ __host__ void compare_radix_blocks_kb(

  // Apply LUT to compare to 0
  auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
-  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_out, bsks, ksks,
      num_radix_blocks, is_non_zero_lut);

  // Add one
  // Here Lhs can have the following values: (-1) % (message modulus * carry
  // modulus), 0, 1 So the output values after the addition will be: 0, 1, 2
-  legacy_host_integer_radix_add_scalar_one_inplace<Torus>(
+  host_integer_radix_add_scalar_one_inplace<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, big_lwe_dimension,
      num_radix_blocks, message_modulus, carry_modulus);
 }
@@ -422,7 +409,7 @@ __host__ void tree_sign_reduction(
    pack_blocks<Torus>(streams[0], gpu_indexes[0], y, x, big_lwe_dimension,
                       partial_block_count, 4);

-    legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
        partial_block_count >> 1, inner_tree_leaf);

@@ -462,13 +449,12 @@ __host__ void tree_sign_reduction(
    f = sign_handler_f;
  }
  generate_device_accumulator<Torus>(
-      streams[0], gpu_indexes[0], last_lut->get_lut(0, 0),
-      last_lut->get_degree(0), last_lut->get_max_degree(0), glwe_dimension,
-      polynomial_size, message_modulus, carry_modulus, f);
-  last_lut->broadcast_lut(streams, gpu_indexes, 0);
+      streams[0], gpu_indexes[0], last_lut->get_lut(gpu_indexes[0], 0),
+      glwe_dimension, polynomial_size, message_modulus, carry_modulus, f);
+  last_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);

  // Last leaf
-  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, y, bsks, ksks, 1,
      last_lut);
 }
@@ -495,9 +481,8 @@ __host__ void host_integer_radix_difference_check_kb(
  if (carry_modulus >= message_modulus) {
    // Packing is possible
    // Pack inputs
-    Torus *packed_left = diff_buffer->tmp_packed;
-    Torus *packed_right =
-        diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
+    Torus *packed_left = diff_buffer->tmp_packed_left;
+    Torus *packed_right = diff_buffer->tmp_packed_right;
    // In case the ciphertext is signed, the sign block and the one before it
    // are handled separately
    if (mem_ptr->is_signed) {
@@ -514,9 +499,12 @@ __host__ void host_integer_radix_difference_check_kb(

    // Clean noise
    auto identity_lut = mem_ptr->identity_lut;
-    legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+    integer_radix_apply_univariate_lookup_table_kb<Torus>(
        streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
-        2 * packed_num_radix_blocks, identity_lut);
+        packed_num_radix_blocks, identity_lut);
+    integer_radix_apply_univariate_lookup_table_kb<Torus>(
+        streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
+        packed_num_radix_blocks, identity_lut);

    lhs = packed_left;
    rhs = packed_right;
@@ -545,18 +533,16 @@ __host__ void host_integer_radix_difference_check_kb(

      // Compare the last block before the sign block separately
      auto identity_lut = mem_ptr->identity_lut;
-      Torus *packed_left = diff_buffer->tmp_packed;
-      Torus *packed_right =
-          diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
      Torus *last_left_block_before_sign_block =
-          packed_left + packed_num_radix_blocks * big_lwe_size;
+          diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
      Torus *last_right_block_before_sign_block =
-          packed_right + packed_num_radix_blocks * big_lwe_size;
-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+          diff_buffer->tmp_packed_right +
+          packed_num_radix_blocks * big_lwe_size;
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
          lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
          identity_lut);
-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, last_right_block_before_sign_block,
          lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks,
          1, identity_lut);
@@ -566,7 +552,7 @@ __host__ void host_integer_radix_difference_check_kb(
          last_left_block_before_sign_block, last_right_block_before_sign_block,
          mem_ptr, bsks, ksks, 1);
      // Compare the sign block separately
-      legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count,
          comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
          lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
@@ -579,7 +565,7 @@ __host__ void host_integer_radix_difference_check_kb(
          streams, gpu_indexes, gpu_count, comparisons, lwe_array_left,
          lwe_array_right, mem_ptr, bsks, ksks, num_radix_blocks - 1);
      // Compare the sign block separately
-      legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count,
          comparisons + (num_radix_blocks - 1) * big_lwe_size,
          lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
@@ -623,41 +609,10 @@ __host__ void host_integer_radix_maxmin_kb(
      ksks, total_num_radix_blocks);

  // Selector
-  legacy_host_integer_radix_cmux_kb<Torus>(
+  host_integer_radix_cmux_kb<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out,
      mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
      mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
 }

-template <typename Torus>
-__host__ void host_integer_are_all_comparisons_block_true_kb(
-    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
-    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks, uint32_t num_radix_blocks) {
-
-  auto eq_buffer = mem_ptr->eq_buffer;
-
-  // It returns a block encrypting 1 if all input blocks are 1
-  // otherwise the block encrypts 0
-  are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
-                                        lwe_array_out, lwe_array_in, mem_ptr,
-                                        bsks, ksks, num_radix_blocks);
-}
-
-template <typename Torus>
-__host__ void host_integer_is_at_least_one_comparisons_block_true_kb(
-    cudaStream_t const *streams, uint32_t const *gpu_indexes,
-    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
-    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
-    Torus *const *ksks, uint32_t num_radix_blocks) {
-
-  auto eq_buffer = mem_ptr->eq_buffer;
-
-  // It returns a block encrypting 1 if all input blocks are 1
-  // otherwise the block encrypts 0
-  is_at_least_one_comparisons_block_true<Torus>(
-      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_in, mem_ptr,
-      bsks, ksks, num_radix_blocks);
-}
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh
@@ -2,7 +2,6 @@
 #define CUDA_INTEGER_COMPRESSION_CUH

 #include "ciphertext.h"
-#include "crypto/fast_packing_keyswitch.cuh"
 #include "crypto/keyswitch.cuh"
 #include "device.h"
 #include "integer/compression/compression.h"
@@ -50,7 +49,7 @@ __host__ void host_pack(cudaStream_t stream, uint32_t gpu_index,
  if (array_in == array_out)
    PANIC("Cuda error: Input and output must be different");

-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);
  auto compression_params = mem_ptr->compression_params;

  auto log_modulus = mem_ptr->storage_log_modulus;
@@ -117,7 +116,7 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
  while (rem_lwes > 0) {
    auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);

-    host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
+    host_packing_keyswitch_lwe_list_to_glwe<Torus>(
        streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
        fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
        compression_params.polynomial_size, compression_params.ks_base_log,
@@ -185,7 +184,7 @@ __host__ void host_extract(cudaStream_t stream, uint32_t gpu_index,
  if (array_in == glwe_array_out)
    PANIC("Cuda error: Input and output must be different");

-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);

  auto compression_params = mem_ptr->compression_params;

@@ -301,7 +300,7 @@ __host__ void host_integer_decompress(
  /// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
  /// dimension to a big LWE dimension
  auto encryption_params = h_mem_ptr->encryption_params;
-  auto lut = h_mem_ptr->decompression_rescale_lut;
+  auto lut = h_mem_ptr->carry_extract_lut;
  auto active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
  if (active_gpu_count == 1) {
    execute_pbs_async<Torus>(
--- a/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/div_rem.cuh
@@ -285,7 +285,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
      // Shift the mask so that we will only keep bits we should
      uint32_t shifted_mask = full_message_mask >> shift_amount;

-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, interesting_divisor.last_block(),
          interesting_divisor.last_block(), bsks, ksks, 1,
          mem_ptr->masking_luts_1[shifted_mask]);
@@ -314,7 +314,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
      // the estimated degree of the output is < msg_modulus
      shifted_mask = shifted_mask & full_message_mask;

-      legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      integer_radix_apply_univariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, divisor_ms_blocks.first_block(),
          divisor_ms_blocks.first_block(), bsks, ksks, 1,
          mem_ptr->masking_luts_2[shifted_mask]);
@@ -339,7 +339,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
      interesting_remainder1.insert(0, numerator_block_1.first_block(),
                                    streams[0], gpu_indexes[0]);

-      legacy_host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
+      host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
          streams, gpu_indexes, gpu_count, interesting_remainder1.data, 1,
          mem_ptr->shift_mem_1, bsks, ksks, interesting_remainder1.len);

@@ -347,7 +347,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
                           interesting_remainder1.len - 1, streams[0],
                           gpu_indexes[0]);

-      legacy_host_radix_blocks_rotate_left<Torus>(
+      host_radix_blocks_rotate_left<Torus>(
          streams, gpu_indexes, gpu_count, interesting_remainder1.data,
          tmp_radix.data, 1, interesting_remainder1.len, big_lwe_size);

@@ -369,7 +369,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    auto left_shift_interesting_remainder2 = [&](cudaStream_t const *streams,
                                                 uint32_t const *gpu_indexes,
                                                 uint32_t gpu_count) {
-      legacy_host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
+      host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
          streams, gpu_indexes, gpu_count, interesting_remainder2.data, 1,
          mem_ptr->shift_mem_2, bsks, ksks, interesting_remainder2.len);
    }; // left_shift_interesting_remainder2
@@ -402,7 +402,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    // but in that position, interesting_remainder2 always has a 0
    auto &merged_interesting_remainder = interesting_remainder1;

-    legacy_host_addition<Torus>(
+    host_addition<Torus>(
        streams[0], gpu_indexes[0], merged_interesting_remainder.data,
        merged_interesting_remainder.data, interesting_remainder2.data,
        radix_params.big_lwe_dimension, merged_interesting_remainder.len);
@@ -437,7 +437,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
      mem_ptr->overflow_sub_mem->update_lut_indexes(
          streams, gpu_indexes, first_indexes, second_indexes, scalar_indexes,
          merged_interesting_remainder.len);
-      legacy_host_integer_overflowing_sub<uint64_t>(
+      host_integer_overflowing_sub<uint64_t>(
          streams, gpu_indexes, gpu_count, new_remainder.data,
          (uint64_t *)merged_interesting_remainder.data,
          interesting_divisor.data, subtraction_overflowed.data,
@@ -481,7 +481,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    auto create_clean_version_of_merged_remainder =
        [&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
            uint32_t gpu_count) {
-          legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+          integer_radix_apply_univariate_lookup_table_kb<Torus>(
              streams, gpu_indexes, gpu_count,
              cleaned_merged_interesting_remainder.data,
              cleaned_merged_interesting_remainder.data, bsks, ksks,
@@ -507,10 +507,10 @@ __host__ void host_unsigned_integer_div_rem_kb(
      cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
    }

-    legacy_host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
-                                subtraction_overflowed.data,
-                                at_least_one_upper_block_is_non_zero.data,
-                                radix_params.big_lwe_dimension, 1);
+    host_addition<Torus>(streams[0], gpu_indexes[0], overflow_sum.data,
+                         subtraction_overflowed.data,
+                         at_least_one_upper_block_is_non_zero.data,
+                         radix_params.big_lwe_dimension, 1);

    int factor = (i) ? 3 : 2;
    int factor_lut_id = factor - 2;
@@ -521,7 +521,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    auto conditionally_zero_out_merged_interesting_remainder =
        [&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
            uint32_t gpu_count) {
-          legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+          integer_radix_apply_bivariate_lookup_table_kb<Torus>(
              streams, gpu_indexes, gpu_count,
              cleaned_merged_interesting_remainder.data,
              cleaned_merged_interesting_remainder.data,
@@ -534,7 +534,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    auto conditionally_zero_out_merged_new_remainder =
        [&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
            uint32_t gpu_count) {
-          legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+          integer_radix_apply_bivariate_lookup_table_kb<Torus>(
              streams, gpu_indexes, gpu_count, new_remainder.data,
              new_remainder.data, overflow_sum_radix.data, bsks, ksks,
              new_remainder.len,
@@ -544,7 +544,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
    auto set_quotient_bit = [&](cudaStream_t const *streams,
                                uint32_t const *gpu_indexes,
                                uint32_t gpu_count) {
-      legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      integer_radix_apply_bivariate_lookup_table_kb<Torus>(
          streams, gpu_indexes, gpu_count, did_not_overflow.data,
          subtraction_overflowed.data,
          at_least_one_upper_block_is_non_zero.data, bsks, ksks, 1,
@@ -552,7 +552,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
          mem_ptr->merge_overflow_flags_luts[pos_in_block]
              ->params.message_modulus);

-      legacy_host_addition<Torus>(
+      host_addition<Torus>(
          streams[0], gpu_indexes[0], &quotient[block_of_bit * big_lwe_size],
          &quotient[block_of_bit * big_lwe_size], did_not_overflow.data,
          radix_params.big_lwe_dimension, 1);
@@ -588,17 +588,17 @@ __host__ void host_unsigned_integer_div_rem_kb(

  // Clean the quotient and remainder
  // as even though they have no carries, they are not at nominal noise level
-  legacy_host_addition<Torus>(streams[0], gpu_indexes[0], remainder,
-                              remainder1.data, remainder2.data,
-                              radix_params.big_lwe_dimension, remainder1.len);
+  host_addition<Torus>(streams[0], gpu_indexes[0], remainder, remainder1.data,
+                       remainder2.data, radix_params.big_lwe_dimension,
+                       remainder1.len);

  for (uint j = 0; j < gpu_count; j++) {
    cuda_synchronize_stream(streams[j], gpu_indexes[j]);
  }
-  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      mem_ptr->sub_streams_1, gpu_indexes, gpu_count, remainder, remainder,
      bsks, ksks, num_blocks, mem_ptr->message_extract_lut_1);
-  legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
      mem_ptr->sub_streams_2, gpu_indexes, gpu_count, quotient, quotient, bsks,
      ksks, num_blocks, mem_ptr->message_extract_lut_2);
  for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
@@ -636,14 +636,12 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
      cuda_synchronize_stream(streams[j], gpu_indexes[j]);
    }

-    legacy_host_integer_abs_kb_async<Torus>(
-        int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
-        positive_numerator.data, bsks, ksks, int_mem_ptr->abs_mem_1, true,
-        num_blocks);
-    legacy_host_integer_abs_kb_async<Torus>(
-        int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
-        positive_divisor.data, bsks, ksks, int_mem_ptr->abs_mem_2, true,
-        num_blocks);
+    host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
+                               gpu_count, positive_numerator.data, bsks, ksks,
+                               int_mem_ptr->abs_mem_1, true, num_blocks);
+    host_integer_abs_kb<Torus>(int_mem_ptr->sub_streams_2, gpu_indexes,
+                               gpu_count, positive_divisor.data, bsks, ksks,
+                               int_mem_ptr->abs_mem_2, true, num_blocks);
    for (uint j = 0; j < int_mem_ptr->active_gpu_count; j++) {
      cuda_synchronize_stream(int_mem_ptr->sub_streams_1[j], gpu_indexes[j]);
      cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
@@ -654,7 +652,7 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
        positive_numerator.data, positive_divisor.data, bsks, ksks,
        int_mem_ptr->unsigned_mem, num_blocks);

-    legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+    integer_radix_apply_bivariate_lookup_table_kb<Torus>(
        int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
        int_mem_ptr->sign_bits_are_different,
        &numerator[big_lwe_size * (num_blocks - 1)],
@@ -667,36 +665,36 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
      cuda_synchronize_stream(int_mem_ptr->sub_streams_2[j], gpu_indexes[j]);
    }

-    legacy_host_integer_radix_negation(
+    host_integer_radix_negation(
        int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
        int_mem_ptr->negated_quotient, quotient, radix_params.big_lwe_dimension,
        num_blocks, radix_params.message_modulus, radix_params.carry_modulus);

    uint32_t requested_flag = outputFlag::FLAG_NONE;
    uint32_t uses_carry = 0;
-    legacy_host_propagate_single_carry<Torus>(
+    host_propagate_single_carry<Torus>(
        int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
        int_mem_ptr->negated_quotient, nullptr, nullptr, int_mem_ptr->scp_mem_1,
        bsks, ksks, num_blocks, requested_flag, uses_carry);

-    legacy_host_integer_radix_negation(
-        int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
-        int_mem_ptr->negated_remainder, remainder,
-        radix_params.big_lwe_dimension, num_blocks,
-        radix_params.message_modulus, radix_params.carry_modulus);
+    host_integer_radix_negation(int_mem_ptr->sub_streams_2, gpu_indexes,
+                                gpu_count, int_mem_ptr->negated_remainder,
+                                remainder, radix_params.big_lwe_dimension,
+                                num_blocks, radix_params.message_modulus,
+                                radix_params.carry_modulus);

-    legacy_host_propagate_single_carry<Torus>(
+    host_propagate_single_carry<Torus>(
        int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
        int_mem_ptr->negated_remainder, nullptr, nullptr,
        int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks, requested_flag,
        uses_carry);

-    legacy_host_integer_radix_cmux_kb<Torus>(
+    host_integer_radix_cmux_kb<Torus>(
        int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient,
        int_mem_ptr->sign_bits_are_different, int_mem_ptr->negated_quotient,
        quotient, int_mem_ptr->cmux_quotient_mem, bsks, ksks, num_blocks);

-    legacy_host_integer_radix_cmux_kb<Torus>(
+    host_integer_radix_cmux_kb<Torus>(
        int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count, remainder,
        &numerator[big_lwe_size * (num_blocks - 1)],
        int_mem_ptr->negated_remainder, remainder,
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Arthur Meyre	8378ce7d44	clippy bug no span for large array on stack	2024-12-03 11:00:31 +01:00
Arthur Meyre	e0111f6dd1	chore(ci): toolchain update	2024-12-02 16:34:19 +01:00
Arthur Meyre	51fd605a0c	chore: update dependencies	2024-12-02 11:12:46 +01:00