fix(mockup) Fix cast in mockup register translation

The value must be viewed as u32 before the shift. Otherwise, every value with more than 2 NttRdxCut introduces a mismatch with the backend.
fix(Hpu) Fix Pbs level ordering in BSK shuffle/Unshuffle
2026-04-28 03:01:21 -04:00 · 2025-01-21 16:17:55 +01:00 · 2025-01-17 14:51:35 +01:00 · 2025-01-16 17:31:35 +01:00 · 2025-01-16 15:55:40 +01:00 · 2025-01-15 16:44:16 +01:00
1031 changed files with 58235 additions and 66072 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,14 +8,8 @@ root = true
 end_of_line = lf
 insert_final_newline = true

-# 4 space indentation for rust and toml
-[*.{rs,toml}]
+# 4 space indentation
+[*.rs]
 charset = utf-8
 indent_style = space
 indent_size = 4
-
-# 2 for c and js
-[*.{js,json,c,h}]
-charset = utf-8
-indent_style = space
-indent_size = 2
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -5,7 +5,6 @@ self-hosted-runner:
    - 4090-desktop
    - large_windows_16_latest
    - large_ubuntu_16
-    - large_ubuntu_16-22.04
 # Configuration variables in array of strings defined in your repository or
 # organization. `null` means disabling configuration variables check.
 # Empty array means no configuration variable is allowed.
--- a/.github/actions/gpu_setup/action.yml
+++ b/.github/actions/gpu_setup/action.yml
@@ -1,63 +0,0 @@
-name: Setup Cuda
-description: Setup Cuda on Hyperstack or GitHub instance
-
-inputs:
-  cuda-version:
-    description: Version of Cuda to use
-    required: true
-  gcc-version:
-    description: Version of GCC to use
-    required: true
-  cmake-version:
-    description: Version of cmake to use
-    default: 3.29.6
-  github-instance:
-    description: Instance is hosted on GitHub
-    default: 'false'
-
-runs:
-  using: "composite"
-  steps:
-    # Mandatory on hyperstack since a bootable volume is not re-usable yet.
-    - name: Install dependencies
-      shell: bash
-      run: |
-        sudo apt update
-        curl -fsSL https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/kitware.gpg
-        sudo chmod 644 /etc/apt/trusted.gpg.d/kitware.gpg
-        echo 'deb [signed-by=/etc/apt/trusted.gpg.d/kitware.gpg] https://apt.kitware.com/ubuntu/ jammy main' | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null
-        sudo apt update
-        sudo apt install -y cmake cmake-format libclang-dev
-
-    - name: Install CUDA
-      if: inputs.github-instance == 'true'
-      shell: bash
-      run: |
-        TOOLKIT_VERSION="$(echo ${{ inputs.cuda-version }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
-        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-        sudo dpkg -i cuda-keyring_1.1-1_all.deb
-        sudo apt update
-        sudo apt -y install cuda-toolkit-${TOOLKIT_VERSION}
-
-    - name: Export CUDA variables
-      shell: bash
-      run: |
-        CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
-        echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-        echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-        echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-        echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"
-
-    # Specify the correct host compilers
-    - name: Export gcc and g++ variables
-      shell: bash
-      run: |
-        {
-          echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
-          echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
-          echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
-        } >> "${GITHUB_ENV}"
-
-    - name: Check device is detected
-      shell: bash
-      run: nvidia-smi
--- a/.github/actions/hyperstack_setup/action.yml
+++ b/.github/actions/hyperstack_setup/action.yml
@@ -0,0 +1,53 @@
+name: Setup Cuda
+description: Setup Cuda on Hyperstack instance
+
+inputs:
+  cuda-version:
+    description: Version of Cuda to use
+    required: true
+  gcc-version:
+    description: Version of GCC to use
+    required: true
+  cmake-version:
+    description: Version of cmake to use
+    default: 3.29.6
+
+runs:
+  using: "composite"
+  steps:
+    # Mandatory on hyperstack since a bootable volume is not re-usable yet.
+    - name: Install dependencies
+      shell: bash
+      run: |
+        sudo apt update
+        sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
+        wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
+        tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
+        cd cmake-${{ inputs.cmake-version }}
+        ./bootstrap
+        make -j"$(nproc)"
+        sudo make install
+
+    - name: Export CUDA variables
+      shell: bash
+      run: |
+        CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
+        echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+        echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+        echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+        echo "CUDACXX=/usr/local/cuda-${{ inputs.cuda-version }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+    # Specify the correct host compilers
+    - name: Export gcc and g++ variables
+      shell: bash
+      run: |
+        {
+          echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
+          echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
+          echo "HOME=/home/ubuntu";
+        } >> "${GITHUB_ENV}"
+
+    - name: Check device is detected
+      shell: bash
+      run: nvidia-smi
--- a/.github/workflows/aws_tfhe_backward_compat_tests.yml
+++ b/.github/workflows/aws_tfhe_backward_compat_tests.yml
@@ -11,10 +11,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -26,11 +22,10 @@ jobs:
    name: Setup instance (backward-compat-tests)
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -40,18 +35,11 @@ jobs:
          backend: aws
          profile: cpu-small

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  backward-compat-tests:
    name: Backward compatibility tests
    needs: [ setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -59,17 +47,21 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
          toolchain: stable

+      - name: Install git-lfs
+        run: |
+          sudo apt update && sudo apt -y install git-lfs
+
      - name: Use specific data branch
        if: ${{ contains(github.event.pull_request.labels.*.name, 'data_PR') }}
        env:
-          PR_BRANCH: ${{ github.ref_name }}
+          PR_BRANCH: ${{ github.head_ref || github.ref_name }}
        run: |
          echo "BACKWARD_COMPAT_DATA_BRANCH=${PR_BRANCH}" >> "${GITHUB_ENV}"

@@ -84,7 +76,7 @@ jobs:
        with:
          persist-credentials: 'false'
          repository: zama-ai/tfhe-backward-compat-data
-          path: tests/tfhe-backward-compat-data
+          path: tfhe/tfhe-backward-compat-data
          lfs: 'true'
          ref: ${{ steps.backward_compat_branch.outputs.branch }}

@@ -98,17 +90,16 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (backward-compat-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, backward-compat-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -123,4 +114,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -11,22 +11,19 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"
+  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request_target' }}
+  REF: ${{ github.event.pull_request.head.sha || github.sha }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-  pull_request:
+  pull_request_target:

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
      zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -58,13 +55,14 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          ref: ${{ env.REF }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            dependencies:
              - tfhe/Cargo.toml
@@ -107,7 +105,7 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - README.md

      - name: Aggregate file changes
@@ -126,18 +124,23 @@ jobs:
        run: |
          echo "any_changed=true" >> "$GITHUB_OUTPUT"

+  check-user-permission:
+    needs: should-run
+    uses: ./.github/workflows/check_triggering_actor.yml
+    secrets:
+      TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
  setup-instance:
    name: Setup instance (fast-tests)
-    if: github.event_name == 'workflow_dispatch' ||
-      (github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
-    needs: should-run
+    if: github.event_name != 'pull_request_target' ||
+      needs.should-run.outputs.any_file_changed == 'true'
+    needs: [ should-run, check-user-permission ]
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -147,18 +150,13 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  fast-tests:
    name: Fast CPU tests
+    if: github.event_name != 'pull_request_target' ||
+      (github.event_name == 'pull_request_target' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -166,7 +164,8 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          ref: ${{ env.REF }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -209,7 +208,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@d4323d4df104b026a6aa633fdb11d772146be0bf #v4.2.2
+        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
        with:
          path: |
            ~/.nvm
@@ -222,7 +221,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@d4323d4df104b026a6aa633fdb11d772146be0bf #v4.2.2
+        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -265,22 +264,21 @@ jobs:
          make test_zk

      - name: Slack Notification
-        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
+        if: ${{ failure() }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (fast-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, fast-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -295,4 +293,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -14,16 +14,12 @@ env:
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
  NO_BIG_PARAMS: FALSE
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  push:
    branches:
      - main
@@ -32,11 +28,12 @@ jobs:
  should-run:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
+      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      integer_test: ${{ github.event_name == 'workflow_dispatch' ||
        steps.changed-files.outputs.integer_any_changed }}
@@ -45,13 +42,14 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: "false"

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            integer:
              - tfhe/Cargo.toml
@@ -69,15 +67,14 @@ jobs:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
-      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
+      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -87,18 +84,11 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  unsigned-integer-tests:
    name: Unsigned integer tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -106,7 +96,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -140,17 +130,16 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (unsigned-integer-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [setup-instance, unsigned-integer-tests]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -165,4 +154,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -14,16 +14,12 @@ env:
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
  NO_BIG_PARAMS: FALSE
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  push:
    branches:
      - main
@@ -37,7 +33,7 @@ jobs:
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      integer_test: ${{ github.event_name == 'workflow_dispatch' ||
        steps.changed-files.outputs.integer_any_changed }}
@@ -46,13 +42,14 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          persist-credentials: "false"

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            integer:
              - tfhe/Cargo.toml
@@ -70,15 +67,14 @@ jobs:
    if:
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.integer_test == 'true') ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
-      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.integer_test == 'true') ||
+      (github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
      github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -88,18 +84,11 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  signed-integer-tests:
    name: Signed integer tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -107,7 +96,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -145,17 +134,16 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (signed-integer-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [setup-instance, signed-integer-tests]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -170,4 +158,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -11,10 +11,6 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_64-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -31,7 +27,7 @@ jobs:
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
      zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
@@ -67,13 +63,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            dependencies:
              - tfhe/Cargo.toml
@@ -115,7 +111,7 @@ jobs:
            user_docs:
              - tfhe/src/**
              - '!tfhe/src/c_api/**'
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - README.md

      - name: Aggregate file changes
@@ -142,11 +138,10 @@ jobs:
    needs: should-run
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -156,20 +151,13 @@ jobs:
          backend: aws
          profile: cpu-big

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cpu-tests:
    name: CPU tests
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    needs: [ should-run, setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}_${{github.event_name}}
+      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -177,7 +165,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -252,17 +240,16 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -277,4 +264,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -10,10 +10,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,11 +23,10 @@ jobs:
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -41,18 +36,11 @@ jobs:
          backend: aws
          profile: cpu-small

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  wasm-tests:
    name: WASM tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -60,7 +48,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -73,7 +61,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@d4323d4df104b026a6aa633fdb11d772146be0bf #v4.2.2
+        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
        with:
          path: |
            ~/.nvm
@@ -86,7 +74,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@d4323d4df104b026a6aa633fdb11d772146be0bf #v4.2.2
+        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -121,17 +109,16 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (wasm-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, wasm-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -146,4 +133,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/benchmark_boolean.yml
+++ b/.github/workflows/benchmark_boolean.yml
@@ -43,7 +43,7 @@ jobs:
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    continue-on-error: true
    steps:
@@ -51,8 +51,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -94,7 +93,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}
@@ -104,8 +103,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -123,7 +121,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (boolean-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, boolean-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_core_crypto.yml
+++ b/.github/workflows/benchmark_core_crypto.yml
@@ -3,9 +3,6 @@ name: Core crypto benchmarks

 on:
  workflow_dispatch:
-  schedule:
-    # Weekly benchmarks will be triggered each Saturday at 5a.m.
-    - cron: '0 5 * * 6'

 env:
  CARGO_TERM_COLOR: always
@@ -43,15 +40,14 @@ jobs:
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    steps:
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -68,7 +64,6 @@ jobs:

      - name: Run benchmarks with AVX512
        run: |
-          make bench_ks_pbs
          make bench_pbs
          make bench_pbs128
          make bench_ks
@@ -86,7 +81,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -96,8 +91,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -115,7 +109,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (core-crypto-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, core-crypto-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_erc20.yml
+++ b/.github/workflows/benchmark_erc20.yml
@@ -43,7 +43,7 @@ jobs:
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    continue-on-error: true
    timeout-minutes: 720  # 12 hours
@@ -52,8 +52,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -73,8 +72,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks
        run: |
@@ -99,7 +97,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_erc20
          path: ${{ env.RESULTS_FILENAME }}
@@ -120,7 +118,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (erc20-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, erc20-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_gpu_4090.yml
+++ b/.github/workflows/benchmark_gpu_4090.yml
@@ -17,7 +17,7 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  schedule:
    # Weekly benchmarks will be triggered each Friday at 9p.m.
    - cron: "0 21 * * 5"
@@ -29,17 +29,20 @@ jobs:
      github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs' ||
      contains(github.event.label.name, '4090_bench') }}
    concurrency:
-      group: ${{ github.workflow_ref }}_cuda_integer_bench
+      group: ${{ github.workflow }}_${{ github.ref }}_cuda_integer_bench
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ["self-hosted", "4090-desktop"]
    timeout-minutes: 1440 # 24 hours
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -60,8 +63,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run integer benchmarks
        run: |
@@ -80,7 +82,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_integer_multi_bit_gpu_default
          path: ${{ env.RESULTS_FILENAME }}
@@ -97,14 +99,14 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  cuda-core-crypto-benchmarks:
    name: Cuda core crypto benchmarks  (RTX 4090)
    if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
    needs: cuda-integer-benchmarks
    concurrency:
-      group: ${{ github.workflow_ref }}_cuda_core_crypto_bench
+      group: ${{ github.workflow }}_${{ github.ref }}_cuda_core_crypto_bench
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ["self-hosted", "4090-desktop"]
    timeout-minutes: 1440 # 24 hours
@@ -114,8 +116,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -135,8 +136,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run core crypto benchmarks
        run: |
@@ -157,7 +157,7 @@ jobs:
      

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -182,7 +182,7 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  remove_github_label:
    name: Remove 4090 bench label
--- a/.github/workflows/benchmark_gpu_core_crypto.yml
+++ b/.github/workflows/benchmark_gpu_core_crypto.yml
@@ -23,16 +23,10 @@ jobs:
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    outputs:
-      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
-      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
-      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
-      # otherwise we'll try to run the next job on a non-existing on-demand instance.
-      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
-      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        continue-on-error: true
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -42,13 +36,6 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This will allow to fallback on permanent instances running on Hyperstack.
-      - name: Use permanent remote instance
-        id: use-permanent-instance
-        if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
-        run: |
-          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
-
  cuda-core-crypto-benchmarks:
    name: Execute GPU core crypto benchmarks
    needs: setup-instance
@@ -66,12 +53,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
@@ -84,6 +69,11 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install rust
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
@@ -91,7 +81,6 @@ jobs:

      - name: Run benchmarks with AVX512
        run: |
-          make bench_ks_pbs_gpu
          make bench_pbs_gpu
          make bench_ks_gpu

@@ -109,7 +98,7 @@ jobs:
          --walk-subdirs

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}
@@ -119,8 +108,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -143,7 +131,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-integer-full-benchmarks)
-    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-core-crypto-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_gpu_erc20_common.yml
+++ b/.github/workflows/benchmark_gpu_erc20_common.yml
@@ -14,7 +14,7 @@ on:
        type: string
        required: true
    secrets:
-      REPO_CHECKOUT_TOKEN:
+      FHE_ACTIONS_TOKEN:
        required: true
      SLAB_ACTION_TOKEN:
        required: true
@@ -50,16 +50,10 @@ jobs:
    if:  github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    outputs:
-      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
-      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
-      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
-      # otherwise we'll try to run the next job on a non-existing on-demand instance.
-      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
-      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        continue-on-error: true
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -69,15 +63,6 @@ jobs:
          backend: ${{ inputs.backend }}
          profile: ${{ inputs.profile }}

-      # This will allow to fallback on permanent instances running on Hyperstack.
-      - name: Use permanent remote instance
-        id: use-permanent-instance
-        if: env.SECRETS_AVAILABLE == 'true' &&
-          steps.start-remote-instance.outcome == 'failure' &&
-          inputs.profile == 'single-h100'
-        run: |
-          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
-
  cuda-erc20-benchmarks:
    name: Cuda ERC20 benchmarks (${{ inputs.profile }})
    needs: setup-instance
@@ -95,12 +80,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
@@ -113,6 +96,11 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install rust
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
@@ -136,7 +124,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_erc20_${{ inputs.profile }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -146,8 +134,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -170,7 +157,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_gpu_integer_common.yml
+++ b/.github/workflows/benchmark_gpu_integer_common.yml
@@ -26,7 +26,7 @@ on:
        type: boolean
        default: false
    secrets:
-      REPO_CHECKOUT_TOKEN:
+      FHE_ACTIONS_TOKEN:
        required: true
      SLAB_ACTION_TOKEN:
        required: true
@@ -114,16 +114,10 @@ jobs:
    needs: prepare-matrix
    runs-on: ubuntu-latest
    outputs:
-      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
-      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
-      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
-      # otherwise we'll try to run the next job on a non-existing on-demand instance.
-      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
-      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        continue-on-error: true
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -133,15 +127,6 @@ jobs:
          backend: ${{ inputs.backend }}
          profile: ${{ inputs.profile }}

-      # This will allow to fallback on permanent instances running on Hyperstack.
-      - name: Use permanent remote instance
-        id: use-permanent-instance
-        if: env.SECRETS_AVAILABLE == 'true' &&
-          steps.start-remote-instance.outcome == 'failure' &&
-          inputs.profile == 'single-h100'
-        run: |
-          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
-
  cuda-benchmarks:
    name: Cuda benchmarks (${{ inputs.profile }})
    needs: [ prepare-matrix, setup-instance ]
@@ -165,12 +150,10 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
@@ -183,6 +166,11 @@ jobs:
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"

+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install rust
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
@@ -212,7 +200,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -222,8 +210,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -246,7 +233,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
-    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_integer.yml
+++ b/.github/workflows/benchmark_integer.yml
@@ -104,7 +104,7 @@ jobs:
    needs: [ prepare-matrix, setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    continue-on-error: true
    timeout-minutes: 1440  # 24 hours
@@ -119,8 +119,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -140,8 +139,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
@@ -172,7 +170,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -193,7 +191,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_shortint.yml
+++ b/.github/workflows/benchmark_shortint.yml
@@ -70,7 +70,7 @@ jobs:
    needs: [ prepare-matrix, setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    continue-on-error: true
    strategy:
@@ -82,8 +82,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -103,8 +102,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
@@ -138,7 +136,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -159,7 +157,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (shortint-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, shortint-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_signed_integer.yml
+++ b/.github/workflows/benchmark_signed_integer.yml
@@ -104,7 +104,7 @@ jobs:
    needs: [ prepare-matrix, setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    continue-on-error: true
    timeout-minutes: 1440  # 24 hours
@@ -119,8 +119,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -140,8 +139,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Should run benchmarks with all precisions
        if: inputs.all_precisions
@@ -166,7 +164,7 @@ jobs:
          --bench-type ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -187,7 +185,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (integer-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, signed-integer-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_tfhe_fft.yml
+++ b/.github/workflows/benchmark_tfhe_fft.yml
@@ -45,7 +45,7 @@ jobs:
    name: Execute FFT benchmarks in EC2
    needs: setup-ec2
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
    steps:
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_fft
          path: ${{ env.RESULTS_FILENAME }}
@@ -94,8 +94,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
--- a/.github/workflows/benchmark_tfhe_ntt.yml
+++ b/.github/workflows/benchmark_tfhe_ntt.yml
@@ -45,7 +45,7 @@ jobs:
    name: Execute NTT benchmarks in EC2
    needs: setup-ec2
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
    steps:
@@ -84,7 +84,7 @@ jobs:
          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_ntt
          path: ${{ env.RESULTS_FILENAME }}
@@ -94,8 +94,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
--- a/.github/workflows/benchmark_tfhe_zk_pok.yml
+++ b/.github/workflows/benchmark_tfhe_zk_pok.yml
@@ -3,14 +3,6 @@ name: tfhe-zk-pok benchmarks

 on:
  workflow_dispatch:
-    inputs:
-      bench_type:
-        description: "Benchmarks type"
-        type: choice
-        default: latency
-        options:
-          - latency
-          - throughput
  push:
    branches:
      - main
@@ -28,7 +20,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  BENCH_TYPE: ${{ inputs.bench_type || 'latency' }}

 jobs:
  should-run:
@@ -45,8 +36,9 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            zk_pok:
              - tfhe-zk-pok/**
@@ -80,7 +72,7 @@ jobs:
    if: needs.setup-instance.result != 'skipped'
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow_ref }}_${{github.event_name}}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
+      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -88,8 +80,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -109,12 +100,11 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks
        run: |
-          make BENCH_TYPE=${{ env.BENCH_TYPE }} bench_tfhe_zk_pok
+          make bench_tfhe_zk_pok

      - name: Parse results
        run: |
@@ -128,11 +118,10 @@ jobs:
          --commit-date "${{ env.COMMIT_DATE }}" \
          --bench-date "${{ env.BENCH_DATE }}" \
          --walk-subdirs \
-          --name-suffix avx512 \
-          --bench-type ${{ env.BENCH_TYPE }}
+          --name-suffix avx512

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_tfhe_zk_pok
          path: ${{ env.RESULTS_FILENAME }}
@@ -142,8 +131,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -161,7 +149,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (tfhe-zk-pok-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_wasm_client.yml
+++ b/.github/workflows/benchmark_wasm_client.yml
@@ -28,7 +28,7 @@ jobs:
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
      (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
    steps:
@@ -36,13 +36,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            wasm_bench:
              - tfhe/Cargo.toml
@@ -88,8 +88,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -110,7 +109,7 @@ jobs:

      - name: Node cache restoration
        id: node-cache
-        uses: actions/cache/restore@d4323d4df104b026a6aa633fdb11d772146be0bf #v4.2.2
+        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
        with:
          path: |
            ~/.nvm
@@ -123,7 +122,7 @@ jobs:
          make install_node

      - name: Node cache save
-        uses: actions/cache/save@d4323d4df104b026a6aa633fdb11d772146be0bf #v4.2.2
+        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
        if: steps.node-cache.outputs.cache-hit != 'true'
        with:
          path: |
@@ -167,7 +166,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_wasm_${{ matrix.browser }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -177,8 +176,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -196,7 +194,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (wasm-client-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, wasm-client-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/benchmark_zk_pke.yml
+++ b/.github/workflows/benchmark_zk_pke.yml
@@ -43,13 +43,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            zk_pok:
              - tfhe/Cargo.toml
@@ -118,7 +118,7 @@ jobs:
    if: needs.setup-instance.result != 'skipped'
    needs: [ prepare-matrix, setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}_${{github.event_name}}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
+      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -130,8 +130,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Get benchmark details
        run: |
@@ -151,8 +150,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
@@ -179,7 +177,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1
+        uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
        with:
          name: ${{ github.sha }}_integer_zk
          path: ${{ env.RESULTS_FILENAME }}
@@ -189,8 +187,7 @@ jobs:
        with:
          repository: zama-ai/slab
          path: slab
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -208,7 +205,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (pke-zk-benchmarks)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, pke-zk-benchmarks ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/cargo_build_tfhe_ntt.yml
+++ b/.github/workflows/cargo_build_tfhe_ntt.yml
@@ -12,7 +12,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  cargo-builds-ntt:
+  cargo-builds:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
--- a/.github/workflows/cargo_test_fft.yml
+++ b/.github/workflows/cargo_test_fft.yml
@@ -3,46 +3,16 @@ name: Cargo Test tfhe-fft

 on:
  pull_request:
-  push:
-    branches:
-      - main

 env:
  CARGO_TERM_COLOR: always
-  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
  cancel-in-progress: true

 jobs:
-  should-run:
-    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: read
-    outputs:
-      fft_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.fft_any_changed }}
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          fetch-depth: 0
-          persist-credentials: 'false'
-
-      - name: Check for file changes
-        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
-        with:
-          files_yaml: |
-            fft:
-              - tfhe/Cargo.toml
-              - Makefile
-              - tfhe-fft/**
-              - '.github/workflows/cargo_test_fft.yml'
-
-  cargo-tests-fft:
-    needs: should-run
-    if: needs.should-run.outputs.fft_test == 'true'
+  cargo-tests:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -68,9 +38,7 @@ jobs:
        run: |
          make test_fft_no_std

-  cargo-tests-fft-nightly:
-    needs: should-run
-    if: needs.should-run.outputs.fft_test == 'true'
+  cargo-tests-nightly:
    runs-on: ${{ matrix.runner_type }}
    strategy:
      matrix:
@@ -92,10 +60,8 @@ jobs:
        run: |
          make test_fft_no_std_nightly

-  cargo-tests-fft-node-js:
-    needs: should-run
-    if: needs.should-run.outputs.fft_test == 'true'
-    runs-on: ubuntu-latest
+  cargo-tests-node-js:
+    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

@@ -103,30 +69,3 @@ jobs:
        run: |
          make install_node
          make test_fft_node_js_ci
-
-  cargo-tests-fft-successful:
-    needs: [ should-run, cargo-tests-fft, cargo-tests-fft-nightly, cargo-tests-fft-node-js ]
-    if: ${{ always() }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Tests do not need to run
-        if: needs.should-run.outputs.fft_test == 'false'
-        run: |
-          echo "tfhe-fft files haven't changed tests don't need to run"
-
-      - name: Check all tests passed
-        if: needs.should-run.outputs.fft_test == 'true' &&
-          needs.cargo-tests-fft.result == 'success' &&
-          needs.cargo-tests-fft-nightly.result == 'success' &&
-          needs.cargo-tests-fft-node-js.result == 'success'
-        run: |
-          echo "All tfhe-fft test passed"
-
-      - name: Check tests failure
-        if: needs.should-run.outputs.fft_test == 'true' &&
-          (needs.cargo-tests-fft.result != 'success' ||
-          needs.cargo-tests-fft-nightly.result != 'success' ||
-          needs.cargo-tests-fft-node-js.result != 'success')
-        run: |
-          echo "Some tfhe-fft tests failed"
-          exit 1
--- a/.github/workflows/cargo_test_ntt.yml
+++ b/.github/workflows/cargo_test_ntt.yml
@@ -3,46 +3,16 @@ name: Cargo Test tfhe-ntt

 on:
  pull_request:
-  push:
-    branches:
-      - main

 env:
  CARGO_TERM_COLOR: always
-  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
  cancel-in-progress: true

-jobs:  
-  should-run:
-    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: read
-    outputs:
-      ntt_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ntt_any_changed }}
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          fetch-depth: 0
-          persist-credentials: 'false'
-
-      - name: Check for file changes
-        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
-        with:
-          files_yaml: |
-            ntt:
-              - tfhe/Cargo.toml
-              - Makefile
-              - tfhe-ntt/**
-              - '.github/workflows/cargo_test_ntt.yml'
-
-  cargo-tests-ntt:
-    needs: should-run
-    if: needs.should-run.outputs.ntt_test == 'true'
+jobs:
+  cargo-tests:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@@ -63,9 +33,7 @@ jobs:
      - name: Test no-std
        run: make test_ntt_no_std

-  cargo-tests-ntt-nightly:
-    needs: should-run
-    if: needs.should-run.outputs.ntt_test == 'true'
+  cargo-tests-nightly:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@@ -84,28 +52,3 @@ jobs:

      - name: Test no-std nightly
        run: make test_ntt_no_std_nightly
-
-  cargo-tests-ntt-successful:
-    needs: [ should-run, cargo-tests-ntt, cargo-tests-ntt-nightly ]
-    if: ${{ always() }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Tests do not need to run
-        if: needs.should-run.outputs.ntt_test == 'false'
-        run: |
-          echo "tfhe-ntt files haven't changed tests don't need to run"
-
-      - name: Check all tests success
-        if: needs.should-run.outputs.ntt_test == 'true' &&
-          needs.cargo-tests-ntt.result == 'success' &&
-          needs.cargo-tests-ntt-nightly.result == 'success'
-        run: |
-          echo "All tfhe-ntt tests passed"
-
-      - name: Check tests failure
-        if: needs.should-run.outputs.ntt_test == 'true' &&
-          (needs.cargo-tests-ntt.result != 'success' ||
-          needs.cargo-tests-ntt-nightly.result != 'success')
-        run: |
-          echo "Some tfhe-ntt tests failed"
-          exit 1
--- a/.github/workflows/check_commit.yml
+++ b/.github/workflows/check_commit.yml
@@ -2,7 +2,6 @@
 name: Check commit and PR compliance
 on:
  pull_request:
-
 jobs:
  check-commit-pr:
    name: Check commit and PR
--- a/.github/workflows/check_triggering_actor.yml
+++ b/.github/workflows/check_triggering_actor.yml
@@ -0,0 +1,29 @@
+# Check if triggering actor is a collaborator and has write access
+name: Check Triggering Actor
+
+on:
+  workflow_call:
+    secrets:
+      TOKEN:
+        required: true
+
+jobs:
+  check-actor-permission:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get User Permission
+        id: check-access
+        uses: actions-cool/check-user-permission@956b2e73cdfe3bcb819bb7225e490cb3b18fd76e # v2.2.1
+        with:
+          require: write
+          username: ${{ github.triggering_actor }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.TOKEN }}
+
+      - name: Check User Permission
+        if: steps.check-access.outputs.require-result == 'false'
+        run: |
+          echo "${{ github.triggering_actor }} does not have permissions on this repo."
+          echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
+          echo "Job originally triggered by ${{ github.actor }}"
+          exit 1
--- a/.github/workflows/ci_lint.yml
+++ b/.github/workflows/ci_lint.yml
@@ -6,7 +6,6 @@ on:

 env:
  ACTIONLINT_VERSION: 1.6.27
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 jobs:
  lint-check:
@@ -15,9 +14,6 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Get actionlint
        run: |
@@ -31,8 +27,7 @@ jobs:
          make lint_workflow

      - name: Ensure SHA pinned actions
-        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@25ed13d0628a1601b4b44048e63cc4328ed03633 # v3.0.22
+        uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6ae615f6475d2ede5ad88bea6baa7a1d5e93ffaa # v3.0.19
        with:
          allowlist: |
            slsa-framework/slsa-github-generator
-            ./
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -38,7 +38,7 @@ jobs:
    name: Code coverage tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow_ref }}_${{ github.event_name }}
+      group: ${{ github.workflow }}_${{ github.event_name }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    timeout-minutes: 5760 # 4 days
@@ -53,7 +53,7 @@ jobs:

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
          files_yaml: |
            tfhe:
@@ -83,7 +83,7 @@ jobs:
          make test_shortint_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574
+        uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
          make test_integer_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574
+        uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -115,7 +115,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (code-coverage)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, code-coverage ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -10,10 +10,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,11 +23,10 @@ jobs:
    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -41,18 +36,11 @@ jobs:
          backend: aws
          profile: cpu-small

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  csprng-randomness-tests:
    name: CSPRNG randomness tests
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    steps:
@@ -60,7 +48,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -77,17 +65,16 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (csprng-randomness-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, csprng-randomness-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -102,4 +89,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/data_pr_close.yml
+++ b/.github/workflows/data_pr_close.yml
@@ -8,7 +8,7 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  PR_BRANCH: ${{ github.ref_name }}
+  PR_BRANCH: ${{ github.head_ref || github.ref_name }}
  CLOSE_TYPE: ${{ github.event.pull_request.merged && 'merge' || 'close' }}

 # only trigger on pull request closed events
--- a/.github/workflows/gpu_4090_tests.yml
+++ b/.github/workflows/gpu_4090_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an RTX 4090 machine
-name: Cuda - 4090 full tests
+name: TFHE Cuda Backend - 4090 full tests

 env:
  CARGO_TERM_COLOR: always
@@ -11,7 +11,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -29,17 +28,16 @@ jobs:
      contains(github.event.label.name, '4090_test') ||
      (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ["self-hosted", "4090-desktop"]
-    timeout-minutes: 1440 # 24 hours

    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -82,4 +80,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_fast_h100_tests.yml
+++ b/.github/workflows/gpu_fast_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: Cuda - Fast tests on H100
+name: TFHE Cuda Backend - Fast tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,22 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_fast_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -68,17 +64,10 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
-      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
-      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
-      # otherwise we'll try to run the next job on a non-existing on-demand instance.
-      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
-      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        continue-on-error: true
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -88,27 +77,13 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This will allow to fallback on permanent instances running on Hyperstack.
-      - name: Use permanent remote instance
-        id: use-permanent-instance
-        if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
-        run: |
-          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
-
-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA H100 tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -124,15 +99,17 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -165,21 +142,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -194,4 +169,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_fast_tests.yml
+++ b/.github/workflows/gpu_fast_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: Cuda - Fast tests
+name: TFHE Cuda Backend - Fast tests

 env:
  CARGO_TERM_COLOR: always
@@ -12,10 +12,6 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -26,7 +22,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -34,13 +30,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -54,7 +50,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_fast_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -62,15 +58,14 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-tests)
    needs: should-run
-    if: github.event_name == 'workflow_dispatch' ||
+    if: github.event_name != 'pull_request' ||
      needs.should-run.outputs.gpu_test == 'true'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -80,20 +75,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -109,14 +97,17 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -149,21 +140,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -178,4 +167,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_full_h100_tests.yml
+++ b/.github/workflows/gpu_full_h100_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
-name: Cuda - Full tests on H100
+name: TFHE Cuda Backend - Full tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -11,6 +11,7 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}

 on:
  workflow_dispatch:
@@ -20,16 +21,10 @@ jobs:
    name: Setup instance (cuda-h100-tests)
    runs-on: ubuntu-latest
    outputs:
-      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
-      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
-      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
-      # otherwise we'll try to run the next job on a non-existing on-demand instance.
-      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
-      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        continue-on-error: true
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -39,18 +34,11 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This will allow to fallback on permanent instances running on Hyperstack.
-      - name: Use permanent remote instance
-        id: use-permanent-instance
-        if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
-        run: |
-          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA H100 tests
    needs: [ setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -78,15 +66,18 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}

+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
@@ -119,11 +110,10 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
@@ -143,4 +133,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_full_multi_gpu_tests.yml
+++ b/.github/workflows/gpu_full_multi_gpu_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend on an AWS instance
-name: Cuda - Full tests multi-GPU
+name: TFHE Cuda Backend - Full tests multi-GPU

 env:
  CARGO_TERM_COLOR: always
@@ -12,10 +12,6 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -27,7 +23,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/**_multi_gpu_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -68,11 +64,10 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -82,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: multi-gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA multi-GPU tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -111,14 +99,17 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -154,21 +145,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests-multi-gpu)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -183,4 +172,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_integer_long_run_tests.yml
+++ b/.github/workflows/gpu_integer_long_run_tests.yml
@@ -1,4 +1,4 @@
-name: Cuda - Long Run Tests on GPU
+name: Long Run Tests on GPU

 env:
  CARGO_TERM_COLOR: always
@@ -15,8 +15,8 @@ on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
-    # Nightly tests will be triggered each evening 8p.m.
-    - cron: "0 20 * * *"
+    # Weekly tests will be triggered each Friday at 9p.m.
+    - cron: "0 21 * * 5"

 jobs:
  setup-instance:
@@ -42,7 +42,7 @@ jobs:
    name: Long run GPU tests
    needs: [ setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}_${{github.event_name}}
+      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -59,11 +59,15 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}

+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
@@ -88,7 +92,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (gpu-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/gpu_pcc.yml
+++ b/.github/workflows/gpu_pcc.yml
@@ -1,5 +1,5 @@
 # Perfom tfhe-cuda-backend post-commit checks on an AWS instance
-name: Cuda - Post-commit Checks
+name: TFHE Cuda Backend - Post-commit Checks

 env:
  CARGO_TERM_COLOR: always
@@ -11,10 +11,6 @@ env:
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16-22.04"

 on:
  pull_request:
@@ -24,11 +20,10 @@ jobs:
    name: Setup instance (cuda-pcc)
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -38,18 +33,11 @@ jobs:
          backend: aws
          profile: gpu-build

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-pcc:
    name: CUDA post-commit checks
    needs: setup-instance
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -68,17 +56,11 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Install CUDA
-        if: env.SECRETS_AVAILABLE == 'false'
-        shell: bash
+      - name: Set up home
        run: |
-          TOOLKIT_VERSION="$(echo ${{ matrix.cuda }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
-          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt update
-          sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -101,6 +83,7 @@ jobs:
            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
+            echo "HOME=/home/ubuntu";
          } >> "${GITHUB_ENV}"

      - name: Run fmt checks
@@ -112,22 +95,21 @@ jobs:
          make pcc_gpu

      - name: Slack Notification
-        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
+        if: ${{ failure() }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-pcc)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-pcc ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -142,4 +124,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_classic_tests.yml
+++ b/.github/workflows/gpu_signed_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
-name: Cuda - Signed integer tests with classical PBS
+name: TFHE Cuda Backend - Signed integer tests with classical PBS

 env:
  CARGO_TERM_COLOR: always
@@ -12,22 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_signed_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -68,11 +64,10 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -82,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA signed integer tests with classical PBS
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -109,16 +97,16 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -137,21 +125,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-signed-classic-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -166,4 +152,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_h100_tests.yml
+++ b/.github/workflows/gpu_signed_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Signed integer GPU tests on an H100 VM on hyperstack
-name: Cuda - Signed integer tests on H100
+name: TFHE Cuda Backend - Signed integer tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,23 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
-
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -36,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -56,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_signed_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -69,17 +64,10 @@ jobs:
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
-      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
-      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
-      # otherwise we'll try to run the next job on a non-existing on-demand instance.
-      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
-      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        continue-on-error: true
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -89,27 +77,13 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This will allow to fallback on permanent instances running on Hyperstack.
-      - name: Use permanent remote instance
-        id: use-permanent-instance
-        if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
-        run: |
-          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
-
-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA H100 signed integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -123,17 +97,16 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
-        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -152,21 +125,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -181,4 +152,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_signed_integer_tests.yml
+++ b/.github/workflows/gpu_signed_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend signed integer on an AWS instance
-name: Cuda - Signed integer tests
+name: TFHE Cuda Backend - Signed integer tests

 env:
  CARGO_TERM_COLOR: always
@@ -14,15 +14,14 @@ env:
  FAST_TESTS: TRUE
  NIGHTLY_TESTS: FALSE
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
+    types:
+      - opened
+      - synchronize
  schedule:
    # Nightly tests @ 1AM after each work day
    - cron: "0 1 * * MON-FRI"
@@ -31,7 +30,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -39,13 +38,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -59,10 +58,11 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_signed_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
+
  setup-instance:
    name: Setup instance (cuda-signed-integer-tests)
    runs-on: ubuntu-latest
@@ -71,11 +71,10 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      needs.should-run.outputs.gpu_test == 'true'
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -85,20 +84,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-signed-integer-tests:
    name: CUDA signed integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -114,14 +106,17 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -151,17 +146,16 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
-          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-signed-integer-tests ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -176,4 +170,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_classic_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_classic_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an RTXA6000 VM on hyperstack with the classical PBS
-name: Cuda - Unsigned integer tests with classical PBS
+name: TFHE Cuda Backend - Unsigned integer tests with classical PBS

 env:
  CARGO_TERM_COLOR: always
@@ -12,23 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
-
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -36,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -56,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -64,16 +59,15 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-unsigned-classic-tests)
    needs: should-run
-    if: github.event_name == 'workflow_dispatch' ||
+    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -83,20 +77,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA unsigned integer tests with classical PBS
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -110,16 +97,16 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -138,21 +125,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-unsigned-classic-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -167,4 +152,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_h100_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_h100_tests.yml
@@ -1,5 +1,5 @@
 # Test unsigned integers on an H100 VM on hyperstack
-name: Cuda - Unsigned integer tests on H100
+name: TFHE Cuda Backend - Unsigned integer tests on H100

 env:
  CARGO_TERM_COLOR: always
@@ -12,22 +12,18 @@ env:
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+      types: [ labeled ]

 jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -35,13 +31,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -55,7 +51,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -63,22 +59,15 @@ jobs:
  setup-instance:
    name: Setup instance (cuda-h100-tests)
    needs: should-run
-    if: github.event_name == 'workflow_dispatch' ||
+    if: github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
    runs-on: ubuntu-latest
    outputs:
-      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
-      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
-      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
-      # otherwise we'll try to run the next job on a non-existing on-demand instance.
-      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
-      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
-        continue-on-error: true
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -88,27 +77,13 @@ jobs:
          backend: hyperstack
          profile: single-h100

-      # This will allow to fallback on permanent instances running on Hyperstack.
-      - name: Use permanent remote instance
-        id: use-permanent-instance
-        if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
-        run: |
-          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
-
-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-tests-linux:
    name: CUDA H100 unsigned integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -122,17 +97,16 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
-        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -151,21 +125,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-h100-tests)
-    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
-      - name: Stop remote instance
+      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -180,4 +152,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_unsigned_integer_tests.yml
+++ b/.github/workflows/gpu_unsigned_integer_tests.yml
@@ -1,5 +1,5 @@
 # Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
-name: Cuda - Unsigned integer tests
+name: TFHE Cuda Backend - Unsigned integer tests

 env:
  CARGO_TERM_COLOR: always
@@ -13,16 +13,14 @@ env:
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  FAST_TESTS: TRUE
  NIGHTLY_TESTS: FALSE
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
-  # Secrets will be available only to zama-ai organization members
-  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
-  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types:
+      - opened
+      - synchronize
  schedule:
    # Nightly tests @ 1AM after each work day
    - cron: "0 1 * * MON-FRI"
@@ -31,7 +29,7 @@ jobs:
  should-run:
    runs-on: ubuntu-latest
    permissions:
-      pull-requests: read
+      pull-requests: write
    outputs:
      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
    steps:
@@ -39,13 +37,13 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
+        uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
        with:
+          since_last_remote_commit: true
          files_yaml: |
            gpu:
              - tfhe/Cargo.toml
@@ -59,7 +57,7 @@ jobs:
              - tfhe/src/shortint/parameters/**
              - tfhe/src/high_level_api/**
              - tfhe/src/c_api/**
-              - 'tfhe/docs/**/**.md'
+              - 'tfhe/docs/**.md'
              - '.github/workflows/gpu_unsigned_integer_tests.yml'
              - scripts/integer-tests.sh
              - ci/slab.toml
@@ -72,11 +70,10 @@ jobs:
      needs.should-run.outputs.gpu_test == 'true'
    runs-on: ubuntu-latest
    outputs:
-      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
-      - name: Start remote instance
-        id: start-remote-instance
-        if: env.SECRETS_AVAILABLE == 'true'
+      - name: Start instance
+        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
@@ -86,20 +83,13 @@ jobs:
          backend: hyperstack
          profile: gpu-test

-      # This instance will be spawned especially for pull-request from forked repository
-      - name: Start GitHub instance
-        id: start-github-instance
-        if: env.SECRETS_AVAILABLE == 'false'
-        run: |
-          echo "runner_group=${{ env.EXTERNAL_CONTRIBUTION_RUNNER }}" >> "$GITHUB_OUTPUT"
-
  cuda-unsigned-integer-tests:
    name: CUDA unsigned integer tests
    needs: [ should-run, setup-instance ]
    if: github.event_name != 'pull_request' ||
      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
-      group: ${{ github.workflow_ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
@@ -113,16 +103,16 @@ jobs:
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          persist-credentials: 'false'
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
-        uses: ./.github/actions/gpu_setup
+        uses: ./.github/actions/hyperstack_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
-          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -149,21 +139,19 @@ jobs:
    continue-on-error: true
    steps:
      - name: Send message
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
-          SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (cuda-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cuda-unsigned-integer-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
-        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
@@ -178,4 +166,4 @@ jobs:
        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/integer_long_run_tests.yml
+++ b/.github/workflows/integer_long_run_tests.yml
@@ -42,7 +42,7 @@ jobs:
    name: Long run CPU tests
    needs: [ setup-instance ]
    concurrency:
-      group: ${{ github.workflow_ref }}_${{github.event_name}}
+      group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    timeout-minutes: 4320 # 72 hours
@@ -51,7 +51,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -72,7 +72,7 @@ jobs:

  teardown-instance:
    name: Teardown instance (cpu-tests)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
    needs: [ setup-instance, cpu-tests ]
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -3,7 +3,7 @@ name: Tests on M1 CPU
 on:
  workflow_dispatch:
  pull_request:
-    types: [ labeled ]
+    types: [labeled]
  # Have a nightly build for M1 tests
  schedule:
    # * is a special character in YAML so you have to quote this string
@@ -21,17 +21,14 @@ env:
  # We clear the cache to reduce memory pressure because of the numerous processes of cargo
  # nextest
  TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
-  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}

 concurrency:
-  group: ${{ github.workflow_ref }}
+  group: ${{ github.workflow }}-${{ github.head_ref }}
  cancel-in-progress: true

 jobs:
  cargo-builds-m1:
-    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') ||
-      github.event_name == 'workflow_dispatch' ||
-      contains(github.event.label.name, 'm1_test') }}
+    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
    runs-on: ["self-hosted", "m1mac"]
    # 12 hours, default is 6 hours, hopefully this is more than enough
    timeout-minutes: 720
@@ -40,7 +37,6 @@ jobs:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          persist-credentials: "false"
-          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -195,8 +191,6 @@ jobs:
          SLACK_COLOR: ${{ needs.cargo-builds-m1.result }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }}. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-          MSG_MINIMAL: event,action url,commit
-          BRANCH: ${{ github.ref }}
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -43,15 +43,14 @@ jobs:
      hash: ${{ steps.hash.outputs.hash }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
      - name: Prepare package
        run: |
          cargo package -p tfhe
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
+      - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
        with:
          name: crate
          path: target/package/*.crate
@@ -62,7 +61,7 @@ jobs:
  provenance:
    if: ${{ !inputs.dry_run  }}
    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
+    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
    permissions:
      # Needed to detect the GitHub Actions environment
      actions: read
@@ -78,23 +77,21 @@ jobs:
    name: Publish Release
    needs: [package] # for comparing hashes
    runs-on: ubuntu-latest
-    # For provenance of npmjs publish
    permissions:
      contents: read
      id-token: write
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
      - name: Create NPM version tag
        if: ${{ inputs.npm_latest_tag }}
        run: |
          echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
      - name: Download artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: crate
          path: target/package
@@ -113,7 +110,7 @@ jobs:
      - name: Slack notification (hashes comparison)
        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: failure
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -158,7 +155,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -0,0 +1,49 @@
+name: Publish tfhe-csprng release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  verify_tag:
+    uses: ./.github/workflows/verify_tagged_commit.yml
+    secrets:
+      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
+      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
+
+  publish_release:
+    name: Publish tfhe-csprng Release
+    needs: verify_tag
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
+      - name: Publish crate.io package
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_cuda.yml
+++ b/.github/workflows/make_release_cuda.yml
@@ -1,3 +1,4 @@
+# Publish new release of tfhe-cuda-backend on crates.io.
 name: Publish CUDA release

 on:
@@ -7,6 +8,10 @@ on:
        description: "Dry-run"
        type: boolean
        default: true
+      push_to_crates:
+        description: "Push to crate"
+        type: boolean
+        default: true

 env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
@@ -40,12 +45,10 @@ jobs:
          backend: aws
          profile: gpu-build

-  package:
-    name: Package CUDA Release for provenance
+  publish-cuda-release:
+    name: Publish CUDA Release
    needs: setup-instance
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
@@ -58,74 +61,15 @@ jobs:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: "false"
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

-      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
-        with:
-          toolchain: stable
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
+      - name: Set up home
        run: |
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-cuda-backend
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
-  publish-cuda-release:
-    name: Publish CUDA Release
-    needs: [setup-instance, package] # for comparing hashes
-    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 9
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-    steps:
      - name: Install latest stable
        uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
        with:
@@ -153,40 +97,25 @@ jobs:
          } >> "${GITHUB_ENV}"

      - name: Publish crate.io package
+        if: ${{ inputs.push_to_crates }}
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-cuda-backend --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-cuda-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: Teardown instance (publish-release)
-    if: ${{ always() && needs.setup-instance.result == 'success' }}
-    needs: [setup-instance, publish-cuda-release]
+    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
+    needs: [ setup-instance, publish-cuda-release ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
--- a/.github/workflows/make_release_tfhe_csprng.yml
+++ b/.github/workflows/make_release_tfhe_csprng.yml
@@ -1,103 +0,0 @@
-name: Publish tfhe-csprng release
-
-on:
-  workflow_dispatch:
-    inputs:
-      dry_run:
-        description: "Dry-run"
-        type: boolean
-        default: true
-
-env:
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-
-jobs:
-  verify_tag:
-    uses: ./.github/workflows/verify_tagged_commit.yml
-    secrets:
-      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
-      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
-
-  package:
-    runs-on: ubuntu-latest
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-csprng
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
-        with:
-          name: crate-tfhe-csprng
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
-
-  publish_release:
-    name: Publish tfhe-csprng Release
-    needs: [verify_tag, package]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-      - name: Download artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
-        with:
-          name: crate-tfhe-csprng
-          path: target/package
-      - name: Publish crate.io package
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
-        run: |
-          cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-csprng - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_tfhe_fft.yml
+++ b/.github/workflows/make_release_tfhe_fft.yml
@@ -19,53 +19,15 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

-  package:
-    runs-on: ubuntu-latest
-    needs: verify_tag
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-fft
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
-        with:
-          name: crate
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  publish_release:
    name: Publish tfhe-fft Release
    runs-on: ubuntu-latest
-    needs: [verify_tag, package] # for comparing hashes
+    needs: verify_tag
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

      - name: Publish crate.io package
        env:
@@ -74,26 +36,10 @@ jobs:
        run: |
          cargo publish -p tfhe-fft --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-fft crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_ntt.yml
+++ b/.github/workflows/make_release_tfhe_ntt.yml
@@ -19,50 +19,13 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

-  package:
-    runs-on: ubuntu-latest
-    needs: verify_tag
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-ntt
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
-        with:
-          name: crate
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  publish_release:
    name: Publish tfhe-ntt Release
    runs-on: ubuntu-latest
-    needs: [verify_tag, package] # for comparing hashes
+    needs: verify_tag
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0

@@ -73,26 +36,10 @@ jobs:
        run: |
          cargo publish -p tfhe-ntt --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}

-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-ntt crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_tfhe_versionable.yml
+++ b/.github/workflows/make_release_tfhe_versionable.yml
@@ -2,13 +2,14 @@ name: Publish tfhe-versionable release

 on:
  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true

 env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 jobs:
  verify_tag:
@@ -17,149 +18,39 @@ jobs:
      RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
      READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}

-  package-derive:
-    name: Package tfhe-versionable-derive Release
-    runs-on: ubuntu-latest
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-versionable-derive
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
-        with:
-          name: crate-tfhe-versionable-derive
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance-derive:
-    needs: [package-derive]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package-derive.outputs.hash }}
-
-  publish_release-derive:
-    name: Publish tfhe-versionable-derive Release
-    needs: [ verify_tag, package-derive ] # for comparing hashes
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-      - name: Download artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
-        with:
-          name: crate-tfhe-versionable-derive
-          path: target/package
-      - name: Publish crate.io package
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-        run: |
-          cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }}
-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package-derive.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_MESSAGE: "SLSA tfhe-versionable-derive - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  package:
-    name: Package tfhe-versionable Release
-    needs: publish_release-derive
-    runs-on: ubuntu-latest
-    outputs:
-      hash: ${{ steps.hash.outputs.hash }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
-        with:
-          fetch-depth: 0
-      - name: Prepare package
-        run: |
-          cargo package -p tfhe-versionable
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
-        with:
-          name: crate-tfhe-versionable
-          path: target/package/*.crate
-      - name: generate hash
-        id: hash
-        run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-
-  provenance:
-    needs: package
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  publish_release:
    name: Publish tfhe-versionable Release
-    needs: package # for comparing hashes
+    needs: verify_tag
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-      - name: Download artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
-        with:
-          name: crate-tfhe-versionable
-          path: target/package
-      - name: Publish crate.io package
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
+      - name: Publish proc-macro crate
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+
+      - name: Publish main crate
+        if: ${{ ! inputs.dry_run }}
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: |
          cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }}
-      - name: Generate hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_MESSAGE: "SLSA tfhe-versionable - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
          SLACK_MESSAGE: "tfhe-versionable release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_zk_pok.yml
+++ b/.github/workflows/make_release_zk_pok.yml
@@ -1,3 +1,4 @@
+# Publish new release of tfhe-zk-pok on crates.io.
 name: Publish tfhe-zk-pok release

 on:
@@ -12,40 +13,6 @@ env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

 jobs:
-  package:
-      runs-on: ubuntu-latest
-      outputs:
-        hash: ${{ steps.hash.outputs.hash }}
-      steps:
-        - name: Checkout
-          uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-          with:
-            fetch-depth: 0
-        - name: Prepare package
-          run: |
-            cargo package -p tfhe-zk-pok
-        - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
-          with:
-            name: crate-zk-pok
-            path: target/package/*.crate
-        - name: generate hash
-          id: hash
-          run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-  provenance:
-    if: ${{ !inputs.dry_run  }}
-    needs: [package]
-    uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
-    permissions:
-      # Needed to detect the GitHub Actions environment
-      actions: read
-      # Needed to create the provenance via GitHub OIDC
-      id-token: write
-      # Needed to upload assets/artifacts
-      contents: write
-    with:
-      # SHA-256 hashes of the Crate package.
-      base64-subjects: ${{ needs.package.outputs.hash }}
-
  verify_tag:
    uses: ./.github/workflows/verify_tagged_commit.yml
    secrets:
@@ -54,44 +21,26 @@ jobs:

  publish_release:
    name: Publish tfhe-zk-pok Release
-    needs: [verify_tag, package] # for comparing hashes
+    needs: verify_tag
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
-      - name: Download artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
-        with:
-          name: crate-zk-pok
-          path: target/package
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+
      - name: Publish crate.io package
        env:
          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
        run: |
          cargo publish -p tfhe-zk-pok --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-      - name: Verify hash
-        id: published_hash
-        run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
-      - name: Slack notification (hashes comparison)
-        if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
-        env:
-          SLACK_COLOR: failure
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "SLSA tfhe-zk-pok crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
+        uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -14,7 +14,7 @@ on:

 jobs:
  params-curves-security-check:
-    runs-on: large_ubuntu_16-22.04
+    runs-on: large_ubuntu_16
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -16,8 +16,7 @@ jobs:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          fetch-depth: 0
-          persist-credentials: 'false'
-          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
+          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
      - name: git-sync
        uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
        with:
--- a/.gitignore
+++ b/.gitignore
@@ -32,9 +32,5 @@ web-test-runner/
 node_modules/
 package-lock.json

-# Python .env
-.env
-
 # Dir used for backward compatibility test data
-tests/tfhe-backward-compat-data/
-ci/
+tfhe/tfhe-backward-compat-data/
--- a/.linelint.yml
+++ b/.linelint.yml
@@ -1,15 +1,11 @@
 ignore:
  - .git
  - target
-  - tfhe/build
-  - venv
-  - web-test-runner
  - tfhe/benchmarks_parameters
  - tfhe/web_wasm_parallel_tests/node_modules
  - tfhe/web_wasm_parallel_tests/dist
  - keys
  - coverage
-  - utils/tfhe-lints/ui/main.stderr

 rules:
  # checks if file ends in a newline character
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,233 +0,0 @@
-# Contributing to TFHE-rs
-
-This document provides guidance on how to contribute to **TFHE-rs**.
-
-There are two ways to contribute:
-
- **Report issues:** Open issues on GitHub to report bugs, suggest improvements, or note typos.
- **Submit code**: To become an official contributor, you must sign our Contributor License Agreement (CLA). Our CLA-bot will guide you through this process when you open your first pull request.
-
-## 1. Setting up the project
-
-Start by [forking](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) the **TFHE-rs** repository.
-
-{% hint style="info" %}
- **Rust version**:  Ensure that you use a Rust version >= 1.81 to compile **TFHE-rs**.
- **Incompatibility**: AArch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in **TFHE-rs**.
- **Performance**: For optimal performance, it is highly recommended to run **TFHE-rs** code in release mode with cargo's `--release` flag.
-{% endhint %}
-
-To get more details about the library, please refer to the [documentation](https://docs.zama.ai/tfhe-rs).
-
-## 2. Creating a new branch
-
-When creating your branch, make sure to use the following format :
-
-```
-git checkout -b {feat|fix|docs|chore…}/short_description
-```
-
-For example:
-
-```
-git checkout -b feat/new_feature_X
-```
-
-## 3. Before committing
-
-### 3.1 Linting
-
-Each commit to **TFHE-rs** should conform to the standards of the project. In particular, all source code, Docker, and workflow files should be linted to prevent programmatic and stylistic errors.
-
- Rust source code linters: `clippy`
- Typescript/Javascript source code linters: `eslint`, `prettier`
-
-To apply automatic code formatting, run:
-
-```
-make fmt
-```
-
-You can perform linting of all Cargo targets with:
-
-```
-make clippy_all_targets
-```
-
-### 3.2 Testing
-
-Your contributions must include comprehensive documentation and tests without breaking existing tests. To run pre-commit checks, execute:
-
-```
-make pcc
-```
-
-This command ensures that all the targets in the library build correctly.
-For a faster check, use:
-
-```
-make fpcc
-```
-
-If you're contributing to GPU code, run also:
-
-```
-make pcc_gpu
-```
-
-Unit testing suites are heavy and can require a lot of computing power and RAM availability.
-Whilst tests are run automatically in continuous integration pipeline, you can run tests locally.
-
-All unit tests have a command formatted as:
-
-```
-make test_*
-```
-
-Run `make help` to display a list of all the commands available.
-
-To quickly test your changes locally, follow these steps:
- 1. Locate where the code has changed.
- 2. Add (or modify) a Cargo test filter to the corresponding `make` target in Makefile.
- 3. Run the target.
-
-{% hint style="success" %}
-`make test_<something>` will print the underlying cargo command in STDOUT. You can quickly test your changes by copy/pasting the command and then modify it to suit your needs.
-{% endhint %}
-
-For example, if you made changes in `tfhe/src/integer/*`, you can test them with the following steps:
- 1. In `test_integer` target, replace the filter `-- integer::` by `-- my_new_test`.
- 2. Run `make test_integer`.
-
-## 4. Committing
-
-**TFHE-rs** follows the conventional commit specification to maintain a consistent commit history, essential for Semantic Versioning ([semver.org](https://semver.org/)).
-Commit messages are automatically checked in CI and will be rejected if they do not comply, so make sure that you follow the commit conventions detailed on
-[this page](https://www.conventionalcommits.org/en/v1.0.0/).
-
-## 5. Rebasing
-
-Before creating a pull request, rebase your branch on the repository's `main` branch. Merge commits are not permitted, thus rebasing ensures fewer conflicts and a smoother PR review process.
-
-## 6. Opening a Pull Request
-
-Once your changes are ready, open a pull request.
-
-For instructions on creating a PR from a fork, refer to GitHub's [official documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
-
-## 7. Continuous integration
-
-Before a pull request can be merged, several test suites run automatically. Below is an overview of the CI process:
-
-```mermaid
---
-title: Continuous Integration Process
---
-sequenceDiagram
-    autonumber
-
-    participant Contributor
-    participant GitHub
-    participant Reviewer
-    participant CI-pipeline
-
-    Contributor ->> GitHub: Open pull-request
-    GitHub -->> Contributor: Ask for CLA signing (once)
-    loop
-        Reviewer ->> GitHub: Review code
-        Reviewer ->> CI-pipeline: Approve workflows (short-run)
-        CI-pipeline -->> GitHub: Send checks results
-        Contributor ->> GitHub: Make changes
-    end
-    Reviewer ->> GitHub: Pull-request approval
-    Reviewer ->> CI-pipeline: Approve workflows (long-run)
-    CI-pipeline -->> GitHub: Send checks results
-    Reviewer -->> GitHub: Merge if pipeline green
-```
-
-> [!Note]
->Useful details:
->* pipeline is triggered by humans
->* review team is located in Paris timezone, pipeline launch will most likely happen during office hours
->* direct changes to CI related files are not allowed for external contributors
->* run `make pcc` to fix any build errors before pushing commits
-
-## 8. Data versioning
-
-Data serialized with TFHE-rs must remain backward compatible. This is done using the [tfhe-versionable](https://crates.io/crates/tfhe-versionable) crate.
-
-If you modify a type that derives `Versionize` in a backward-incompatible way, an upgrade implementation must be provided.
-
-For example, these changes are data breaking:
- * Adding a field to a struct.
- * Changing the order of the fields within a struct or the variants within an enum.
- * Renaming a field of a struct or a variant of an enum.
- * Changing the type of field in a struct or a variant in an enum.
-
-On the contrary, these changes are *not* data breaking:
- * Renaming a type (unless it implements the `Named` trait).
- * Adding a variant to the end of an enum.
-
-## Example: adding a field
-
-Suppose you want to add an i32 field to a type named `MyType`. The original type is defined as:
-```rust
-#[derive(Serialize, Deserialize, Versionize)]
-#[versionize(MyTypeVersions)]
-struct MyType {
-  val: u64,
-}
-```
-And you want to change it to:
-```rust
-#[derive(Serialize, Deserialize, Versionize)]
-#[versionize(MyTypeVersions)]
-struct MyType {
-  val: u64,
-  other_val: i32
-}
-```
-
-Follow these steps:
-
- 1. Navigate to the definition of the dispatch enum of this type. This is the type inside the `#[versionize(MyTypeVersions)]` macro attribute. In general, this type has the same name as the base type with a `Versions` suffix. You should find something like
-
-```rust
-#[derive(VersionsDispatch)]
-enum MyTypeVersions {
-  V0(MyTypeV0),
-  V1(MyType)
-}
-```
-
- 2. Add a new variant to the enum to preserve the previous version of the type. You can simply copy and paste the previous definition of the type and add a version suffix:
-
-```rust
-#[derive(Version)]
-struct MyTypeV1 {
-  val: u64,
-}
-
-#[derive(VersionsDispatch)]
-enum MyTypeVersions {
-  V0(MyTypeV0),
-  V1(MyTypeV1),
-  V2(MyType) // Here this points to your modified type
-}
-```
-
- 3. Implement the `Upgrade` trait to define how we should go from the previous version to the current version:
-```rust
-impl Upgrade<MyType> for MyTypeV1 {
-  type Error = Infallible;
-
-   fn upgrade(self) -> Result<MyType, Self::Error> {
-       Ok(MyType {
-           val: self.val,
-           other_val: 0
-        })
-   }
-}
-```
-
- 4. Fix the upgrade target of the previous version. In this example, `impl Upgrade<MyType> for MyTypeV0 {` should simply be changed to `impl Upgrade<MyTypeV1> for MyTypeV0 {`
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,23 +9,28 @@ members = [
    "apps/trivium",
    "tfhe-csprng",
    "backends/tfhe-cuda-backend",
+    "backends/tfhe-hpu-backend",
    "utils/tfhe-versionable",
    "utils/tfhe-versionable-derive",
-    "tests",
+    "mockups/tfhe-hpu-mockup",
 ]

-exclude = ["tests/backward_compatibility_tests", "utils/tfhe-lints"]
+exclude = [
+    "tfhe/backward_compatibility_tests",
+    "utils/cargo-tfhe-lints-inner",
+    "utils/cargo-tfhe-lints"
+]
 [workspace.dependencies]
 aligned-vec = { version = "0.6", default-features = false }
 bytemuck = "1.14.3"
 dyn-stack = { version = "0.11", default-features = false }
-itertools = "0.14"
+itertools = "0.13"
 num-complex = "0.4"
-pulp = { version = "0.21", default-features = false }
+pulp = { version = "0.20.0", default-features = false }
 rand = "0.8"
 rayon = "1"
 serde = { version = "1.0", default-features = false }
-wasm-bindgen = "0.2.100"
+wasm-bindgen = ">=0.2.86,<0.2.94"

 [profile.bench]
 lto = "fat"
@@ -43,6 +48,3 @@ inherits = "dev"
 opt-level = 3
 lto = "off"
 debug-assertions = false
-
-[workspace.metadata.dylint]
-libraries = [{ path = "utils/tfhe-lints" }]
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2025 ZAMA.
+Copyright © 2024 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/Makefile
+++ b/Makefile
@@ -18,11 +18,9 @@ FAST_BENCH?=FALSE
 NIGHTLY_TESTS?=FALSE
 BENCH_OP_FLAVOR?=DEFAULT
 BENCH_TYPE?=latency
-BENCH_PARAM_TYPE?=classical
-BENCH_PARAMS_SET?=default
 NODE_VERSION=22.6
 BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
-BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
+BACKWARD_COMPAT_DATA_BRANCH?=v0.4
 BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
 BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
 TFHE_SPEC:=tfhe
@@ -119,7 +117,7 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
 .PHONY: install_wasm_pack # Install wasm-pack to build JS packages
 install_wasm_pack: install_rs_build_toolchain
 	@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
-	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@$(WASM_PACK_VERSION) || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
 	( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )

 .PHONY: install_node # Install last version of NodeJS via nvm
@@ -153,9 +151,10 @@ install_tarpaulin: install_rs_build_toolchain
 	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
 	( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )

-.PHONY: install_cargo_dylint # Install custom tfhe-rs lints
-install_cargo_dylint:
-	cargo install cargo-dylint dylint-link
+.PHONY: install_tfhe_lints # Install custom tfhe-rs lints
+install_tfhe_lints:
+	(cd utils/cargo-tfhe-lints-inner && cargo install --path .) && \
+	cd utils/cargo-tfhe-lints && cargo install --path .

 .PHONY: install_typos_checker # Install typos checker
 install_typos_checker: install_rs_build_toolchain
@@ -244,8 +243,7 @@ fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt && \
-	$(MAKE) -C tfhe/js_on_wasm_tests fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt

 .PHONY: fmt_gpu # Format rust and cuda code
 fmt_gpu: install_rs_check_toolchain
@@ -274,8 +272,7 @@ check_fmt_js: check_nvm_installed
 	source ~/.nvm/nvm.sh && \
 	nvm install $(NODE_VERSION) && \
 	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt && \
-	$(MAKE) -C tfhe/js_on_wasm_tests check_fmt
+	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt

 .PHONY: check_typos # Check for typos in codebase
 check_typos: install_typos_checker
@@ -365,18 +362,7 @@ clippy_rustdoc: install_rs_check_toolchain
 	fi && \
 	CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
 		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
-		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental \
-		-p $(TFHE_SPEC)
-
-.PHONY: clippy_rustdoc_gpu # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
-clippy_rustdoc_gpu: install_rs_check_toolchain
-	if [[ "$(OS)" != "Linux" ]]; then \
-		echo "WARNING: skipped clippy_rustdoc_gpu, unsupported OS $(OS)"; \
-		exit 0; \
-	fi && \
-	CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
-		cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
-		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
+		--features=boolean,shortint,integer,zk-pok,pbs-stats,strings \
 		-p $(TFHE_SPEC)

 .PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
@@ -430,16 +416,10 @@ clippy_versionable: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		-p tfhe-versionable -- --no-deps -D warnings

-.PHONY: clippy_tfhe_lints # Run clippy lints on tfhe-lints
-clippy_tfhe_lints: install_cargo_dylint # the toolchain is selected with toolchain.toml
-	cd utils/tfhe-lints && \
-	rustup toolchain install && \
-	cargo clippy --all-targets -- --no-deps -D warnings
-
 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
 clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
-clippy_versionable clippy_tfhe_lints
+clippy_versionable

 .PHONY: clippy_fast # Run main clippy targets
 clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
@@ -459,9 +439,9 @@ check_rust_bindings_did_not_change:


 .PHONY: tfhe_lints # Run custom tfhe-rs lints
-tfhe_lints: install_cargo_dylint
-	RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe --no-deps -- \
-		--features=boolean,shortint,integer,strings,zk-pok
+tfhe_lints: install_tfhe_lints
+	cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
+		--features=boolean,shortint,integer,zk-pok -- -D warnings

 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
@@ -535,11 +515,11 @@ build_web_js_api: install_rs_build_toolchain install_wasm_pack
 build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
 	cd tfhe && \
 	rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
-	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory" rustup run $(RS_CHECK_TOOLCHAIN) \
+	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
 		wasm-pack build --release --target=web \
 		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok \
 		-Z build-std=panic_abort,std && \
-	find pkg/snippets -type f -iname workerHelpers.js -exec sed -i "s|const pkg = await import('..\/..\/..');|const pkg = await import('..\/..\/..\/tfhe.js');|" {} \;
+	find pkg/snippets -type f -iname workerHelpers.worker.js -exec sed -i "s|from '..\/..\/..\/';|from '..\/..\/..\/tfhe.js';|" {} \;
 	jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json

 .PHONY: build_node_js_api # Build the js API targeting nodejs
@@ -844,7 +824,7 @@ test_strings: install_rs_build_toolchain
 .PHONY: test_user_doc # Run tests from the .md documentation
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok,strings \
+		--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
 		-p $(TFHE_SPEC) \
 		-- test_user_docs::

@@ -907,22 +887,16 @@ test_versionable: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--all-targets -p tfhe-versionable

-.PHONY: test_tfhe_lints # Run test on tfhe-lints
-test_tfhe_lints: install_cargo_dylint
-	cd utils/tfhe-lints && \
-	rustup toolchain install && \
-	cargo test
-
 # The backward compat data repo holds historical binary data but also rust code to generate and load them.
 # Here we use the "patch" functionality of Cargo to make sure the repo used for the data is the same as the one used for the code.
 .PHONY: test_backward_compatibility_ci
 test_backward_compatibility_ci: install_rs_build_toolchain
 	TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tests/$(BACKWARD_COMPAT_DATA_DIR)\"" \
-		--features=shortint,integer,zk-pok -p tests test_backward_compatibility -- --nocapture
+		--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
+		--features=shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture

 .PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
-test_backward_compatibility: tests/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
+test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci

 .PHONY: backward_compat_branch # Prints the required backward compatibility branch
 backward_compat_branch:
@@ -971,10 +945,6 @@ check_intra_md_links: install_mlc
 check_md_links: install_mlc
 	mlc --match-file-extension tfhe/docs

-.PHONY: check_parameter_export_ok # Checks exported "current" shortint parameter module is correct
-check_parameter_export_ok:
-	python3 ./scripts/check_current_param_export.py
-
 .PHONY: check_compile_tests # Build tests in debug without running them
 check_compile_tests: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
@@ -1175,24 +1145,10 @@ bench_boolean: install_rs_check_toolchain

 .PHONY: bench_pbs # Run benchmarks for PBS
 bench_pbs: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
 	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

-.PHONY: bench_ks_pbs # Run benchmarks for KS-PBS
-bench_ks_pbs: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench ks-pbs-bench \
-	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-
-.PHONY: bench_ks_pbs_gpu # Run benchmarks for KS-PBS on GPU backend
-bench_ks_pbs_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench ks-pbs-bench \
-	--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-
 .PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
 bench_pbs128: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
@@ -1201,20 +1157,19 @@ bench_pbs128: install_rs_check_toolchain

 .PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
 bench_pbs_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
 	--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_ks # Run benchmarks for keyswitch
 bench_ks: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench ks-bench \
 	--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_ks_gpu # Run benchmarks for PBS on GPU backend
 bench_ks_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench ks-bench \
 	--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

@@ -1315,14 +1270,14 @@ parse_wasm_benchmarks: install_rs_check_toolchain

 .PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
 write_params_to_file: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example write_params_to_file --features=boolean,shortint,internal-keycache

 .PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
 clone_backward_compat_data:
-	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tests/$(BACKWARD_COMPAT_DATA_DIR)
+	./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tfhe/$(BACKWARD_COMPAT_DATA_DIR)

-tests/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
+tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data

 #
 # Real use case examples
@@ -1347,20 +1302,20 @@ sha256_bool: install_rs_check_toolchain
 	--example sha256_bool --features=boolean

 .PHONY: pcc # pcc stands for pre commit checks (except GPU)
-pcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
-check_md_docs_are_tested check_intra_md_links clippy_all check_compile_tests test_tfhe_lints \
-tfhe_lints
+pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
+clippy_all check_compile_tests
+# TFHE lints temporarily deactivated as they are incompatible with 1.83
+# tfhe_lints

 .PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
-pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
-clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu
+pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change

 .PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
-fpcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
-check_md_docs_are_tested clippy_fast check_compile_tests
+fpcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested clippy_fast \
+check_compile_tests

 .PHONY: conformance # Automatically fix problems that can be fixed
-conformance: fix_newline fmt fmt_js
+conformance: fmt fmt_js

 #=============================== FFT Section ==================================
 .PHONY: doc_fft # Build rust doc for tfhe-fft
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 <hr/>

 <p align="center">
-  <a href="https://github.com/zama-ai/tfhe-rs-handbook/blob/main/tfhe-rs-handbook.pdf"> 📃 Read Handbook</a> |<a href="https://docs.zama.ai/tfhe-rs"> 📒 Documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
+  <a href="https://docs.zama.ai/tfhe-rs"> 📒 Documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
 </p>


@@ -67,9 +67,6 @@ production-ready library for all the advanced features of TFHE.

 ## Getting started

-> [!Important]
-> **TFHE-rs** released its first stable version v1.0.0 in February 2025, stabilizing the high-level API for the x86 CPU backend.
-
 ### Cargo.toml configuration
 To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:

@@ -78,13 +75,13 @@ tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
 ```

 > [!Note]
-> Note: You need to use Rust version >= 1.84 to compile TFHE-rs.
+> Note: You need to use a Rust version >= 1.81 to compile TFHE-rs.

 > [!Note]
-> Note: AArch64-based machines are not supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs.
+> Note: aarch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs.

 <p align="right">
-  <a href="#about" > ↑ Back to top </a>
+  <a href="#about" > ↑ Back to top </a> 
 </p>

 ### A simple example
@@ -141,7 +138,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 }
 ```

-To run this code, use the following command:
+To run this code, use the following command: 
 <p align="center"> <code> cargo run --release </code> </p>

 > [!Note]
@@ -151,15 +148,12 @@ to run in release mode with cargo's `--release` flag to have the best performanc
 *Find an example with more explanations in [this part of the documentation](https://docs.zama.ai/tfhe-rs/get-started/quick_start)*

 <p align="right">
-  <a href="#about" > ↑ Back to top </a>
+  <a href="#about" > ↑ Back to top </a> 
 </p>



-## Resources
-
-### TFHE-rs Handbook
-A document containing scientific and technical details about algorithms implemented into the library is available here: [TFHE-rs: A (Practical) Handbook](https://github.com/zama-ai/tfhe-rs-handbook/blob/main/tfhe-rs-handbook.pdf).
+## Resources 

 ### TFHE deep dive
 - [TFHE Deep Dive - Part I - Ciphertext types](https://www.zama.ai/post/tfhe-deep-dive-part-1)
@@ -182,7 +176,7 @@ A document containing scientific and technical details about algorithms implemen

 Full, comprehensive documentation is available here: [https://docs.zama.ai/tfhe-rs](https://docs.zama.ai/tfhe-rs).
 <p align="right">
-  <a href="#about" > ↑ Back to top </a>
+  <a href="#about" > ↑ Back to top </a> 
 </p>


@@ -200,13 +194,9 @@ When a new update is published in the Lattice Estimator, we update parameters ac

 ### Security model

-By default, the parameter sets used in the High-Level API with the x86 CPU backend have a failure probability $\le 2^{128}$ to securely work in the IND-CPA^D model using the algorithmic techniques provided in our code base [1].
-If you want to work within the IND-CPA security model, which is less strict than the IND-CPA-D model, the parameter sets can easily be changed and would have slightly better performance. More details can be found in the [TFHE-rs documentation](https://docs.zama.ai/tfhe-rs).
+The default parameters for the TFHE-rs library are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at p_error = $2^{-64}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [1]. 

-The default parameters used in the High-Level API with the GPU backend are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at $p_{error} \le 2^{-64}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [2].
-
-[1] Bernard, Olivier, et al. "Drifting Towards Better Error Probabilities in Fully Homomorphic Encryption Schemes". https://eprint.iacr.org/2024/1718.pdf
-[2] Li, Baiyu, et al. "Securing approximate homomorphic encryption using differential privacy." Annual International Cryptology Conference. Cham: Springer Nature Switzerland, 2022. https://eprint.iacr.org/2022/816.pdf
+[1] Li, Baiyu, et al. "Securing approximate homomorphic encryption using differential privacy." Annual International Cryptology Conference. Cham: Springer Nature Switzerland, 2022. https://eprint.iacr.org/2022/816.pdf

 #### Side-channel attacks

@@ -255,7 +245,7 @@ This software is distributed under the **BSD-3-Clause-Clear** license. Read [thi
 >We are open to collaborating and advancing the FHE space with our partners. If you have specific needs, please email us at hello@zama.ai.

 <p align="right">
-  <a href="#about" > ↑ Back to top </a>
+  <a href="#about" > ↑ Back to top </a> 
 </p>


@@ -269,8 +259,8 @@ This software is distributed under the **BSD-3-Clause-Clear** license. Read [thi
 </picture>
 </a>

-🌟 If you find this project helpful or interesting, please consider giving it a star on GitHub! Your support helps to grow the community and motivates further development.
+🌟 If you find this project helpful or interesting, please consider giving it a star on GitHub! Your support helps to grow the community and motivates further development. 

 <p align="right">
-  <a href="#about" > ↑ Back to top </a>
+  <a href="#about" > ↑ Back to top </a> 
 </p>
--- a/_typos.toml
+++ b/_typos.toml
@@ -13,9 +13,3 @@ extend-ignore-identifiers-re = [
    # Example in trivium
    "C9217BA0D762ACA1"
 ]
-
-[files]
-extend-exclude = [
-    "backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu",
-    "backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu",
-]
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -18,102 +18,102 @@ use tfhe::prelude::*;
 use tfhe_trivium::TriviumStream;

 fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
-    assert!(a.len() % 8 == 0);
-    let mut hexadecimal: String = "".to_string();
-    for test in a.chunks(8) {
-        // Encoding is bytes in LSB order
-        match test[4..8] {
-            [false, false, false, false] => hexadecimal.push('0'),
-            [true, false, false, false] => hexadecimal.push('1'),
-            [false, true, false, false] => hexadecimal.push('2'),
-            [true, true, false, false] => hexadecimal.push('3'),
+	assert!(a.len() % 8 == 0);
+	let mut hexadecimal: String = "".to_string();
+	for test in a.chunks(8) {
+		// Encoding is bytes in LSB order
+		match test[4..8] {
+			[false, false, false, false] => hexadecimal.push('0'),
+			[true, false, false, false] => hexadecimal.push('1'),
+			[false, true, false, false] => hexadecimal.push('2'),
+			[true, true, false, false] => hexadecimal.push('3'),

-            [false, false, true, false] => hexadecimal.push('4'),
-            [true, false, true, false] => hexadecimal.push('5'),
-            [false, true, true, false] => hexadecimal.push('6'),
-            [true, true, true, false] => hexadecimal.push('7'),
+			[false, false, true, false] => hexadecimal.push('4'),
+			[true, false, true, false] => hexadecimal.push('5'),
+			[false, true, true, false] => hexadecimal.push('6'),
+			[true, true, true, false] => hexadecimal.push('7'),

-            [false, false, false, true] => hexadecimal.push('8'),
-            [true, false, false, true] => hexadecimal.push('9'),
-            [false, true, false, true] => hexadecimal.push('A'),
-            [true, true, false, true] => hexadecimal.push('B'),
+			[false, false, false, true] => hexadecimal.push('8'),
+			[true, false, false, true] => hexadecimal.push('9'),
+			[false, true, false, true] => hexadecimal.push('A'),
+			[true, true, false, true] => hexadecimal.push('B'),

-            [false, false, true, true] => hexadecimal.push('C'),
-            [true, false, true, true] => hexadecimal.push('D'),
-            [false, true, true, true] => hexadecimal.push('E'),
-            [true, true, true, true] => hexadecimal.push('F'),
-            _ => ()
-        };
-        match test[0..4] {
-            [false, false, false, false] => hexadecimal.push('0'),
-            [true, false, false, false] => hexadecimal.push('1'),
-            [false, true, false, false] => hexadecimal.push('2'),
-            [true, true, false, false] => hexadecimal.push('3'),
+			[false, false, true, true] => hexadecimal.push('C'),
+			[true, false, true, true] => hexadecimal.push('D'),
+			[false, true, true, true] => hexadecimal.push('E'),
+			[true, true, true, true] => hexadecimal.push('F'),
+			_ => ()
+		};
+		match test[0..4] {
+			[false, false, false, false] => hexadecimal.push('0'),
+			[true, false, false, false] => hexadecimal.push('1'),
+			[false, true, false, false] => hexadecimal.push('2'),
+			[true, true, false, false] => hexadecimal.push('3'),

-            [false, false, true, false] => hexadecimal.push('4'),
-            [true, false, true, false] => hexadecimal.push('5'),
-            [false, true, true, false] => hexadecimal.push('6'),
-            [true, true, true, false] => hexadecimal.push('7'),
+			[false, false, true, false] => hexadecimal.push('4'),
+			[true, false, true, false] => hexadecimal.push('5'),
+			[false, true, true, false] => hexadecimal.push('6'),
+			[true, true, true, false] => hexadecimal.push('7'),

-            [false, false, false, true] => hexadecimal.push('8'),
-            [true, false, false, true] => hexadecimal.push('9'),
-            [false, true, false, true] => hexadecimal.push('A'),
-            [true, true, false, true] => hexadecimal.push('B'),
+			[false, false, false, true] => hexadecimal.push('8'),
+			[true, false, false, true] => hexadecimal.push('9'),
+			[false, true, false, true] => hexadecimal.push('A'),
+			[true, true, false, true] => hexadecimal.push('B'),

-            [false, false, true, true] => hexadecimal.push('C'),
-            [true, false, true, true] => hexadecimal.push('D'),
-            [false, true, true, true] => hexadecimal.push('E'),
-            [true, true, true, true] => hexadecimal.push('F'),
-            _ => ()
-        };
-    }
-    return hexadecimal;
+			[false, false, true, true] => hexadecimal.push('C'),
+			[true, false, true, true] => hexadecimal.push('D'),
+			[false, true, true, true] => hexadecimal.push('E'),
+			[true, true, true, true] => hexadecimal.push('F'),
+			_ => ()
+		};
+	}
+	return hexadecimal;
 }

 fn main() {
-    let config = ConfigBuilder::default().build();
-    let (client_key, server_key) = generate_keys(config);
+	let config = ConfigBuilder::default().build();
+	let (client_key, server_key) = generate_keys(config);

-    let key_string = "0053A6F94C9FF24598EB".to_string();
-    let mut key = [false; 80];
+	let key_string = "0053A6F94C9FF24598EB".to_string();
+	let mut key = [false; 80];

-    for i in (0..key_string.len()).step_by(2) {
-        let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            key[8*(i>>1) + j] = val % 2 == 1;
-            val >>= 1;
-        }
-    }
+	for i in (0..key_string.len()).step_by(2) {
+		let mut val: u8 = u8::from_str_radix(&key_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			key[8*(i>>1) + j] = val % 2 == 1;
+			val >>= 1;
+		}
+	}

-    let iv_string = "0D74DB42A91077DE45AC".to_string();
-    let mut iv = [false; 80];
+	let iv_string = "0D74DB42A91077DE45AC".to_string();
+	let mut iv = [false; 80];

-    for i in (0..iv_string.len()).step_by(2) {
-        let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            iv[8*(i>>1) + j] = val % 2 == 1;
-            val >>= 1;
-        }
-    }
+	for i in (0..iv_string.len()).step_by(2) {
+		let mut val: u8 = u8::from_str_radix(&iv_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			iv[8*(i>>1) + j] = val % 2 == 1;
+			val >>= 1;
+		}
+	}
+	
+	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();

-    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
-
-    let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
-    let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));
+	let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
+	let cipher_iv = iv.map(|x| FheBool::encrypt(x, &client_key));


-    let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);
+	let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);

-    let mut vec = Vec::<bool>::with_capacity(64*8);
-    while vec.len() < 64*8 {
-        let cipher_outputs = trivium.next_64();
-        for c in cipher_outputs {
-            vec.push(c.decrypt(&client_key))
-        }
-    }
+	let mut vec = Vec::<bool>::with_capacity(64*8);
+	while vec.len() < 64*8 {
+		let cipher_outputs = trivium.next_64();
+		for c in cipher_outputs {
+			vec.push(c.decrypt(&client_key))
+		}
+	}

-    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
-    assert_eq!(output_0_63, hexadecimal[0..64*2]);
+	let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
+	assert_eq!(output_0_63, hexadecimal[0..64*2]);
 }
 ```

@@ -129,76 +129,63 @@ Other sizes than 64 bit are expected to be available in the future.

 # FHE shortint Trivium implementation

-The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
-It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
-on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
+The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `PARAM_MESSAGE_1_CARRY_1_KS_PBS`). It uses a lower level API 
+of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run on the same 
+cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes 
 its setup a little more intricate.

 Example code:
 ```rust
 use tfhe::shortint::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::CastingKey;
 use tfhe::{ConfigBuilder, generate_keys, FheUint64};
 use tfhe::prelude::*;
 use tfhe_trivium::TriviumStreamShortint;

 fn test_shortint() {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
-    let (hl_client_key, hl_server_key) = generate_keys(config);
-    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
-    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+	let config = ConfigBuilder::default().build();
+	let (hl_client_key, hl_server_key) = generate_keys(config);
+	let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+	let ksk = CastingKey::new((&client_key, &server_key), (&hl_client_key, &hl_server_key));

-    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
-    let ksk = KeySwitchingKey::new(
-        (&client_key, Some(&server_key)),
-        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
-    );
+	let key_string = "0053A6F94C9FF24598EB".to_string();
+	let mut key = [0; 80];

-    let key_string = "0053A6F94C9FF24598EB".to_string();
-    let mut key = [0; 80];
+	for i in (0..key_string.len()).step_by(2) {
+		let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			key[8*(i>>1) + j] = val % 2;
+			val >>= 1;
+		}
+	}

-    for i in (0..key_string.len()).step_by(2) {
-        let mut val = u64::from_str_radix(&key_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            key[8*(i>>1) + j] = val % 2;
-            val >>= 1;
-        }
-    }
+	let iv_string = "0D74DB42A91077DE45AC".to_string();
+	let mut iv = [0; 80];

-    let iv_string = "0D74DB42A91077DE45AC".to_string();
-    let mut iv = [0; 80];
+	for i in (0..iv_string.len()).step_by(2) {
+		let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
+		for j in 0..8 {
+			iv[8*(i>>1) + j] = val % 2;
+			val >>= 1;
+		}
+	}
+	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();

-    for i in (0..iv_string.len()).step_by(2) {
-        let mut val = u64::from_str_radix(&iv_string[i..i+2], 16).unwrap();
-        for j in 0..8 {
-            iv[8*(i>>1) + j] = val % 2;
-            val >>= 1;
-        }
-    }
-    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+	let cipher_key = key.map(|x| client_key.encrypt(x));
+	let cipher_iv = iv.map(|x| client_key.encrypt(x));

-    let cipher_key = key.map(|x| client_key.encrypt(x));
-    let cipher_iv = iv.map(|x| client_key.encrypt(x));
+	let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];

-    let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];
+	let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);

-    let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);
+	let mut vec = Vec::<u64>::with_capacity(8);
+	while vec.len() < 8 {
+		let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
+		vec.push(trans_ciphered_message.decrypt(&hl_client_key));
+	}

-    let mut vec = Vec::<u64>::with_capacity(8);
-    while vec.len() < 8 {
-        let trans_ciphered_message = trivium.trans_encrypt_64(ciphered_message.pop().unwrap(), &hl_server_key);
-        vec.push(trans_ciphered_message.decrypt(&hl_client_key));
-    }
-
-    let hexadecimal = get_hexagonal_string_from_u64(vec);
-    assert_eq!(output_0_63, hexadecimal[0..64*2]);
+	let hexadecimal = get_hexagonal_string_from_u64(vec);
+	assert_eq!(output_0_63, hexadecimal[0..64*2]);
 }
 ```

--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -1,29 +1,23 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};

 pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -63,20 +57,18 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -111,20 +103,18 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -1,29 +1,23 @@
 use criterion::Criterion;
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::shortint::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
 use tfhe_trivium::{TransCiphering, TriviumStreamShortint};

 pub fn trivium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -63,20 +57,18 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn trivium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -111,20 +103,18 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
 }

 pub fn trivium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -1,10 +1,6 @@
 use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo renaud1239/Kreyvium,
 // commit fd6828f68711276c25f55e605935028f5e843f43
@@ -220,20 +216,18 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn kreyvium_test_shortint_long() {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/src/trans_ciphering/mod.rs
+++ b/apps/trivium/src/trans_ciphering/mod.rs
@@ -48,8 +48,6 @@ fn transcipher_from_1_1_stream(
 ) -> FheUint64 {
    assert_eq!(stream.len(), 64);

-    let id_lut = internal_server_key.generate_lookup_table(|x| x);
-
    let pairs = (0..32)
        .into_par_iter()
        .map(|i| {
@@ -59,11 +57,10 @@ fn transcipher_from_1_1_stream(
            let b0 = &stream[8 * byte_idx + 2 * pair_idx];
            let b1 = &stream[8 * byte_idx + 2 * pair_idx + 1];

-            let mut combined = internal_server_key
-                .unchecked_add(b0, &internal_server_key.unchecked_scalar_mul(b1, 2));
-            internal_server_key.apply_lookup_table_assign(&mut combined, &id_lut);
-
-            casting_key.cast(&combined)
+            casting_key.cast(
+                &internal_server_key
+                    .unchecked_add(b0, &internal_server_key.unchecked_scalar_mul(b1, 2)),
+            )
        })
        .collect::<Vec<_>>();

--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -1,10 +1,6 @@
 use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
 use tfhe::prelude::*;
-use tfhe::shortint::parameters::v1_0::{
-    V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
-    V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
-};
+use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
 // Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
 // file testvectors/trivium-80.80.test-vectors
@@ -356,20 +352,18 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn trivium_test_shortint_long() {
-    let config = ConfigBuilder::default()
-        .use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();

    let (client_key, server_key): (ClientKey, ServerKey) =
-        gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
+        gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);

    let ksk = KeySwitchingKey::new(
        (&client_key, Some(&server_key)),
        (&underlying_ck, &underlying_sk),
-        V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
+        V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
    );

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tfhe-cuda-backend"
-version = "0.8.0"
+version = "0.7.0"
 edition = "2021"
 authors = ["Zama team"]
 license = "BSD-3-Clause-Clear"
@@ -14,4 +14,4 @@ keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
 [build-dependencies]
 cmake = { version = "0.1" }
 pkg-config = { version = "0.3" }
-bindgen = "0.71"
+bindgen = "0.70.1"
--- a/backends/tfhe-cuda-backend/LICENSE
+++ b/backends/tfhe-cuda-backend/LICENSE
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2025 ZAMA.
+Copyright © 2024 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/backends/tfhe-cuda-backend/README.md
+++ b/backends/tfhe-cuda-backend/README.md
@@ -13,26 +13,24 @@ and forth between the CPU and the GPU, to create and destroy Cuda streams, etc.:
 - `cuda_get_number_of_gpus`
 - `cuda_synchronize_device`
 The cryptographic operations it provides are:
- an implementation of the classical TFHE programmable bootstrap,
- an implementation of the multi-bit TFHE programmable bootstrap,
- the keyswitch,
- acceleration for leveled operations,
- acceleration for arithmetics over encrypted integers of arbitrary size, 
- acceleration for integer compression/decompression.
+- an amortized implementation of the TFHE programmable bootstrap: `cuda_bootstrap_amortized_lwe_ciphertext_vector_32` and `cuda_bootstrap_amortized_lwe_ciphertext_vector_64`
+- a low latency implementation of the TFHE programmable bootstrap: `cuda_bootstrap_low_latency_lwe_ciphertext_vector_32` and `cuda_bootstrap_low_latency_lwe_ciphertext_vector_64`
+- the keyswitch: `cuda_keyswitch_lwe_ciphertext_vector_32` and `cuda_keyswitch_lwe_ciphertext_vector_64`
+- the larger precision programmable bootstrap (wop PBS, which supports up to 16 bits of message while the classical PBS only supports up to 8 bits of message) and its sub-components: `cuda_wop_pbs_64`, `cuda_extract_bits_64`, `cuda_circuit_bootstrap_64`, `cuda_cmux_tree_64`, `cuda_blind_rotation_sample_extraction_64`
+- acceleration for leveled operations: `cuda_negate_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_plaintext_vector_64`, `cuda_mult_lwe_ciphertext_vector_cleartext_vector`.

 ## Dependencies

 **Disclaimer**: Compilation on Windows/Mac is not supported yet. Only Nvidia GPUs are supported. 

- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation. You need an Nvidia GPU with Compute Capability >= 3.0
+- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation
 - [nvcc](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) >= 10.0
 - [gcc](https://gcc.gnu.org/) >= 8.0 - check this [page](https://gist.github.com/ax3l/9489132) for more details about nvcc/gcc compatible versions
 - [cmake](https://cmake.org/) >= 3.24
- libclang, to match Rust bingen [requirements](https://rust-lang.github.io/rust-bindgen/requirements.html) >= 9.0

 ## Build

-The Cuda project held in `tfhe-cuda-backend` can be compiled independently of TFHE-rs in the following way:
+The Cuda project held in `tfhe-cuda-backend` can be compiled independently from TFHE-rs in the following way:
 ```
 git clone git@github.com:zama-ai/tfhe-rs
 cd backends/tfhe-cuda-backend/cuda
--- a/backends/tfhe-cuda-backend/build.rs
+++ b/backends/tfhe-cuda-backend/build.rs
@@ -62,7 +62,6 @@ fn main() {
            "cuda/include/integer/integer.h",
            "cuda/include/keyswitch.h",
            "cuda/include/linear_algebra.h",
-            "cuda/include/fft/fft128.h",
            "cuda/include/pbs/programmable_bootstrap.h",
            "cuda/include/pbs/programmable_bootstrap_multibit.h",
        ];
--- a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
@@ -18,7 +18,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream,
 void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
                                 void *lwe_array_out, void const *glwe_array_in,
                                 uint32_t const *nth_array, uint32_t num_nths,
-                                 uint32_t lwe_per_glwe, uint32_t glwe_dimension,
+                                 uint32_t glwe_dimension,
                                 uint32_t polynomial_size);
 }
 #endif
--- a/backends/tfhe-cuda-backend/cuda/include/device.h
+++ b/backends/tfhe-cuda-backend/cuda/include/device.h
@@ -27,8 +27,6 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
    std::abort();                                                              \
  }

-void cuda_set_device(uint32_t gpu_index);
-
 cudaEvent_t cuda_create_event(uint32_t gpu_index);

 void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
@@ -52,7 +50,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);

 void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);

-void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
+void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
                              cudaStream_t stream, uint32_t gpu_index);

 void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
--- a/backends/tfhe-cuda-backend/cuda/include/fft/fft128.h
+++ b/backends/tfhe-cuda-backend/cuda/include/fft/fft128.h
@@ -1,17 +0,0 @@
-#include <stdint.h>
-extern "C" {
-void cuda_fourier_transform_forward_as_torus_f128_async(
-    void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
-    void *im1, void const *standard, uint32_t const N,
-    const uint32_t number_of_samples);
-
-void cuda_fourier_transform_forward_as_integer_f128_async(
-    void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
-    void *im1, void const *standard, uint32_t const N,
-    const uint32_t number_of_samples);
-
-void cuda_fourier_transform_backward_as_torus_f128_async(
-    void *stream, uint32_t gpu_index, void *standard, void const *re0,
-    void const *re1, void const *im0, void const *im1, uint32_t const N,
-    const uint32_t number_of_samples);
-}
--- a/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/compression/compression_utilities.h
@@ -102,7 +102,9 @@ template <typename Torus> struct int_decompression {
      // Example: in the 2_2 case we are mapping a 2 bits message onto a 4 bits
      // space, we want to keep the original 2 bits value in the 4 bits space,
      // so we apply the identity and the encoding will rescale it for us.
-      auto decompression_rescale_f = [](Torus x) -> Torus { return x; };
+      auto decompression_rescale_f = [encryption_params](Torus x) -> Torus {
+        return x;
+      };

      auto effective_compression_message_modulus =
          encryption_params.carry_modulus;
@@ -110,8 +112,6 @@ template <typename Torus> struct int_decompression {

      generate_device_accumulator_with_encoding<Torus>(
          streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
-          decompression_rescale_lut->get_degree(0),
-          decompression_rescale_lut->get_max_degree(0),
          encryption_params.glwe_dimension, encryption_params.polynomial_size,
          effective_compression_message_modulus,
          effective_compression_carry_modulus,
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer.h
@@ -38,15 +38,6 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
 enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };

 extern "C" {
-
-typedef struct {
-  void *ptr;
-  uint64_t *degrees;
-  uint64_t *noise_levels;
-  uint32_t num_radix_blocks;
-  uint32_t lwe_dimension;
-} CudaRadixCiphertextFFI;
-
 void scratch_cuda_apply_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -54,7 +45,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory);
+    bool allocate_gpu_memory);
 void scratch_cuda_apply_many_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
    int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -62,12 +53,13 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory);
-void cuda_apply_univariate_lut_kb_64(
-    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks);
+    uint32_t num_many_lut, bool allocate_gpu_memory);
+void cuda_apply_univariate_lut_kb_64(void *const *streams,
+                                     uint32_t const *gpu_indexes,
+                                     uint32_t gpu_count, void *output_radix_lwe,
+                                     void const *input_radix_lwe,
+                                     int8_t *mem_ptr, void *const *ksks,
+                                     void *const *bsks, uint32_t num_blocks);

 void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
                                             uint32_t const *gpu_indexes,
@@ -81,15 +73,13 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory);
+    bool allocate_gpu_memory);

 void cuda_apply_bivariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI const *input_radix_lwe_1,
-    CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
-    uint32_t shift);
+    void *output_radix_lwe, void const *input_radix_lwe_1,
+    void const *input_radix_lwe_2, int8_t *mem_ptr, void *const *ksks,
+    void *const *bsks, uint32_t num_blocks, uint32_t shift);

 void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
                                            uint32_t const *gpu_indexes,
@@ -98,10 +88,9 @@ void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,

 void cuda_apply_many_univariate_lut_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_luts,
-    uint32_t lut_stride);
+    void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks, uint32_t num_blocks,
+    uint32_t num_luts, uint32_t lut_stride);

 void scratch_cuda_full_propagation_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -113,8 +102,7 @@ void scratch_cuda_full_propagation_64(

 void cuda_full_propagation_64_inplace(void *const *streams,
                                      uint32_t const *gpu_indexes,
-                                      uint32_t gpu_count,
-                                      CudaRadixCiphertextFFI *input_blocks,
+                                      uint32_t gpu_count, void *input_blocks,
                                      int8_t *mem_ptr, void *const *ksks,
                                      void *const *bsks, uint32_t num_blocks);

@@ -133,11 +121,10 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(

 void cuda_integer_mult_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *radix_lwe_out,
-    CudaRadixCiphertextFFI const *radix_lwe_left, bool const is_bool_left,
-    CudaRadixCiphertextFFI const *radix_lwe_right, bool const is_bool_right,
-    void *const *bsks, void *const *ksks, int8_t *mem_ptr,
-    uint32_t polynomial_size, uint32_t num_blocks);
+    void *radix_lwe_out, void const *radix_lwe_left, bool const is_bool_left,
+    void const *radix_lwe_right, bool const is_bool_right, void *const *bsks,
+    void *const *ksks, int8_t *mem_ptr, uint32_t polynomial_size,
+    uint32_t num_blocks);

 void cleanup_cuda_integer_mult(void *const *streams,
                               uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -145,14 +132,15 @@ void cleanup_cuda_integer_mult(void *const *streams,

 void cuda_negate_integer_radix_ciphertext_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
-    uint32_t carry_modulus, uint32_t num_radix_blocks);
+    void *lwe_array_out, void const *lwe_array_in, uint32_t lwe_dimension,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    uint32_t carry_modulus);

 void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
-    uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus);
+    void *lwe_array, void const *scalar_input, uint32_t lwe_dimension,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    uint32_t carry_modulus);

 void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -165,8 +153,8 @@ void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(

 void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -179,8 +167,8 @@ void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(

 void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array, uint32_t shift, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_radix_logical_scalar_shift(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -201,8 +189,8 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(

 void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks);
+    void *lwe_array, void const *lwe_shift, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
                                                 uint32_t const *gpu_indexes,
@@ -220,17 +208,15 @@ void scratch_cuda_integer_radix_comparison_kb_64(

 void cuda_comparison_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_1,
-    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t lwe_ciphertext_count);

 void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_in, void const *scalar_blocks,
+    void *lwe_array_out, void const *lwe_array_in, void const *scalar_blocks,
    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    uint32_t num_scalar_blocks);
+    uint32_t lwe_ciphertext_count, uint32_t num_scalar_blocks);

 void cleanup_cuda_integer_comparison(void *const *streams,
                                     uint32_t const *gpu_indexes,
@@ -247,17 +233,15 @@ void scratch_cuda_integer_radix_bitop_kb_64(

 void cuda_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_1,
-    CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t lwe_ciphertext_count);

 void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
+    void *lwe_array_out, void const *lwe_array_input, void const *clear_blocks,
    uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks);
+    void *const *ksks, uint32_t lwe_ciphertext_count, BITOP_TYPE op);

 void cleanup_cuda_integer_bitop(void *const *streams,
                                uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -274,11 +258,9 @@ void scratch_cuda_integer_radix_cmux_kb_64(

 void cuda_cmux_integer_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_condition,
-    CudaRadixCiphertextFFI const *lwe_array_true,
-    CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array_out, void const *lwe_condition, void const *lwe_array_true,
+    void const *lwe_array_false, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t lwe_ciphertext_count);

 void cleanup_cuda_integer_radix_cmux(void *const *streams,
                                     uint32_t const *gpu_indexes,
@@ -295,8 +277,8 @@ void scratch_cuda_integer_radix_scalar_rotate_kb_64(

 void cuda_integer_radix_scalar_rotate_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, uint32_t n, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks);
+    void *lwe_array, uint32_t n, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_radix_scalar_rotate(void *const *streams,
                                              uint32_t const *gpu_indexes,
@@ -323,16 +305,15 @@ void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(

 void cuda_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
-    const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);
+    void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
+    void *const *bsks, void *const *ksks, uint32_t num_blocks,
+    uint32_t requested_flag, uint32_t uses_carry);

 void cuda_add_and_propagate_single_carry_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
-    CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
-    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
-    uint32_t requested_flag, uint32_t uses_carry);
+    void *lhs_array, const void *rhs_array, void *carry_out,
+    const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry);

 void cleanup_cuda_propagate_single_carry(void *const *streams,
                                         uint32_t const *gpu_indexes,
@@ -355,10 +336,9 @@ void scratch_cuda_integer_overflowing_sub_kb_64_inplace(

 void cuda_integer_overflowing_sub_kb_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
-    CudaRadixCiphertextFFI *overflow_block,
-    const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
-    void *const *bsks, void *const *ksks, uint32_t compute_overflow,
+    void *lhs_array, const void *rhs_array, void *overflow_block,
+    const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
    uint32_t uses_input_borrow);

 void cleanup_cuda_integer_overflowing_sub(void *const *streams,
@@ -377,9 +357,9 @@ void scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(

 void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *radix_lwe_out,
-    CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks);
+    void *radix_lwe_out, void *radix_lwe_vec, uint32_t num_radix_in_vec,
+    int8_t *mem_ptr, void *const *bsks, void *const *ksks,
+    uint32_t num_blocks_in_radix);

 void cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -395,10 +375,10 @@ void scratch_cuda_integer_scalar_mul_kb_64(

 void cuda_scalar_multiplication_integer_radix_ciphertext_64_inplace(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array, uint64_t const *decomposed_scalar,
+    void *lwe_array, uint64_t const *decomposed_scalar,
    uint64_t const *has_at_least_one_set, int8_t *mem_ptr, void *const *bsks,
-    void *const *ksks, uint32_t polynomial_size, uint32_t message_modulus,
-    uint32_t num_scalars);
+    void *const *ksks, uint32_t lwe_dimension, uint32_t polynomial_size,
+    uint32_t message_modulus, uint32_t num_blocks, uint32_t num_scalars);

 void cleanup_cuda_integer_radix_scalar_mul(void *const *streams,
                                           uint32_t const *gpu_indexes,
@@ -431,13 +411,12 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
    uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
    uint32_t grouping_factor, uint32_t num_radix_blocks,
    uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
-    uint64_t lut_degree, bool allocate_gpu_memory);
+    bool allocate_gpu_memory);

 void cuda_integer_compute_prefix_sum_hillis_steele_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *output_radix_lwe,
-    CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
-    void *const *ksks, void *const *bsks, uint32_t num_blocks);
+    void *output_radix_lwe, void *generates_or_propagates, int8_t *mem_ptr,
+    void *const *ksks, void *const *bsks, uint32_t num_blocks, uint32_t shift);

 void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -445,8 +424,9 @@ void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(

 void cuda_integer_reverse_blocks_64_inplace(void *const *streams,
                                            uint32_t const *gpu_indexes,
-                                            uint32_t gpu_count,
-                                            CudaRadixCiphertextFFI *lwe_array);
+                                            uint32_t gpu_count, void *lwe_array,
+                                            uint32_t num_blocks,
+                                            uint32_t lwe_size);

 void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -459,8 +439,8 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(

 void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
-    void *const *bsks, void *const *ksks);
+    void *ct, int8_t *mem_ptr, bool is_signed, void *const *bsks,
+    void *const *ksks, uint32_t num_blocks);

 void cleanup_cuda_integer_abs_inplace(void *const *streams,
                                      uint32_t const *gpu_indexes,
@@ -478,8 +458,7 @@ void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(

 void cuda_integer_are_all_comparisons_block_true_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
+    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);

 void cleanup_cuda_integer_are_all_comparisons_block_true(
@@ -497,8 +476,7 @@ void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(

 void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
    void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
-    CudaRadixCiphertextFFI *lwe_array_out,
-    CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
+    void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
    void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);

 void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
--- a/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h
--- a/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer/radix_ciphertext.h
@@ -1,8 +0,0 @@
-#ifndef CUDA_RADIX_CIPHERTEXT_H
-#define CUDA_RADIX_CIPHERTEXT_H
-
-void release_radix_ciphertext(cudaStream_t const stream,
-                              uint32_t const gpu_index,
-                              CudaRadixCiphertextFFI *data);
-
-#endif
--- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
+++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
@@ -1,52 +1,49 @@
 #ifndef CUDA_LINALG_H_
 #define CUDA_LINALG_H_

-#include "integer/integer.h"
 #include <stdint.h>

 extern "C" {

-void cuda_negate_lwe_ciphertext_vector_32(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
-void cuda_negate_lwe_ciphertext_vector_64(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
+                                          void *lwe_array_out,
+                                          void const *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
+                                          void *lwe_array_out,
+                                          void const *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
-                                       CudaRadixCiphertextFFI *output,
-                                       CudaRadixCiphertextFFI const *input_1,
-                                       CudaRadixCiphertextFFI const *input_2);
+                                       void *lwe_array_out,
+                                       void const *lwe_array_in_1,
+                                       void const *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
-                                       CudaRadixCiphertextFFI *output,
-                                       CudaRadixCiphertextFFI const *input_1,
-                                       CudaRadixCiphertextFFI const *input_2);
+                                       void *lwe_array_out,
+                                       void const *lwe_array_in_1,
+                                       void const *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
+
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *plaintext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, void const *cleartext_array_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_plaintext_64(
-    void *stream, uint32_t gpu_index, void *lwe_array_out,
-    void const *lwe_array_in, const uint64_t plaintext_in,
-    const uint32_t input_lwe_dimension,
-    const uint32_t input_lwe_ciphertext_count);
+    uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
 }

 #endif // CUDA_LINALG_H_
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_multibit_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_multibit_utilities.h
@@ -5,12 +5,12 @@

 template <typename Torus>
 bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
-    uint32_t polynomial_size, int max_shared_memory);
+    uint32_t polynomial_size);

 template <typename Torus>
 bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
    uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t level_count, int max_shared_memory);
+    uint32_t level_count);

 #if CUDA_ARCH >= 900
 template <typename Torus>
@@ -114,8 +114,6 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
             uint32_t polynomial_size, uint32_t level_count,
             uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
             PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
-    cuda_set_device(gpu_index);
-
    this->pbs_variant = pbs_variant;
    this->lwe_chunk_size = lwe_chunk_size;
    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/pbs_utilities.h
@@ -61,7 +61,7 @@ get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {

 template <typename Torus>
 bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
-    uint32_t polynomial_size, int max_shared_memory);
+    uint32_t polynomial_size);

 template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;

@@ -77,10 +77,10 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
             uint32_t polynomial_size, uint32_t level_count,
             uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
             bool allocate_gpu_memory) {
-    cuda_set_device(gpu_index);
+
    this->pbs_variant = pbs_variant;

-    auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
+    auto max_shared_memory = cuda_get_max_shared_memory(0);

    if (allocate_gpu_memory) {
      switch (pbs_variant) {
@@ -157,7 +157,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {

        bool supports_dsm =
            supports_distributed_shared_memory_on_classic_programmable_bootstrap<
-                Torus>(polynomial_size, max_shared_memory);
+                Torus>(polynomial_size);

        uint64_t full_sm =
            get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(
@@ -218,7 +218,8 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
 template <typename Torus>
 uint64_t get_buffer_size_programmable_bootstrap_cg(
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
+    uint32_t input_lwe_ciphertext_count) {
+  int max_shared_memory = cuda_get_max_shared_memory(0);
  uint64_t full_sm =
      get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
  uint64_t partial_sm =
@@ -244,8 +245,7 @@ template <typename Torus>
 bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
                                                   uint32_t polynomial_size,
                                                   uint32_t level_count,
-                                                   uint32_t num_samples,
-                                                   int max_shared_memory);
+                                                   uint32_t num_samples);

 template <typename Torus>
 void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
--- a/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
+++ b/backends/tfhe-cuda-backend/cuda/include/pbs/programmable_bootstrap_multibit.h
@@ -8,7 +8,7 @@ extern "C" {

 bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t num_samples, int max_shared_memory);
+    uint32_t num_samples);

 void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
    void *stream, uint32_t gpu_index, void *dest, void const *src,
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
@@ -24,7 +24,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream,
 void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
                                 void *lwe_array_out, void const *glwe_array_in,
                                 uint32_t const *nth_array, uint32_t num_nths,
-                                 uint32_t lwe_per_glwe, uint32_t glwe_dimension,
+                                 uint32_t glwe_dimension,
                                 uint32_t polynomial_size) {

  switch (polynomial_size) {
@@ -32,43 +32,43 @@ void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
    host_sample_extract<uint64_t, AmortizedDegree<256>>(
        static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
        (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
-        lwe_per_glwe, glwe_dimension);
+        glwe_dimension);
    break;
  case 512:
    host_sample_extract<uint64_t, AmortizedDegree<512>>(
        static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
        (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
-        lwe_per_glwe, glwe_dimension);
+        glwe_dimension);
    break;
  case 1024:
    host_sample_extract<uint64_t, AmortizedDegree<1024>>(
        static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
        (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
-        lwe_per_glwe, glwe_dimension);
+        glwe_dimension);
    break;
  case 2048:
    host_sample_extract<uint64_t, AmortizedDegree<2048>>(
        static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
        (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
-        lwe_per_glwe, glwe_dimension);
+        glwe_dimension);
    break;
  case 4096:
    host_sample_extract<uint64_t, AmortizedDegree<4096>>(
        static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
        (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
-        lwe_per_glwe, glwe_dimension);
+        glwe_dimension);
    break;
  case 8192:
    host_sample_extract<uint64_t, AmortizedDegree<8192>>(
        static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
        (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
-        lwe_per_glwe, glwe_dimension);
+        glwe_dimension);
    break;
  case 16384:
    host_sample_extract<uint64_t, AmortizedDegree<16384>>(
        static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
        (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
-        lwe_per_glwe, glwe_dimension);
+        glwe_dimension);
    break;
  default:
    PANIC("Cuda error: unsupported polynomial size. Supported "
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
@@ -11,7 +11,7 @@ void cuda_convert_lwe_ciphertext_vector_to_gpu(cudaStream_t stream,
                                               uint32_t gpu_index, T *dest,
                                               T *src, uint32_t number_of_cts,
                                               uint32_t lwe_dimension) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);
  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
  cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
 }
@@ -21,14 +21,14 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
                                               uint32_t gpu_index, T *dest,
                                               T *src, uint32_t number_of_cts,
                                               uint32_t lwe_dimension) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);
  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
  cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
 }

 template <typename Torus, class params>
 __global__ void sample_extract(Torus *lwe_array_out, Torus const *glwe_array_in,
-                               uint32_t const *nth_array, uint32_t lwe_per_glwe,
+                               uint32_t const *nth_array,
                               uint32_t glwe_dimension) {

  const int input_id = blockIdx.x;
@@ -39,28 +39,28 @@ __global__ void sample_extract(Torus *lwe_array_out, Torus const *glwe_array_in,
  auto lwe_out = lwe_array_out + input_id * lwe_output_size;

  // We assume each GLWE will store the first polynomial_size inputs
+  uint32_t lwe_per_glwe = params::degree;
  auto glwe_in = glwe_array_in + (input_id / lwe_per_glwe) * glwe_input_size;

-  // nth is ensured to be in [0, params::degree)
-  auto nth = nth_array[input_id] % params::degree;
+  // nth is ensured to be in [0, lwe_per_glwe)
+  auto nth = nth_array[input_id] % lwe_per_glwe;

  sample_extract_mask<Torus, params>(lwe_out, glwe_in, glwe_dimension, nth);
  sample_extract_body<Torus, params>(lwe_out, glwe_in, glwe_dimension, nth);
 }

-// lwe_per_glwe LWEs will be extracted per GLWE ciphertext, thus we need to have
-// enough indexes
 template <typename Torus, class params>
-__host__ void
-host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
-                    Torus *lwe_array_out, Torus const *glwe_array_in,
-                    uint32_t const *nth_array, uint32_t num_nths,
-                    uint32_t lwe_per_glwe, uint32_t glwe_dimension) {
-  cuda_set_device(gpu_index);
+__host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
+                                  Torus *lwe_array_out,
+                                  Torus const *glwe_array_in,
+                                  uint32_t const *nth_array, uint32_t num_nths,
+                                  uint32_t glwe_dimension) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+  // One block per requested nth index, params::degree / params::opt threads
   dim3 grid(num_nths);
   dim3 thds(params::degree / params::opt);
   sample_extract<Torus, params><<<grid, thds, 0, stream>>>(
-      lwe_array_out, glwe_array_in, nth_array, lwe_per_glwe, glwe_dimension);
+      lwe_array_out, glwe_array_in, nth_array, glwe_dimension);
   check_cuda_error(cudaGetLastError());
 }

--- a/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/fast_packing_keyswitch.cuh
@@ -2,6 +2,7 @@
 #define CNCRT_FAST_KS_CUH

 #undef NDEBUG
+#include <assert.h>

 #include "device.h"
 #include "gadget.cuh"
@@ -25,6 +26,15 @@ template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
  return BLOCK_SIZE_GEMM * THREADS_GEMM * 2 * sizeof(Torus);
 }

+// Fast-path PKS gate: disabled until tests are fixed and level_count > 1 works.
+__host__ inline bool
+can_use_pks_fast_path(uint32_t lwe_dimension, uint32_t num_lwe,
+                      uint32_t polynomial_size, uint32_t level_count,
+                      uint32_t glwe_dimension) {
+  // TODO: activate it back, fix tests and extend to level_count > 1
+  return false;
+}
+
 // Initialize decomposition by performing rounding
 // and decomposing one level of an array of Torus LWEs. Only
 // decomposes the mask elements of the incoming LWEs.
@@ -47,8 +57,6 @@ __global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
  // is lwe_dimension + 1, while for writing it is lwe_dimension
  auto read_val_idx = lwe_idx * (lwe_dimension + 1) + lwe_sample_idx;
  auto write_val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
-  auto write_state_idx =
-      num_lwe * lwe_dimension + lwe_idx * lwe_dimension + lwe_sample_idx;

  Torus a_i = lwe_in[read_val_idx];

@@ -56,8 +64,6 @@ __global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,

  Torus mod_b_mask = (1ll << base_log) - 1ll;
  lwe_out[write_val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
-  synchronize_threads_in_block();
-  lwe_out[write_state_idx] = state;
 }

 // Continue decomposiion of an array of Torus elements in place. Supposes
@@ -78,16 +84,12 @@ decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
    return;

  auto val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
-  auto state_idx = num_lwe * lwe_dimension + val_idx;

-  Torus state = buffer_in[state_idx];
-  synchronize_threads_in_block();
+  Torus state = buffer_in[val_idx];

  Torus mod_b_mask = (1ll << base_log) - 1ll;

  buffer_in[val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
-  synchronize_threads_in_block();
-  buffer_in[state_idx] = state;
 }

 // Multiply matrices A, B of size (M, K), (K, N) respectively
@@ -97,10 +99,6 @@ decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
 // BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
 // THREADS_GEMM)-shaped tiles of values from A, and a (THREADS_GEMM,
 // BLOCK_SIZE_GEMM)-shaped tiles of values from B.
-//
-// This code is adapted by generalizing the 1d block-tiling
-// kernel from https://github.com/siboehm/SGEMM_CUDA
-// to any matrix dimension
 template <typename Torus, typename TorusVec>
 __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
                      int stride_B, Torus *C) {
@@ -113,6 +111,7 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
  const uint cRow = blockIdx.y;
  const uint cCol = blockIdx.x;

+  const uint totalResultsBlocktile = BM * BN;
  const int threadCol = threadIdx.x % BN;
  const int threadRow = threadIdx.x / BN;

@@ -153,7 +152,7 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
    } else {
      Bs[innerRowB * BN + innerColB] = 0;
    }
-    synchronize_threads_in_block();
+    __syncthreads();

    // Advance blocktile for the next iteration of this loop
    A += BK;
@@ -169,7 +168,7 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
            As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
      }
    }
-    synchronize_threads_in_block();
+    __syncthreads();
  }

  // Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
@@ -260,7 +259,11 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(

  // Optimization of packing keyswitch when packing many LWEs

-  cuda_set_device(gpu_index);
+  if (level_count > 1) {
+    PANIC("Fast path PKS only supports level_count==1");
+  }
+
+  cudaSetDevice(gpu_index);
  check_cuda_error(cudaGetLastError());

  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
@@ -270,11 +273,10 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
  // buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
  // scratch buffer for the fast path must determine the half-size of the
  // scratch buffer as the max between the size of the GLWE and the size of the
-  // LWE-mask times two (to keep both decomposition state and decomposed
-  // intermediate value)
-  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
+  // LWE-mask
+  int memory_unit = glwe_accumulator_size > lwe_dimension
                        ? glwe_accumulator_size
-                        : lwe_dimension * 2;
+                        : lwe_dimension;

  // ping pong the buffer between successive calls
  // split the buffer in two parts of this size
@@ -307,7 +309,7 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
                 CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
  dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);

-  auto stride_KSK_buffer = glwe_accumulator_size * level_count;
+  auto stride_KSK_buffer = glwe_accumulator_size;

  uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
  tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
@@ -315,20 +317,21 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
      stride_KSK_buffer, d_mem_1);
  check_cuda_error(cudaGetLastError());

-  auto ksk_block_size = glwe_accumulator_size;
+  /*
+    TODO: transpose key to generalize to level_count > 1

-  for (int li = 1; li < level_count; ++li) {
-    decompose_vectorize_step_inplace<Torus, TorusVec>
-        <<<grid_decomp, threads_decomp, 0, stream>>>(
-            d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
-    check_cuda_error(cudaGetLastError());
+    for (int li = 1; li < level_count; ++li) {
+      decompose_vectorize_step_inplace<Torus, TorusVec>
+          <<<grid_decomp, threads_decomp, 0, stream>>>(
+              d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
+      check_cuda_error(cudaGetLastError());

-    tgemm<Torus, TorusVec>
-        <<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
-            num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
-            fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
-    check_cuda_error(cudaGetLastError());
-  }
+      tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size,
+    stream>>>( num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
+          fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
+      check_cuda_error(cudaGetLastError());
+    }
+  */

  // should we include the mask in the rotation ??
  dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
@@ -57,7 +57,7 @@ void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
  if (gpu_count != 1)
    PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
          "supported yet.")
-  cuda_set_device(gpu_indexes[0]);
+  cudaSetDevice(gpu_indexes[0]);

  int shared_memory_size = sizeof(double) * polynomial_size;

--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
@@ -73,13 +73,24 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
    uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
    uint32_t num_lwes) {

-  host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
-      static_cast<cudaStream_t>(stream), gpu_index,
-      static_cast<uint64_t *>(glwe_array_out),
-      static_cast<const uint64_t *>(lwe_array_in),
-      static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
-      input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
-      base_log, level_count, num_lwes);
+  if (can_use_pks_fast_path(input_lwe_dimension, num_lwes,
+                            output_polynomial_size, level_count,
+                            output_glwe_dimension)) {
+    host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
+        static_cast<cudaStream_t>(stream), gpu_index,
+        static_cast<uint64_t *>(glwe_array_out),
+        static_cast<const uint64_t *>(lwe_array_in),
+        static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
+        input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
+        base_log, level_count, num_lwes);
+  } else
+    host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
+        static_cast<cudaStream_t>(stream), gpu_index,
+        static_cast<uint64_t *>(glwe_array_out),
+        static_cast<const uint64_t *>(lwe_array_in),
+        static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
+        input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
+        base_log, level_count, num_lwes);
 }

 void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
@@ -45,19 +45,19 @@ keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
          const Torus *__restrict__ lwe_input_indexes,
          const Torus *__restrict__ ksk, uint32_t lwe_dimension_in,
          uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count) {
-  const int tid = threadIdx.x + blockIdx.y * blockDim.x;
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
  const int shmem_index = threadIdx.x + threadIdx.y * blockDim.x;

  extern __shared__ int8_t sharedmem[];
  Torus *lwe_acc_out = (Torus *)sharedmem;
  auto block_lwe_array_out = get_chunk(
-      lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1);
+      lwe_array_out, lwe_output_indexes[blockIdx.y], lwe_dimension_out + 1);

  if (tid <= lwe_dimension_out) {

    Torus local_lwe_out = 0;
    auto block_lwe_array_in = get_chunk(
-        lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);
+        lwe_array_in, lwe_input_indexes[blockIdx.y], lwe_dimension_in + 1);

    if (tid == lwe_dimension_out && threadIdx.y == 0) {
      local_lwe_out = block_lwe_array_in[lwe_dimension_in];
@@ -105,22 +105,16 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
    uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
    uint32_t num_samples) {

-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);

  constexpr int num_threads_y = 32;
-  int num_blocks_per_sample, num_threads_x;
+  int num_blocks, num_threads_x;

  getNumBlocksAndThreads2D(lwe_dimension_out + 1, 512, num_threads_y,
-                           num_blocks_per_sample, num_threads_x);
+                           num_blocks, num_threads_x);

  int shared_mem = sizeof(Torus) * num_threads_y * num_threads_x;
-  if (num_blocks_per_sample > 65536)
-    PANIC("Cuda error (Keyswith): number of blocks per sample is too large");
-
-  // In multiplication of large integers (512, 1024, 2048), the number of
-  // samples can be larger than 65536, so we need to set it in the first
-  // dimension of the grid
-  dim3 grid(num_samples, num_blocks_per_sample, 1);
+  dim3 grid(num_blocks, num_samples, 1);
  dim3 threads(num_threads_x, num_threads_y, 1);

  keyswitch<Torus><<<grid, threads, shared_mem, stream>>>(
@@ -166,15 +160,13 @@ __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
    cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t num_lwes, bool allocate_gpu_memory) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);

  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;

-  // allocate at least LWE-mask times two: to keep both decomposition state and
-  // decomposed intermediate value
-  int memory_unit = glwe_accumulator_size > lwe_dimension * 2
+  int memory_unit = glwe_accumulator_size > lwe_dimension
                        ? glwe_accumulator_size
-                        : lwe_dimension * 2;
+                        : lwe_dimension;

  if (allocate_gpu_memory) {
    *fp_ks_buffer = (int8_t *)cuda_malloc_async(
@@ -229,6 +221,44 @@ __device__ void packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext(
  }
 }

+// Packing keyswitch kernel for a batch of LWE ciphertexts.
+//
+// blockIdx.y picks the LWE handled by the thread; the x-axis supplies the
+// flat coefficient index. The launch is assumed to provide
+// (glwe_dimension + 1) * polynomial_size x-threads for every input, spread
+// over as many thread blocks as needed.
+template <typename Torus>
+__global__ void packing_keyswitch_lwe_list_to_glwe(
+    Torus *glwe_array_out, Torus const *lwe_array_in, Torus const *fp_ksk,
+    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
+    Torus *d_mem) {
+  const int lwe_idx = blockIdx.y;
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+
+  // Element counts of one input LWE and of one output GLWE
+  const int lwe_size = (lwe_dimension_in + 1);
+  const int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
+
+  // Per-input views: source LWE, keyswitch staging area, final result slot
+  auto lwe_in = lwe_array_in + lwe_idx * lwe_size;
+  auto ks_glwe_out = d_mem + lwe_idx * glwe_accumulator_size;
+  auto glwe_out = glwe_array_out + lwe_idx * glwe_accumulator_size;
+
+  // Keyswitch the selected LWE into its GLWE staging slot
+  packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
+      ks_glwe_out, lwe_in, fp_ksk, lwe_dimension_in, glwe_dimension,
+      polynomial_size, base_log, level_count);
+
+  // P * x ^lwe_idx, applied to the polynomial this thread belongs to
+  const auto poly_start = (tid / polynomial_size) * polynomial_size;
+  auto staged_poly = ks_glwe_out + poly_start;
+  auto rotated_poly = glwe_out + poly_start;
+  polynomial_accumulate_monic_monomial_mul<Torus>(
+      rotated_poly, staged_poly, lwe_idx, tid % polynomial_size,
+      polynomial_size, 1, true);
+}
+
 /// To-do: Rewrite this kernel for efficiency
 template <typename Torus>
 __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
@@ -246,4 +276,52 @@ __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
  }
 }

+// Packs up to polynomial_size LWE ciphertexts into one GLWE: each LWE is
+// keyswitched and rotated by its index, then all results are accumulated.
+// fp_ks_buffer is assumed to come from the matching scratch function.
+template <typename Torus>
+__host__ void host_packing_keyswitch_lwe_list_to_glwe(
+    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
+    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
+    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
+    uint32_t num_lwes) {
+
+  if (num_lwes > polynomial_size)
+    PANIC("Cuda error: too many LWEs to pack. The number of LWEs should be "
+          "smaller than polynomial_size.")
+
+  check_cuda_error(cudaSetDevice(gpu_index));
+  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
+
+  // x-axis covers the GLWE coefficients, y-axis enumerates the input LWEs
+  int num_blocks = 0, num_threads = 0;
+  getNumBlocksAndThreads(glwe_accumulator_size, 128, num_blocks, num_threads);
+  dim3 grid(num_blocks, num_lwes);
+  dim3 threads(num_threads);
+
+  // The scratch buffer is split in two halves of num_lwes * memory_unit
+  // elements. memory_unit is max(GLWE size, LWE mask size) so that the same
+  // buffer also fits the fast path. Here the first half stages the
+  // keyswitched GLWEs and the second half receives the rotated ones.
+  int memory_unit = glwe_accumulator_size > lwe_dimension_in
+                        ? glwe_accumulator_size
+                        : lwe_dimension_in;
+
+  auto d_mem = (Torus *)fp_ks_buffer;
+  auto d_tmp_glwe_array_out = d_mem + num_lwes * memory_unit;
+
+  // individually keyswitch each lwe
+  packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(
+      d_tmp_glwe_array_out, lwe_array_in, fp_ksk_array, lwe_dimension_in,
+      glwe_dimension, polynomial_size, base_log, level_count, d_mem);
+  check_cuda_error(cudaGetLastError());
+
+  // accumulate to a single glwe
+  accumulate_glwes<Torus><<<num_blocks, threads, 0, stream>>>(
+      glwe_out, d_tmp_glwe_array_out, glwe_dimension, polynomial_size,
+      num_lwes);
+  check_cuda_error(cudaGetLastError());
+}
+
 #endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
@@ -110,7 +110,7 @@ template <typename Torus>
 __host__ void host_modulus_switch_inplace(cudaStream_t stream,
                                          uint32_t gpu_index, Torus *array,
                                          int size, uint32_t log_modulus) {
-  cuda_set_device(gpu_index);
+  cudaSetDevice(gpu_index);

  int num_threads = 0, num_blocks = 0;
  getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);
--- a/backends/tfhe-cuda-backend/cuda/src/device.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/device.cu
@@ -2,12 +2,8 @@
 #include <cstdint>
 #include <cuda_runtime.h>

-void cuda_set_device(uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-}
-
 cudaEvent_t cuda_create_event(uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  cudaEvent_t event;
  check_cuda_error(cudaEventCreate(&event));
  return event;
@@ -15,24 +11,24 @@ cudaEvent_t cuda_create_event(uint32_t gpu_index) {

 void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
                       uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaEventRecord(event, stream));
 }

 void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
                            uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
 }

 void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaEventDestroy(event));
 }

 /// Unsafe function to create a CUDA stream, must check first that GPU exists
 cudaStream_t cuda_create_stream(uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  cudaStream_t stream;
  check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  return stream;
@@ -40,22 +36,15 @@ cudaStream_t cuda_create_stream(uint32_t gpu_index) {

 /// Unsafe function to destroy CUDA stream, must check first the GPU exists
 void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaStreamDestroy(stream));
 }

 void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaStreamSynchronize(stream));
 }

-void synchronize_streams(cudaStream_t const *streams,
-                         uint32_t const *gpu_indexes, uint32_t gpu_count) {
-  for (uint i = 0; i < gpu_count; i++) {
-    cuda_synchronize_stream(streams[i], gpu_indexes[i]);
-  }
-}
-
 // Determine if a CUDA device is available at runtime
 uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }

@@ -63,7 +52,7 @@ uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
 /// or if there's not enough memory. A safe wrapper around it must call
 /// cuda_check_valid_malloc() first
 void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  void *ptr;
  check_cuda_error(cudaMalloc((void **)&ptr, size));

@@ -74,7 +63,7 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
 /// asynchronously.
 void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
                        uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  void *ptr;

 #ifndef CUDART_VERSION
@@ -97,7 +86,7 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream,

 /// Check that allocation is valid
 void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  size_t total_mem, free_mem;
  check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
  if (size > free_mem) {
@@ -135,7 +124,7 @@ bool cuda_check_support_thread_block_clusters() {
 }

 /// Copy memory to the GPU asynchronously
-void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
+void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
                              cudaStream_t stream, uint32_t gpu_index) {
  if (size == 0)
    return;
@@ -145,7 +134,7 @@ void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
    PANIC("Cuda error: invalid device pointer in async copy to GPU.")
  }

-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(
      cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
 }
@@ -165,7 +154,7 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
  if (attr_src.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
  }
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  if (attr_src.device == attr_dest.device) {
    check_cuda_error(
        cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
@@ -190,7 +179,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
  if (attr_src.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
  }
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  if (attr_src.device == attr_dest.device) {
    check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice));
  } else {
@@ -201,7 +190,7 @@ void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,

 /// Synchronizes device
 void cuda_synchronize_device(uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaDeviceSynchronize());
 }

@@ -214,7 +203,7 @@ void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
  if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
    PANIC("Cuda error: invalid dest device pointer in cuda memset.")
  }
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
 }

@@ -234,7 +223,7 @@ void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
    if (attr.type != cudaMemoryTypeDevice) {
      PANIC("Cuda error: invalid dest device pointer in cuda set value.")
    }
-    cuda_set_device(gpu_index);
+    check_cuda_error(cudaSetDevice(gpu_index));
    int block_size = 256;
    int num_blocks = (n + block_size - 1) / block_size;

@@ -264,7 +253,7 @@ void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
    PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
  }

-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(
      cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
 }
@@ -278,14 +267,14 @@ int cuda_get_number_of_gpus() {

 /// Drop a cuda array
 void cuda_drop(void *ptr, uint32_t gpu_index) {
-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
  check_cuda_error(cudaFree(ptr));
 }

 /// Drop a cuda array asynchronously, if supported on the device
 void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {

-  cuda_set_device(gpu_index);
+  check_cuda_error(cudaSetDevice(gpu_index));
 #ifndef CUDART_VERSION
 #error CUDART_VERSION Undefined!
 #elif (CUDART_VERSION >= 11020)
--- a/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
--- a/backends/tfhe-cuda-backend/cuda/src/fft128/f128.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft128/f128.cuh
@@ -1,410 +0,0 @@
-
-#ifndef CUDA_FFT128_F128_CUH
-#define CUDA_FFT128_F128_CUH
-
-#include <cstdint>
-
-struct alignas(16) f128 {
-  double hi;
-  double lo;
-
-  // Default and parameterized constructors
-  __host__ __device__ f128() : hi(0.0), lo(0.0) {}
-  __host__ __device__ f128(double high, double low) : hi(high), lo(low) {}
-
-  // Quick two-sum
-  __host__ __device__ __forceinline__ static f128 quick_two_sum(double a,
-                                                                double b) {
-#ifdef __CUDA_ARCH__
-    double s = __dadd_rn(a, b);
-    return f128(s, __dsub_rn(b, __dsub_rn(s, a)));
-#else
-    double s = a + b;
-    return f128(s, b - (s - a));
-#endif;
-  }
-
-  // Two-sum
-  __host__ __device__ __forceinline__ static f128 two_sum(double a, double b) {
-#ifdef __CUDA_ARCH__
-    double s = __dadd_rn(a, b);
-    double bb = __dsub_rn(s, a);
-    return f128(s, __dadd_rn(__dsub_rn(a, __dsub_rn(s, bb)), __dsub_rn(b, bb)));
-#else
-    double s = a + b;
-    double bb = s - a;
-    return f128(s, (a - (s - bb)) + (b - bb));
-#endif
-  }
-
-  // Two-product
-  __host__ __device__ __forceinline__ static f128 two_prod(double a, double b) {
-
-#ifdef __CUDA_ARCH__
-    double p = __dmul_rn(a, b);
-    double p2 = __fma_rn(a, b, -p);
-#else
-    double p = a * b;
-    double p2 = fma(a, b, -p);
-#endif
-    return f128(p, p2);
-  }
-
-  __host__ __device__ __forceinline__ static f128 two_diff(double a, double b) {
-#ifdef __CUDA_ARCH__
-    double s = __dsub_rn(a, b);
-    double bb = __dsub_rn(s, a);
-    return f128(s, __dsub_rn(__dsub_rn(a, __dsub_rn(s, bb)), __dadd_rn(b, bb)));
-#else
-    double s = a - b;
-    double bb = s - a;
-    return f128(s, (a - (s - bb)) - (b + bb));
-#endif
-  }
-
-  // Addition
-  __host__ __device__ static f128 add(const f128 &a, const f128 &b) {
-    auto s = two_sum(a.hi, b.hi);
-    auto t = two_sum(a.lo, b.lo);
-
-    double hi = s.hi;
-    double lo = s.lo + t.hi;
-    hi = hi + lo;
-    lo = lo - (hi - s.hi);
-
-    return f128(hi, lo + t.lo);
-  }
-
-  // Addition with estimate
-  __host__ __device__ static f128 add_estimate(const f128 &a, const f128 &b) {
-    auto se = two_sum(a.hi, b.hi);
-#ifdef __CUDA_ARCH__
-    se.lo = __dadd_rn(se.lo, __dadd_rn(a.lo, b.lo));
-#else
-    se.lo += (a.lo + b.lo);
-#endif
-
-    return quick_two_sum(se.hi, se.lo);
-  }
-
-  // Subtraction with estimate
-  __host__ __device__ static f128 sub_estimate(const f128 &a, const f128 &b) {
-    f128 se = two_diff(a.hi, b.hi);
-#ifdef __CUDA_ARCH__
-    se.lo = __dadd_rn(se.lo, a.lo);
-    se.lo = __dsub_rn(se.lo, b.lo);
-#else
-    se.lo += a.lo;
-    se.lo -= b.lo;
-#endif
-    return quick_two_sum(se.hi, se.lo);
-  }
-
-  // Subtraction
-  __host__ __device__ static f128 sub(const f128 &a, const f128 &b) {
-    auto s = two_diff(a.hi, b.hi);
-    auto t = two_diff(a.lo, b.lo);
-    s = quick_two_sum(s.hi, s.lo + t.hi);
-    return quick_two_sum(s.hi, s.lo + t.lo);
-  }
-
-  // Multiplication
-  __host__ __device__ static f128 mul(const f128 &a, const f128 &b) {
-    auto p = two_prod(a.hi, b.hi);
-#ifdef __CUDA_ARCH__
-    double a_0_x_b_1 = __dmul_rn(a.hi, b.lo);
-    double a_1_x_b_0 = __dmul_rn(a.lo, b.hi);
-    p.lo = __dadd_rn(p.lo, __dadd_rn(a_0_x_b_1, a_1_x_b_0));
-#else
-    p.lo += (a.hi * b.lo + a.lo * b.hi);
-#endif
-    p = quick_two_sum(p.hi, p.lo);
-    return p;
-  }
-
-  __host__ __device__ static f128 add_f64_f64(const double a, const double b) {
-    return two_sum(a, b);
-  }
-
-  __host__ __device__ static f128 f128_floor(const f128 &x) {
-    double x0_floor = floor(x.hi);
-    if (x0_floor == x.hi) {
-      return add_f64_f64(x0_floor, floor(x.lo));
-    }
-
-    return f128(x0_floor, 0.0);
-  }
-
-  __host__ __device__ static void
-  cplx_f128_mul_assign(f128 &c_re, f128 &c_im, const f128 &a_re,
-                       const f128 &a_im, const f128 &b_re, const f128 &b_im) {
-    auto a_re_x_b_re = mul(a_re, b_re);
-    auto a_re_x_b_im = mul(a_re, b_im);
-    auto a_im_x_b_re = mul(a_im, b_re);
-    auto a_im_x_b_im = mul(a_im, b_im);
-
-    c_re = sub_estimate(a_re_x_b_re, a_im_x_b_im);
-    c_im = add_estimate(a_im_x_b_re, a_re_x_b_im);
-  }
-
-  __host__ __device__ static void
-  cplx_f128_sub_assign(f128 &c_re, f128 &c_im, const f128 &a_re,
-                       const f128 &a_im, const f128 &b_re, const f128 &b_im) {
-    c_re = sub_estimate(a_re, b_re);
-    c_im = sub_estimate(a_im, b_im);
-  }
-  __host__ __device__ static void
-  cplx_f128_add_assign(f128 &c_re, f128 &c_im, const f128 &a_re,
-                       const f128 &a_im, const f128 &b_re, const f128 &b_im) {
-    c_re = add_estimate(a_re, b_re);
-    c_im = add_estimate(a_im, b_im);
-  }
-};
-
-struct f128x2 {
-  f128 re;
-  f128 im;
-
-  __host__ __device__ f128x2() : re(), im() {}
-
-  __host__ __device__ f128x2(const f128 &real, const f128 &imag)
-      : re(real), im(imag) {}
-
-  __host__ __device__ f128x2(double real, double imag)
-      : re(real, 0.0), im(imag, 0.0) {}
-
-  __host__ __device__ explicit f128x2(double real)
-      : re(real, 0.0), im(0.0, 0.0) {}
-
-  __host__ __device__ f128x2(const f128x2 &other)
-      : re(other.re), im(other.im) {}
-
-  __host__ __device__ f128x2(f128x2 &&other) noexcept
-      : re(std::move(other.re)), im(std::move(other.im)) {}
-
-  __host__ __device__ f128x2 &operator=(const f128x2 &other) {
-    if (this != &other) {
-      re = other.re;
-      im = other.im;
-    }
-    return *this;
-  }
-
-  __host__ __device__ f128x2 &operator=(f128x2 &&other) noexcept {
-    if (this != &other) {
-      re = std::move(other.re);
-      im = std::move(other.im);
-    }
-    return *this;
-  }
-
-  __host__ __device__ f128x2 conjugate() const {
-    return f128x2(re, f128(-im.hi, -im.lo));
-  }
-
-  __host__ __device__ f128 norm_squared() const {
-    return f128::add(f128::mul(re, re), f128::mul(im, im));
-  }
-
-  __host__ __device__ void zero() {
-    re = f128(0.0, 0.0);
-    im = f128(0.0, 0.0);
-  }
-
-  // Addition
-  __host__ __device__ friend f128x2 operator+(const f128x2 &a,
-                                              const f128x2 &b) {
-    return f128x2(f128::add(a.re, b.re), f128::add(a.im, b.im));
-  }
-
-  // Subtraction
-  __host__ __device__ friend f128x2 operator-(const f128x2 &a,
-                                              const f128x2 &b) {
-    return f128x2(f128::add(a.re, f128(-b.re.hi, -b.re.lo)),
-                  f128::add(a.im, f128(-b.im.hi, -b.im.lo)));
-  }
-
-  // Multiplication (complex multiplication)
-  __host__ __device__ friend f128x2 operator*(const f128x2 &a,
-                                              const f128x2 &b) {
-    f128 real_part =
-        f128::add(f128::mul(a.re, b.re),
-                  f128(-f128::mul(a.im, b.im).hi, -f128::mul(a.im, b.im).lo));
-    f128 imag_part = f128::add(f128::mul(a.re, b.im), f128::mul(a.im, b.re));
-    return f128x2(real_part, imag_part);
-  }
-
-  // Addition-assignment operator
-  __host__ __device__ f128x2 &operator+=(const f128x2 &other) {
-    re = f128::add(re, other.re);
-    im = f128::add(im, other.im);
-    return *this;
-  }
-
-  // Subtraction-assignment operator
-  __host__ __device__ f128x2 &operator-=(const f128x2 &other) {
-    re = f128::add(re, f128(-other.re.hi, -other.re.lo));
-    im = f128::add(im, f128(-other.im.hi, -other.im.lo));
-    return *this;
-  }
-
-  // Multiplication-assignment operator
-  __host__ __device__ f128x2 &operator*=(const f128x2 &other) {
-    f128 new_re =
-        f128::add(f128::mul(re, other.re), f128(-f128::mul(im, other.im).hi,
-                                                -f128::mul(im, other.im).lo));
-    f128 new_im = f128::add(f128::mul(re, other.im), f128::mul(im, other.re));
-    re = new_re;
-    im = new_im;
-    return *this;
-  }
-};
-
-__host__ __device__ inline uint64_t double_to_bits(double d) {
-  uint64_t bits = *reinterpret_cast<uint64_t *>(&d);
-  return bits;
-}
-
-__host__ __device__ inline double bits_to_double(uint64_t bits) {
-  double d = *reinterpret_cast<double *>(&bits);
-  return d;
-}
-
-__host__ __device__ double u128_to_f64(__uint128_t x) {
-  const __uint128_t ONE = 1;
-  const double A = ONE << 52;
-  const double B = ONE << 104;
-  const double C = ONE << 76;
-  const double D = 340282366920938500000000000000000000000.;
-
-  const __uint128_t threshold = (ONE << 104);
-
-  if (x < threshold) {
-    uint64_t A_bits = double_to_bits(A);
-
-    __uint128_t shifted = (x << 12);
-    uint64_t lower64 = static_cast<uint64_t>(shifted);
-    lower64 >>= 12;
-
-    uint64_t bits_l = A_bits | lower64;
-    double l_temp = bits_to_double(bits_l);
-    double l = l_temp - A;
-
-    uint64_t B_bits = double_to_bits(B);
-    uint64_t top64 = static_cast<uint64_t>(x >> 52);
-    uint64_t bits_h = B_bits | top64;
-    double h_temp = bits_to_double(bits_h);
-    double h = h_temp - B;
-
-    return (l + h);
-
-  } else {
-    uint64_t C_bits = double_to_bits(C);
-
-    __uint128_t shifted = (x >> 12);
-    uint64_t lower64 = static_cast<uint64_t>(shifted);
-    lower64 >>= 12;
-
-    uint64_t x_lo = static_cast<uint64_t>(x);
-    uint64_t mask_part = (x_lo & 0xFFFFFFULL);
-
-    uint64_t bits_l = C_bits | lower64 | mask_part;
-    double l_temp = bits_to_double(bits_l);
-    double l = l_temp - C;
-
-    uint64_t D_bits = double_to_bits(D);
-    uint64_t top64 = static_cast<uint64_t>(x >> 76);
-    uint64_t bits_h = D_bits | top64;
-    double h_temp = bits_to_double(bits_h);
-    double h = h_temp - D;
-
-    return (l + h);
-  }
-}
-
-__host__ __device__ __uint128_t f64_to_u128(const double f) {
-  const __uint128_t ONE = 1;
-  const uint64_t f_bits = double_to_bits(f);
-  if (f_bits < 1023ull << 52) {
-    return 0;
-  } else {
-    const __uint128_t m = ONE << 127 | (__uint128_t)f_bits << 75;
-    const uint64_t s = 1150 - (f_bits >> 52);
-    if (s >= 128) {
-      return 0;
-    } else {
-      return m >> s;
-    }
-  }
-}
-
-__host__ __device__ __uint128_t f64_to_i128(const double f) {
-  // Get raw bits of the double
-  const uint64_t f_bits = double_to_bits(f);
-
-  // Remove sign bit (equivalent to Rust's !0 >> 1 mask)
-  const uint64_t a = f_bits & 0x7FFFFFFFFFFFFFFFull;
-
-  // Check if value is in [0, 1) range
-  if (a < (1023ull << 52)) {
-    return 0;
-  }
-
-  // Reconstruct mantissa with implicit leading 1
-  const __uint128_t m =
-      (__uint128_t{1} << 127) | (static_cast<__uint128_t>(a) << 75);
-
-  // Calculate shift amount based on exponent
-  const uint64_t exponent = a >> 52;
-  const uint64_t s = 1150 - exponent;
-
-  // Perform unsigned right shift
-  const __uint128_t u = m >> s;
-
-  // Apply sign (check original sign bit)
-  const __int128_t result = static_cast<__int128_t>(u);
-  return (f_bits >> 63) ? -result : result;
-}
-
-__host__ __device__ double i128_to_f64(__int128_t const x) {
-  uint64_t sign = static_cast<uint64_t>(x >> 64) & (1ULL << 63);
-  __uint128_t abs =
-      (x < 0) ? static_cast<__uint128_t>(-x) : static_cast<__uint128_t>(x);
-
-  return bits_to_double(double_to_bits(u128_to_f64(abs)) | sign);
-}
-__host__ __device__ f128 u128_to_signed_to_f128(__uint128_t x) {
-  const double first_approx = i128_to_f64(x);
-  const uint64_t sign_bit = double_to_bits(first_approx) & (1ull << 63);
-  const __uint128_t first_approx_roundtrip =
-      f64_to_u128((first_approx < 0) ? -first_approx : first_approx);
-  const __uint128_t first_approx_roundtrip_signed =
-      (sign_bit == (1ull << 63)) ? -first_approx_roundtrip
-                                 : first_approx_roundtrip;
-
-  double correction = i128_to_f64(x - first_approx_roundtrip_signed);
-
-  return f128(first_approx, correction);
-}
-
-__host__ __device__ __uint128_t u128_from_torus_f128(const f128 &a) {
-  auto x = f128::sub_estimate(a, f128::f128_floor(a));
-  const double normalization = 340282366920938500000000000000000000000.;
-#ifdef __CUDA_ARCH__
-  x.hi = __dmul_rn(x.hi, normalization);
-  x.lo = __dmul_rn(x.lo, normalization);
-#else
-  x.hi *= normalization;
-  x.lo *= normalization;
-#endif
-
-  // TODO has to be round
-  x = f128::f128_floor(x);
-
-  __uint128_t x0 = f64_to_u128(x.hi);
-  __int128_t x1 = f64_to_i128(x.lo);
-
-  return x0 + x1;
-}
-
-#endif
--- a/backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cu
@@ -1,163 +0,0 @@
-#include "fft128.cuh"
-
-void cuda_fourier_transform_forward_as_integer_f128_async(
-    void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
-    void *im1, void const *standard, const uint32_t N,
-    const uint32_t number_of_samples) {
-  switch (N) {
-  case 64:
-    host_fourier_transform_forward_as_integer_f128<AmortizedDegree<64>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 128:
-    host_fourier_transform_forward_as_integer_f128<AmortizedDegree<128>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 256:
-    host_fourier_transform_forward_as_integer_f128<AmortizedDegree<256>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 512:
-    host_fourier_transform_forward_as_integer_f128<AmortizedDegree<512>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 1024:
-    host_fourier_transform_forward_as_integer_f128<AmortizedDegree<1024>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 2048:
-    host_fourier_transform_forward_as_integer_f128<AmortizedDegree<2048>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 4096:
-    host_fourier_transform_forward_as_integer_f128<AmortizedDegree<4096>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  default:
-    PANIC("Cuda error (f128 fft): unsupported polynomial size. Supported "
-          "N's are powers of two"
-          " in the interval [64..4096].")
-  }
-}
-
-void cuda_fourier_transform_forward_as_torus_f128_async(
-    void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
-    void *im1, void const *standard, const uint32_t N,
-    const uint32_t number_of_samples) {
-  switch (N) {
-  case 64:
-    host_fourier_transform_forward_as_torus_f128<AmortizedDegree<64>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 128:
-    host_fourier_transform_forward_as_torus_f128<AmortizedDegree<128>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 256:
-    host_fourier_transform_forward_as_torus_f128<AmortizedDegree<256>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 512:
-    host_fourier_transform_forward_as_torus_f128<AmortizedDegree<512>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 1024:
-    host_fourier_transform_forward_as_torus_f128<AmortizedDegree<1024>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 2048:
-    host_fourier_transform_forward_as_torus_f128<AmortizedDegree<2048>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  case 4096:
-    host_fourier_transform_forward_as_torus_f128<AmortizedDegree<4096>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
-        (double *)re1, (double *)im0, (double *)im1,
-        (__uint128_t const *)standard, N, number_of_samples);
-    break;
-  default:
-    PANIC("Cuda error (f128 fft): unsupported polynomial size. Supported "
-          "N's are powers of two"
-          " in the interval [64..4096].")
-  }
-}
-
-void cuda_fourier_transform_backward_as_torus_f128_async(
-    void *stream, uint32_t gpu_index, void *standard, void const *re0,
-    void const *re1, void const *im0, void const *im1, const uint32_t N,
-    const uint32_t number_of_samples) {
-  switch (N) {
-  case 64:
-    host_fourier_transform_backward_as_torus_f128<AmortizedDegree<64>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
-        (double const *)re0, (double const *)re1, (double const *)im0,
-        (double const *)im1, N, number_of_samples);
-    break;
-  case 128:
-    host_fourier_transform_backward_as_torus_f128<AmortizedDegree<128>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
-        (double const *)re0, (double const *)re1, (double const *)im0,
-        (double const *)im1, N, number_of_samples);
-    break;
-  case 256:
-    host_fourier_transform_backward_as_torus_f128<AmortizedDegree<256>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
-        (double const *)re0, (double const *)re1, (double const *)im0,
-        (double const *)im1, N, number_of_samples);
-    break;
-  case 512:
-    host_fourier_transform_backward_as_torus_f128<AmortizedDegree<512>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
-        (double const *)re0, (double const *)re1, (double const *)im0,
-        (double const *)im1, N, number_of_samples);
-    break;
-  case 1024:
-    host_fourier_transform_backward_as_torus_f128<AmortizedDegree<1024>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
-        (double const *)re0, (double const *)re1, (double const *)im0,
-        (double const *)im1, N, number_of_samples);
-    break;
-  case 2048:
-    host_fourier_transform_backward_as_torus_f128<AmortizedDegree<2048>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
-        (double const *)re0, (double const *)re1, (double const *)im0,
-        (double const *)im1, N, number_of_samples);
-    break;
-  case 4096:
-    host_fourier_transform_backward_as_torus_f128<AmortizedDegree<4096>>(
-        static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
-        (double const *)re0, (double const *)re1, (double const *)im0,
-        (double const *)im1, N, number_of_samples);
-    break;
-  default:
-    PANIC("Cuda error (f128 ifft): unsupported polynomial size. Supported "
-          "N's are powers of two"
-          " in the interval [64..4096].")
-  }
-}
--- a/backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cuh
@@ -1,579 +0,0 @@
-#ifndef CUDA_FFT128_CUH
-#define CUDA_FFT128_CUH
-
-#include "f128.cuh"
-#include "fft/fft128.h"
-#include "polynomial/functions.cuh"
-#include "polynomial/parameters.cuh"
-#include "twiddles.cuh"
-#include "types/complex/operations.cuh"
-#include <iostream>
-
-using Index = unsigned;
-
-#define NEG_TWID(i)                                                            \
-  f128x2(f128(neg_twiddles_re_hi[(i)], neg_twiddles_re_lo[(i)]),               \
-         f128(neg_twiddles_im_hi[(i)], neg_twiddles_im_lo[(i)]))
-
-#define F64x4_TO_F128x2(f128x2_reg, ind)                                       \
-  f128x2_reg.re.hi = dt_re_hi[ind];                                            \
-  f128x2_reg.re.lo = dt_re_lo[ind];                                            \
-  f128x2_reg.im.hi = dt_im_hi[ind];                                            \
-  f128x2_reg.im.lo = dt_im_lo[ind]
-
-#define F128x2_TO_F64x4(f128x2_reg, ind)                                       \
-  dt_re_hi[ind] = f128x2_reg.re.hi;                                            \
-  dt_re_lo[ind] = f128x2_reg.re.lo;                                            \
-  dt_im_hi[ind] = f128x2_reg.im.hi;                                            \
-  dt_im_lo[ind] = f128x2_reg.im.lo
-
-template <class params>
-__device__ void negacyclic_forward_fft_f128(double *dt_re_hi, double *dt_re_lo,
-                                            double *dt_im_hi,
-                                            double *dt_im_lo) {
-
-  __syncthreads();
-  constexpr Index BUTTERFLY_DEPTH = params::opt >> 1;
-  constexpr Index LOG2_DEGREE = params::log2_degree;
-  constexpr Index HALF_DEGREE = params::degree >> 1;
-  constexpr Index STRIDE = params::degree / params::opt;
-
-  f128x2 u[BUTTERFLY_DEPTH], v[BUTTERFLY_DEPTH], w;
-
-  Index tid = threadIdx.x;
-
-  // load into registers
-#pragma unroll
-  for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
-    F64x4_TO_F128x2(u[i], tid);
-    F64x4_TO_F128x2(v[i], tid + HALF_DEGREE);
-    tid += STRIDE;
-  }
-
-  // level 1
-  // we don't make actual complex multiplication on level1 since we have only
-  // one twiddle, it's real and image parts are equal, so we can multiply
-  // it with simpler operations
-#pragma unroll
-  for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
-    auto ww = NEG_TWID(1);
-    f128::cplx_f128_mul_assign(w.re, w.im, v[i].re, v[i].im, NEG_TWID(1).re,
-                               NEG_TWID(1).im);
-    f128::cplx_f128_sub_assign(v[i].re, v[i].im, u[i].re, u[i].im, w.re, w.im);
-    f128::cplx_f128_add_assign(u[i].re, u[i].im, u[i].re, u[i].im, w.re, w.im);
-  }
-
-  Index twiddle_shift = 1;
-  for (Index l = LOG2_DEGREE - 1; l >= 1; --l) {
-    Index lane_mask = 1 << (l - 1);
-    Index thread_mask = (1 << l) - 1;
-    twiddle_shift <<= 1;
-
-    tid = threadIdx.x;
-    __syncthreads();
-#pragma unroll
-    for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
-      Index rank = tid & thread_mask;
-      bool u_stays_in_register = rank < lane_mask;
-      F128x2_TO_F64x4(((u_stays_in_register) ? v[i] : u[i]), tid);
-      tid = tid + STRIDE;
-    }
-    __syncthreads();
-
-    tid = threadIdx.x;
-#pragma unroll
-    for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
-      Index rank = tid & thread_mask;
-      bool u_stays_in_register = rank < lane_mask;
-      F64x4_TO_F128x2(w, tid ^ lane_mask);
-      u[i] = (u_stays_in_register) ? u[i] : w;
-      v[i] = (u_stays_in_register) ? w : v[i];
-      w = NEG_TWID(tid / lane_mask + twiddle_shift);
-      f128::cplx_f128_mul_assign(w.re, w.im, v[i].re, v[i].im, w.re, w.im);
-      f128::cplx_f128_sub_assign(v[i].re, v[i].im, u[i].re, u[i].im, w.re,
-                                 w.im);
-      f128::cplx_f128_add_assign(u[i].re, u[i].im, u[i].re, u[i].im, w.re,
-                                 w.im);
-      tid = tid + STRIDE;
-    }
-  }
-  __syncthreads();
-
-  //   store registers in SM
-  tid = threadIdx.x;
-#pragma unroll
-  for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
-    F128x2_TO_F64x4(u[i], tid * 2);
-    F128x2_TO_F64x4(v[i], (tid * 2 + 1));
-    tid = tid + STRIDE;
-  }
-  __syncthreads();
-}
-
-template <class params>
-__device__ void negacyclic_backward_fft_f128(double *dt_re_hi, double *dt_re_lo,
-                                             double *dt_im_hi,
-                                             double *dt_im_lo) {
-  __syncthreads();
-  constexpr Index BUTTERFLY_DEPTH = params::opt >> 1;
-  constexpr Index LOG2_DEGREE = params::log2_degree;
-  constexpr Index DEGREE = params::degree;
-  constexpr Index HALF_DEGREE = params::degree >> 1;
-  constexpr Index STRIDE = params::degree / params::opt;
-
-  size_t tid = threadIdx.x;
-  f128x2 u[BUTTERFLY_DEPTH], v[BUTTERFLY_DEPTH], w;
-
-  // load into registers and divide by compressed polynomial size
-#pragma unroll
-  for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
-    F64x4_TO_F128x2(u[i], 2 * tid);
-    F64x4_TO_F128x2(v[i], 2 * tid + 1);
-    tid += STRIDE;
-  }
-
-  Index twiddle_shift = DEGREE;
-  for (Index l = 1; l <= LOG2_DEGREE - 1; ++l) {
-    Index lane_mask = 1 << (l - 1);
-    Index thread_mask = (1 << l) - 1;
-    tid = threadIdx.x;
-    twiddle_shift >>= 1;
-
-    // at this point registers are ready for the  butterfly
-    tid = threadIdx.x;
-    __syncthreads();
-#pragma unroll
-    for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
-      w = (u[i] - v[i]);
-      u[i] += v[i];
-      v[i] = w * NEG_TWID(tid / lane_mask + twiddle_shift).conjugate();
-
-      // keep one of the register for next iteration and store another one in sm
-      Index rank = tid & thread_mask;
-      bool u_stays_in_register = rank < lane_mask;
-      F128x2_TO_F64x4(((u_stays_in_register) ? v[i] : u[i]), tid);
-
-      tid = tid + STRIDE;
-    }
-    __syncthreads();
-
-    // prepare registers for next butterfly iteration
-    tid = threadIdx.x;
-#pragma unroll
-    for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
-      Index rank = tid & thread_mask;
-      bool u_stays_in_register = rank < lane_mask;
-      F64x4_TO_F128x2(w, tid ^ lane_mask);
-
-      u[i] = (u_stays_in_register) ? u[i] : w;
-      v[i] = (u_stays_in_register) ? w : v[i];
-
-      tid = tid + STRIDE;
-    }
-  }
-
-  // last iteration
-  for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
-    w = (u[i] - v[i]);
-    u[i] = u[i] + v[i];
-    v[i] = w * NEG_TWID(1).conjugate();
-  }
-  __syncthreads();
-  // store registers in SM
-  tid = threadIdx.x;
-#pragma unroll
-  for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
-    F128x2_TO_F64x4(u[i], tid);
-    F128x2_TO_F64x4(v[i], tid + HALF_DEGREE);
-
-    tid = tid + STRIDE;
-  }
-  __syncthreads();
-}
-
-// params is expected to be full degree not half degree
-template <class params>
-__device__ void convert_u128_to_f128_as_integer(
-    double *out_re_hi, double *out_re_lo, double *out_im_hi, double *out_im_lo,
-    const __uint128_t *in_re, const __uint128_t *in_im) {
-
-  Index tid = threadIdx.x;
-  // #pragma unroll
-  for (Index i = 0; i < params::opt / 2; i++) {
-    auto out_re = u128_to_signed_to_f128(in_re[tid]);
-    auto out_im = u128_to_signed_to_f128(in_im[tid]);
-
-    out_re_hi[tid] = out_re.hi;
-    out_re_lo[tid] = out_re.lo;
-    out_im_hi[tid] = out_im.hi;
-    out_im_lo[tid] = out_im.lo;
-
-    tid += params::degree / params::opt;
-  }
-}
-
-// params is expected to be full degree not half degree
-template <class params>
-__device__ void convert_u128_to_f128_as_torus(
-    double *out_re_hi, double *out_re_lo, double *out_im_hi, double *out_im_lo,
-    const __uint128_t *in_re, const __uint128_t *in_im) {
-
-  const double normalization = pow(2., -128.);
-  Index tid = threadIdx.x;
-  // #pragma unroll
-  for (Index i = 0; i < params::opt / 2; i++) {
-    auto out_re = u128_to_signed_to_f128(in_re[tid]);
-    auto out_im = u128_to_signed_to_f128(in_im[tid]);
-
-    out_re_hi[tid] = out_re.hi * normalization;
-    out_re_lo[tid] = out_re.lo * normalization;
-    out_im_hi[tid] = out_im.hi * normalization;
-    out_im_lo[tid] = out_im.lo * normalization;
-
-    tid += params::degree / params::opt;
-  }
-}
-
-template <class params>
-__device__ void
-convert_f128_to_u128_as_torus(__uint128_t *out_re, __uint128_t *out_im,
-                              const double *in_re_hi, const double *in_re_lo,
-                              const double *in_im_hi, const double *in_im_lo) {
-
-  const double normalization = 1. / (params::degree / 2);
-  Index tid = threadIdx.x;
-  // #pragma unroll
-  for (Index i = 0; i < params::opt / 2; i++) {
-
-    f128 in_re(in_re_hi[tid] * normalization, in_re_lo[tid] * normalization);
-    f128 in_im(in_im_hi[tid] * normalization, in_im_lo[tid] * normalization);
-
-    out_re[tid] = u128_from_torus_f128(in_re);
-    out_im[tid] = u128_from_torus_f128(in_im);
-
-    tid += params::degree / params::opt;
-  }
-}
-
-// params is expected to be full degree not half degree
-template <class params>
-__global__ void
-batch_convert_u128_to_f128_as_integer(double *out_re_hi, double *out_re_lo,
-                                      double *out_im_hi, double *out_im_lo,
-                                      const __uint128_t *in) {
-
-  convert_u128_to_f128_as_integer<params>(
-      &out_re_hi[blockIdx.x * params::degree / 2],
-      &out_re_lo[blockIdx.x * params::degree / 2],
-      &out_im_hi[blockIdx.x * params::degree / 2],
-      &out_im_lo[blockIdx.x * params::degree / 2],
-      &in[blockIdx.x * params::degree],
-      &in[blockIdx.x * params::degree + params::degree / 2]);
-}
-
-// params is expected to be full degree not half degree
-template <class params>
-__global__ void
-batch_convert_u128_to_f128_as_torus(double *out_re_hi, double *out_re_lo,
-                                    double *out_im_hi, double *out_im_lo,
-                                    const __uint128_t *in) {
-
-  convert_u128_to_f128_as_torus<params>(
-      &out_re_hi[blockIdx.x * params::degree / 2],
-      &out_re_lo[blockIdx.x * params::degree / 2],
-      &out_im_hi[blockIdx.x * params::degree / 2],
-      &out_im_lo[blockIdx.x * params::degree / 2],
-      &in[blockIdx.x * params::degree],
-      &in[blockIdx.x * params::degree + params::degree / 2]);
-}
-
-// params is expected to be full degree not half degree
-template <class params>
-__global__ void batch_convert_f128_to_u128_as_torus(__uint128_t *out,
-                                                    const double *in_re_hi,
-                                                    const double *in_re_lo,
-                                                    const double *in_im_hi,
-                                                    const double *in_im_lo) {
-
-  convert_f128_to_u128_as_torus<params>(
-      &out[blockIdx.x * params::degree],
-      &out[blockIdx.x * params::degree + params::degree / 2],
-      &in_re_hi[blockIdx.x * params::degree / 2],
-      &in_re_lo[blockIdx.x * params::degree / 2],
-      &in_im_hi[blockIdx.x * params::degree / 2],
-      &in_im_lo[blockIdx.x * params::degree / 2]);
-}
-
-template <class params, sharedMemDegree SMD>
-__global__ void
-batch_NSMFFT_128(double *in_re_hi, double *in_re_lo, double *in_im_hi,
-                 double *in_im_lo, double *out_re_hi, double *out_re_lo,
-                 double *out_im_hi, double *out_im_lo, double *buffer) {
-  extern __shared__ double sharedMemoryFFT[];
-  double *re_hi, *re_lo, *im_hi, *im_lo;
-
-  if (SMD == NOSM) {
-    re_hi =
-        &buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 0];
-    re_lo =
-        &buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 1];
-    im_hi =
-        &buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 2];
-    im_lo =
-        &buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 3];
-  } else {
-    re_hi = &sharedMemoryFFT[params::degree / 2 * 0];
-    re_lo = &sharedMemoryFFT[params::degree / 2 * 1];
-    im_hi = &sharedMemoryFFT[params::degree / 2 * 2];
-    im_lo = &sharedMemoryFFT[params::degree / 2 * 3];
-  }
-
-  Index tid = threadIdx.x;
-#pragma unroll
-  for (Index i = 0; i < params::opt / 2; ++i) {
-    re_hi[tid] = in_re_hi[blockIdx.x * (params::degree / 2) + tid];
-    re_lo[tid] = in_re_lo[blockIdx.x * (params::degree / 2) + tid];
-    im_hi[tid] = in_im_hi[blockIdx.x * (params::degree / 2) + tid];
-    im_lo[tid] = in_im_lo[blockIdx.x * (params::degree / 2) + tid];
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-  if constexpr (params::fft_direction == 1) {
-    negacyclic_backward_fft_f128<HalfDegree<params>>(re_hi, re_lo, im_hi,
-                                                     im_lo);
-  } else {
-    negacyclic_forward_fft_f128<HalfDegree<params>>(re_hi, re_lo, im_hi, im_lo);
-  }
-  __syncthreads();
-  tid = threadIdx.x;
-#pragma unroll
-  for (Index i = 0; i < params::opt / 2; ++i) {
-    out_re_hi[blockIdx.x * (params::degree / 2) + tid] = re_hi[tid];
-    out_re_lo[blockIdx.x * (params::degree / 2) + tid] = re_lo[tid];
-    out_im_hi[blockIdx.x * (params::degree / 2) + tid] = im_hi[tid];
-    out_im_lo[blockIdx.x * (params::degree / 2) + tid] = im_lo[tid];
-    tid += params::degree / params::opt;
-  }
-}
-
-template <class params>
-__host__ void host_fourier_transform_forward_as_integer_f128(
-    cudaStream_t stream, uint32_t gpu_index, double *re0, double *re1,
-    double *im0, double *im1, const __uint128_t *standard, const uint32_t N,
-    const uint32_t number_of_samples) {
-
-  // allocate device buffers
-  double *d_re0 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_re1 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_im0 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_im1 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  __uint128_t *d_standard = (__uint128_t *)cuda_malloc_async(
-      N * sizeof(__uint128_t), stream, gpu_index);
-
-  // copy input into device
-  cuda_memcpy_async_to_gpu(d_standard, standard, N * sizeof(__uint128_t),
-                           stream, gpu_index);
-
-  // setup launch parameters
-  size_t required_shared_memory_size = sizeof(double) * N / 2 * 4;
-  int grid_size = number_of_samples;
-  int block_size = params::degree / params::opt;
-  bool full_sm =
-      (required_shared_memory_size <= cuda_get_max_shared_memory(gpu_index));
-  size_t buffer_size = full_sm ? 0 : (size_t)number_of_samples * N / 2 * 4;
-  size_t shared_memory_size = full_sm ? required_shared_memory_size : 0;
-  double *buffer = (double *)cuda_malloc_async(buffer_size, stream, gpu_index);
-
-  // configure shared memory for batch fft kernel
-  if (full_sm) {
-    check_cuda_error(cudaFuncSetAttribute(
-        batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
-        cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
-    check_cuda_error(cudaFuncSetCacheConfig(
-        batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
-        cudaFuncCachePreferShared));
-  }
-
-  // convert u128 into 4 x double
-  batch_convert_u128_to_f128_as_integer<params>
-      <<<grid_size, block_size, 0, stream>>>(d_re0, d_re1, d_im0, d_im1,
-                                             d_standard);
-
-  // call negacyclic 128 bit forward fft.
-  if (full_sm) {
-    batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>
-        <<<grid_size, block_size, shared_memory_size, stream>>>(
-            d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
-  } else {
-    batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, NOSM>
-        <<<grid_size, block_size, shared_memory_size, stream>>>(
-            d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
-  }
-
-  cuda_memcpy_async_to_cpu(re0, d_re0, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_cpu(re1, d_re1, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_cpu(im0, d_im0, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_cpu(im1, d_im1, N / 2 * sizeof(double), stream,
-                           gpu_index);
-
-  cuda_drop_async(d_standard, stream, gpu_index);
-  cuda_drop_async(d_re0, stream, gpu_index);
-  cuda_drop_async(d_re1, stream, gpu_index);
-  cuda_drop_async(d_im0, stream, gpu_index);
-  cuda_drop_async(d_im1, stream, gpu_index);
-}
-
-template <class params>
-__host__ void host_fourier_transform_forward_as_torus_f128(
-    cudaStream_t stream, uint32_t gpu_index, double *re0, double *re1,
-    double *im0, double *im1, const __uint128_t *standard, const uint32_t N,
-    const uint32_t number_of_samples) {
-
-  // allocate device buffers
-  double *d_re0 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_re1 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_im0 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_im1 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  __uint128_t *d_standard = (__uint128_t *)cuda_malloc_async(
-      N * sizeof(__uint128_t), stream, gpu_index);
-
-  // copy input into device
-  cuda_memcpy_async_to_gpu(d_standard, standard, N * sizeof(__uint128_t),
-                           stream, gpu_index);
-
-  // setup launch parameters
-  size_t required_shared_memory_size = sizeof(double) * N / 2 * 4;
-  int grid_size = number_of_samples;
-  int block_size = params::degree / params::opt;
-  bool full_sm =
-      (required_shared_memory_size <= cuda_get_max_shared_memory(gpu_index));
-  size_t buffer_size = full_sm ? 0 : (size_t)number_of_samples * N / 2 * 4;
-  size_t shared_memory_size = full_sm ? required_shared_memory_size : 0;
-  double *buffer = (double *)cuda_malloc_async(buffer_size, stream, gpu_index);
-
-  // configure shared memory for batch fft kernel
-  if (full_sm) {
-    check_cuda_error(cudaFuncSetAttribute(
-        batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
-        cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
-    check_cuda_error(cudaFuncSetCacheConfig(
-        batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
-        cudaFuncCachePreferShared));
-  }
-
-  // convert u128 into 4 x double
-  batch_convert_u128_to_f128_as_torus<params>
-      <<<grid_size, block_size, 0, stream>>>(d_re0, d_re1, d_im0, d_im1,
-                                             d_standard);
-
-  // call negacyclic 128 bit forward fft.
-  if (full_sm) {
-    batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>
-        <<<grid_size, block_size, shared_memory_size, stream>>>(
-            d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
-  } else {
-    batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, NOSM>
-        <<<grid_size, block_size, shared_memory_size, stream>>>(
-            d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
-  }
-
-  cuda_memcpy_async_to_cpu(re0, d_re0, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_cpu(re1, d_re1, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_cpu(im0, d_im0, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_cpu(im1, d_im1, N / 2 * sizeof(double), stream,
-                           gpu_index);
-
-  cuda_drop_async(d_standard, stream, gpu_index);
-  cuda_drop_async(d_re0, stream, gpu_index);
-  cuda_drop_async(d_re1, stream, gpu_index);
-  cuda_drop_async(d_im0, stream, gpu_index);
-  cuda_drop_async(d_im1, stream, gpu_index);
-}
-
-template <class params>
-__host__ void host_fourier_transform_backward_as_torus_f128(
-    cudaStream_t stream, uint32_t gpu_index, __uint128_t *standard,
-    double const *re0, double const *re1, double const *im0, double const *im1,
-    const uint32_t N, const uint32_t number_of_samples) {
-
-  // allocate device buffers
-  double *d_re0 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_re1 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_im0 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  double *d_im1 =
-      (double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
-  __uint128_t *d_standard = (__uint128_t *)cuda_malloc_async(
-      N * sizeof(__uint128_t), stream, gpu_index);
-
-  //  // copy input into device
-  cuda_memcpy_async_to_gpu(d_re0, re0, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_gpu(d_re1, re1, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_gpu(d_im0, im0, N / 2 * sizeof(double), stream,
-                           gpu_index);
-  cuda_memcpy_async_to_gpu(d_im1, im1, N / 2 * sizeof(double), stream,
-                           gpu_index);
-
-  // setup launch parameters
-  size_t required_shared_memory_size = sizeof(double) * N / 2 * 4;
-  int grid_size = number_of_samples;
-  int block_size = params::degree / params::opt;
-  bool full_sm =
-      (required_shared_memory_size <= cuda_get_max_shared_memory(gpu_index));
-  size_t buffer_size = full_sm ? 0 : (size_t)number_of_samples * N / 2 * 4;
-  size_t shared_memory_size = full_sm ? required_shared_memory_size : 0;
-  double *buffer = (double *)cuda_malloc_async(buffer_size, stream, gpu_index);
-
-  // configure shared memory for batch fft kernel
-  if (full_sm) {
-    check_cuda_error(cudaFuncSetAttribute(
-        batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, FULLSM>,
-        cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
-    check_cuda_error(cudaFuncSetCacheConfig(
-        batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, FULLSM>,
-        cudaFuncCachePreferShared));
-    batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, FULLSM>
-        <<<grid_size, block_size, shared_memory_size, stream>>>(
-            d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
-  } else {
-    batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, NOSM>
-        <<<grid_size, block_size, shared_memory_size, stream>>>(
-            d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
-  }
-
-  batch_convert_f128_to_u128_as_torus<params>
-      <<<grid_size, block_size, 0, stream>>>(d_standard, d_re0, d_re1, d_im0,
-                                             d_im1);
-
-  cuda_memcpy_async_to_cpu(standard, d_standard, N * sizeof(__uint128_t),
-                           stream, gpu_index);
-  cuda_drop_async(d_standard, stream, gpu_index);
-  cuda_drop_async(d_re0, stream, gpu_index);
-  cuda_drop_async(d_re1, stream, gpu_index);
-  cuda_drop_async(d_im0, stream, gpu_index);
-  cuda_drop_async(d_im1, stream, gpu_index);
-}
-
-#undef NEG_TWID
-#undef F64x4_TO_F128x2
-#undef F128x2_TO_F64x4
-
-#endif // TFHE_RS_BACKENDS_TFHE_CUDA_BACKEND_CUDA_SRC_FFT128_FFT128_CUH_
--- a/Show More
+++ b/Show More