(artifact_tches): Cleaning

Typos
(float) Tches Artifact 2025
2026-01-11 23:58:23 -05:00 · 2025-05-14 11:44:32 +02:00 · 2025-02-03 18:20:02 +01:00 · 2025-01-31 20:04:58 +01:00
944 changed files with 37634 additions and 143251 deletions
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,6 +1,6 @@
 ---
 name: Bug report
-about: Report a problem with TFHE-rs
+about: Report a problem with concrete
 title: ''
 labels: triage_required
 assignees: ''
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,6 +1,6 @@
 ---
 name: Feature request
-about: Suggest an idea for TFHE-rs
+about: Suggest an idea for concrete
 title: ''
 labels: feature_request
 assignees: ''
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -1,9 +0,0 @@
-self-hosted-runner:
-  # Labels of self-hosted runner in array of strings.
-  labels:
-    - m1mac
-    - 4090-desktop
-# Configuration variables in array of strings defined in your repository or
-# organization. `null` means disabling configuration variables check.
-# Empty array means no configuration variable is allowed.
-config-variables: null
--- a/.github/workflows/approve_label.yml
+++ b/.github/workflows/approve_label.yml
@@ -1,34 +0,0 @@
-# Manage approved label in pull request
-name: PR approved label manager
-
-on:
-  pull_request:
-  pull_request_review:
-    types: [submitted]
-
-jobs:
-  trigger-tests:
-    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: write
-    steps:
-      - name: Get current labels
-        uses: snnaplab/get-labels-action@f426df40304808ace3b5282d4f036515f7609576
-
-      # Remove label if a push is performed after an approval
-      - name: Remove approved label
-        if: ${{ github.event_name == 'pull_request' && contains(fromJSON(env.LABELS), 'approved') }}
-        uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
-        with:
-          # We use a PAT to have the same user (zama-bot) for label deletion as for creation.
-          github_token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-          labels: approved
-
-      # Add label only if the review is approved and if the label doesn't already exist
-      - name: Add approved label
-        uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf
-        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
-        with:
-          # We need to use a PAT to be able to trigger `labeled` event for the other workflow.
-          github_token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-          labels: approved
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -5,64 +5,71 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-  pull_request:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string

 jobs:
-  setup-ec2:
-    name: Setup EC2 instance (fast-tests)
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: cpu-big
-
  fast-tests:
-    name: Fast CPU tests
-    needs: setup-ec2
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    runs-on: ${{ inputs.runner_name }}
    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: stable
+          default: true

      - name: Run concrete-csprng tests
        run: |
          make test_concrete_csprng

-      - name: Run tfhe-zk-pok tests
-        run: |
-          make test_zk_pok
-
      - name: Run core tests
        run: |
          AVX512_SUPPORT=ON make test_core_crypto
@@ -110,31 +117,11 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (fast-tests)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, fast-tests ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/aws_tfhe_gpu_4090_tests.yml
+++ b/.github/workflows/aws_tfhe_gpu_4090_tests.yml
@@ -1,75 +0,0 @@
-# Compile and test tfhe-cuda-backend on an RTX 4090 machine
-name: TFHE Cuda Backend - 4090 full tests
-
-env:
-  CARGO_TERM_COLOR: always
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-  pull_request:
-    types: [labeled]
-
-jobs:
-  cuda-tests-linux:
-    name: CUDA tests (RTX 4090)
-    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, '4090_test') }}
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
-      cancel-in-progress: true
-    runs-on: ["self-hosted", "4090-desktop"]
-
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-
-      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: stable
-
-      - name: Run fmt checks
-        run: |
-          make check_fmt_gpu
-
-      - name: Run clippy checks
-        run: |
-          make pcc_gpu
-
-      - name: Run core crypto, integer and internal CUDA backend tests
-        run: |
-          make test_gpu
-
-      - name: Run user docs tests
-        run: |
-          make test_user_doc_gpu
-
-      - name: Test C API
-        run: |
-          make test_c_api_gpu
-
-      - name: Run High Level API Tests
-        run: |
-          make test_high_level_api_gpu
-
-      - uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
-        if: ${{ always() && github.event_name == 'pull_request' }}
-        with:
-          labels: 4090_test
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Slack Notification
-        if: ${{ always() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_gpu_tests.yml
+++ b/.github/workflows/aws_tfhe_gpu_tests.yml
@@ -1,142 +0,0 @@
-# Compile and test tfhe-cuda-backend on an AWS instance
-name: TFHE Cuda Backend - Full tests
-
-env:
-  CARGO_TERM_COLOR: always
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-  pull_request:
-
-jobs:
-  setup-ec2:
-    name: Setup EC2 instance (cuda-tests)
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: gpu-test
-
-  cuda-tests-linux:
-    name: CUDA tests
-    needs: setup-ec2
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
-      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 9
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-
-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
-        with:
-          toolchain: stable
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
-          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Run fmt checks
-        run: |
-          make check_fmt_gpu
-
-      - name: Run clippy checks
-        run: |
-          make pcc_gpu
-
-      - name: Run core crypto, integer and internal CUDA backend tests
-        run: |
-          make test_gpu
-
-      - name: Run user docs tests
-        run: |
-          make test_user_doc_gpu
-
-      - name: Test C API
-        run: |
-          make test_c_api_gpu
-
-      - name: Run High Level API Tests
-        run: |
-          make test_high_level_api_gpu
-
-      - name: Slack Notification
-        if: ${{ always() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CUDA AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (cuda-tests)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, cuda-tests-linux ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -4,57 +4,66 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-  pull_request:
-    types: [ labeled ]
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      fork_repo:
+        description: "Name of forked repo as user/repo"
+        type: string
+      fork_git_sha:
+        description: "Git SHA to checkout from fork"
+        type: string

 jobs:
-  setup-ec2:
-    name: Setup EC2 instance (unsigned-integer-tests)
-    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: cpu-big
-
-  unsigned-integer-tests:
-    name: Unsigned integer tests
-    needs: setup-ec2
+  integer-tests:
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    runs-on: ${{ inputs.runner_name }}
    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: stable
+          default: true

      - name: Gen Keys if required
        run: |
@@ -75,31 +84,11 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (unsigned-integer-tests)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, unsigned-integer-tests ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -4,57 +4,66 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-  pull_request:
-    types: [ labeled ]
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      fork_repo:
+        description: "Name of forked repo as user/repo"
+        type: string
+      fork_git_sha:
+        description: "Git SHA to checkout from fork"
+        type: string

 jobs:
-  setup-ec2:
-    name: Setup EC2 instance (signed-integer-tests)
-    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: cpu-big
-
-  signed-integer-tests:
-    name: Signed integer tests
-    needs: setup-ec2
+  multi-bit-tests:
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    runs-on: ${{ inputs.runner_name }}
    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: stable
+          default: true

      - name: Gen Keys if required
        run: |
@@ -79,31 +88,11 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (signed-integer-tests)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, signed-integer-tests ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Shortint tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -4,66 +4,71 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-  pull_request:
-    types: [ labeled ]
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string

 jobs:
-  setup-ec2:
-    name: Setup EC2 instance (cpu-tests)
-    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: cpu-big
-
-  cpu-tests:
-    name: CPU tests
-    needs: setup-ec2
+  shortint-tests:
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    runs-on: ${{ inputs.runner_name }}
    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: stable
+          default: true

      - name: Run concrete-csprng tests
        run: |
          make test_concrete_csprng

-      - name: Run tfhe-zk-pok tests
-        run: |
-          make test_zk_pok
-
      - name: Run core tests
        run: |
          AVX512_SUPPORT=ON make test_core_crypto
@@ -105,31 +110,11 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (cpu-tests)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, cpu-tests ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Shortint tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -4,65 +4,66 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-  pull_request:
-    types: [ labeled ]
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string

 jobs:
-  setup-ec2:
-    name: Setup EC2 instance (wasm-tests)
-    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: cpu-small
-
  wasm-tests:
-    name: WASM tests
-    needs: setup-ec2
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    runs-on: ${{ inputs.runner_name }}
    steps:
+      # Step used for logging purposes.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: stable
-
-      - name: Install Node
-        run: |
-          make install_node
-
-      - name: Run fmt checks
-        run: |
-          make check_fmt_js
+          default: true

      - name: Run js on wasm API tests
        run: |
@@ -70,36 +71,17 @@ jobs:

      - name: Run parallel wasm tests
        run: |
+          make install_node
          make ci_test_web_js_api_parallel

      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (wasm-tests)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, wasm-tests ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/boolean_benchmark.yml
+++ b/.github/workflows/boolean_benchmark.yml
@@ -32,8 +32,6 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  run-boolean-benchmarks:
@@ -53,7 +51,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -63,13 +61,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make bench_boolean
+          make AVX512_SUPPORT=ON bench_boolean

      - name: Parse results
        run: |
@@ -97,17 +96,17 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -126,11 +125,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Boolean benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Boolean benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -6,8 +6,6 @@ on:
 env:
  CARGO_TERM_COLOR: always
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
@@ -19,11 +17,11 @@ jobs:

    strategy:
      matrix:
-        os: [ubuntu-latest, macos-latest-large, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-latest]
      fail-fast: false

    steps:
-      - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Install and run newline linter checks
        if: matrix.os == 'ubuntu-latest'
@@ -68,9 +66,5 @@ jobs:
        run: |
          make build_c_api

-      - name: Build coverage tests
-        run: |
-          make build_tfhe_coverage
-
      # The wasm build check is a bit annoying to set-up here and is done during the tests in
      # aws_tfhe_tests.yml
--- a/.github/workflows/ci_lint.yml
+++ b/.github/workflows/ci_lint.yml
@@ -1,27 +0,0 @@
-# Lint and check CI
-name: CI Lint and Checks
-
-on:
-  pull_request:
-
-env:
-  ACTIONLINT_VERSION: 1.6.27
-
-jobs:
-  lint-check:
-    name: Lint and checks
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-
-      - name: Get actionlint
-        run: |
-          bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash) ${{ env.ACTIONLINT_VERSION }}
-          echo "f2ee6d561ce00fa93aab62a7791c1a0396ec7e8876b2a8f2057475816c550782  actionlint" > checksum
-          sha256sum -c checksum
-          ln -s "$(pwd)/actionlint" /usr/local/bin/
-
-      - name: Lint workflows
-        run: |
-          make lint_workflow
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -4,8 +4,6 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -40,7 +38,7 @@ jobs:
      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
    runs-on: ${{ inputs.runner_name }}
-    timeout-minutes: 11520 # 8 days
+    timeout-minutes: 1080
    steps:
      # Step used for log purpose.
      - name: Instance configuration used
@@ -53,7 +51,7 @@ jobs:
          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: ${{ inputs.fork_repo }}
          ref: ${{ inputs.fork_git_sha }}
@@ -63,13 +61,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: stable
+          default: true

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@2d756ea4c53f7f6b397767d8723b3a10a9f35bf2
+        uses: tj-actions/changed-files@1c938490c880156b746568a518594309cfb3f66b
        with:
          files_yaml: |
            tfhe:
@@ -99,7 +98,7 @@ jobs:
          make test_shortint_cov

      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@7afa10ed9b269c561c2336fd862446844e0cbf71
+        uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
@@ -107,24 +106,10 @@ jobs:
          fail_ci_if_error: true
          files: shortint/cobertura.xml,boolean/cobertura.xml,core_crypto/cobertura.xml,core_crypto_avx512/cobertura.xml

-      - name: Run integer coverage
-        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
-        run: |
-          make test_integer_cov
-
-      - name: Upload tfhe coverage to Codecov
-        uses: codecov/codecov-action@7afa10ed9b269c561c2336fd862446844e0cbf71
-        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
-        with:
-          token: ${{ secrets.CODECOV_TOKEN }}
-          directory: ./coverage/
-          fail_ci_if_error: true
-          files: integer/cobertura.xml
-
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/core_crypto_gpu_benchmark.yml
+++ b/.github/workflows/core_crypto_gpu_benchmark.yml
@@ -1,182 +0,0 @@
-# Run core crypto benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
-name: Core crypto GPU benchmarks
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-
-env:
-  CARGO_TERM_COLOR: always
-  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-
-jobs:
-  setup-ec2:
-    name: Setup EC2 instance (cuda-benchmarks)
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: hyperstack
-          profile: gpu-bench
-
-  core-crypto-benchmarks:
-    name: CUDA core crypto benchmarks
-    needs: setup-ec2
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.1
-
-    steps:
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-      - name: Get benchmark date
-        run: |
-          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
-
-      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          fetch-depth: 0
-
-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: nightly
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Run benchmarks with AVX512
-        run: |
-          make bench_pbs_gpu
-          make bench_ks_gpu
-
-      - name: Parse results
-        run: |
-          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
-          COMMIT_HASH="$(git describe --tags --dirty)"
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --database tfhe_rs \
-          --hardware ${{ inputs.instance_type }} \
-          --backend gpu \
-          --project-version "${COMMIT_HASH}" \
-          --branch ${{ github.ref_name }} \
-          --commit-date "${COMMIT_DATE}" \
-          --bench-date "${{ env.BENCH_DATE }}" \
-          --name-suffix avx512 \
-          --walk-subdirs \
-          --throughput
-
-      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_core_crypto
-          path: ${{ env.RESULTS_FILENAME }}
-
-      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Send data to Slab
-        shell: bash
-        run: |
-          echo "Computing HMac on downloaded artifact"
-          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
-          echo "Sending results to Slab..."
-          curl -v -k \
-          -H "Content-Type: application/json" \
-          -H "X-Slab-Repository: ${{ github.repository }}" \
-          -H "X-Slab-Command: store_data_v2" \
-          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-          -d @${{ env.RESULTS_FILENAME }} \
-          ${{ secrets.SLAB_URL }}
-
-  # FIXME This action needs docker to be installed on the machine beforehand.
-#      - name: Slack Notification
-#        if: ${{ failure() }}
-#        continue-on-error: true
-#        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-#        env:
-#          SLACK_COLOR: ${{ job.status }}
-#          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-#          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-#          SLACK_MESSAGE: "PBS GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-#          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-#          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-  teardown-ec2:
-    name: Teardown EC2 instance (cuda-benchmarks)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, core-crypto-benchmarks ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (cuda-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/csprng_randomness_testing.yml
+++ b/.github/workflows/csprng_randomness_testing.yml
@@ -0,0 +1,74 @@
+name: CSPRNG randomness testing Workflow
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  csprng-randomness-testing:
+    name: CSPRNG randomness testing
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        with:
+          toolchain: stable
+          default: true
+
+      - name: Dieharder randomness test suite
+        run: |
+          make dieharder_csprng
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "concrete-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -1,94 +0,0 @@
-name: CSPRNG randomness testing Workflow
-
-env:
-  CARGO_TERM_COLOR: always
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-  pull_request:
-    types: [ labeled ]
-
-
-jobs:
-  setup-ec2:
-    name: Setup EC2 instance (csprng-randomness-tests)
-    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: cpu-small
-
-  csprng-randomness-tests:
-    name: CSPRNG randomness tests
-    needs: setup-ec2
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}
-      cancel-in-progress: true
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-
-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: stable
-
-      - name: Dieharder randomness test suite
-        run: |
-          make dieharder_csprng
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "concrete-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (csprng-randomness-tests)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, csprng-randomness-tests ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/gpu_4090_full_benchmark.yml
+++ b/.github/workflows/gpu_4090_full_benchmark.yml
@@ -1,202 +0,0 @@
-# Run all benchmarks on an RTX 4090 machine and return parsed results to Slab CI bot.
-name: TFHE Cuda Backend - 4090 full benchmarks
-
-env:
-  CARGO_TERM_COLOR: always
-  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-  pull_request:
-    types: [labeled]
-  schedule:
-    # Weekly benchmarks will be triggered each Friday at 9p.m.
-    - cron: "0 21 * * 5"
-
-jobs:
-  cuda-integer-benchmarks:
-    name: Cuda integer benchmarks for all operations flavor  (RTX 4090)
-    if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_cuda_integer_bench
-      cancel-in-progress: true
-    runs-on: ["self-hosted", "4090-desktop"]
-    timeout-minutes: 1440 # 24 hours
-    strategy:
-      fail-fast: false
-      max-parallel: 1
-      matrix:
-        command: [integer, integer_multi_bit]
-        op_flavor: [default, unchecked]
-
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          fetch-depth: 0
-
-      - name: Get benchmark details
-        run: |
-          {
-            echo "BENCH_DATE=$(date --iso-8601=seconds)";
-            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
-            echo "COMMIT_HASH=$(git describe --tags --dirty)";
-          } >> "${GITHUB_ENV}"
-
-      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: nightly
-
-      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Run integer benchmarks
-        run: |
-          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
-
-      - name: Parse results
-        run: |
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --database tfhe_rs \
-          --hardware "rtx4090" \
-          --backend gpu \
-          --project-version "${{ env.COMMIT_HASH }}" \
-          --branch ${{ github.ref_name }} \
-          --commit-date "${{ env.COMMIT_DATE }}" \
-          --bench-date "${{ env.BENCH_DATE }}" \
-          --walk-subdirs \
-          --throughput
-
-      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
-          path: ${{ env.RESULTS_FILENAME }}
-
-      - name: Send data to Slab
-        shell: bash
-        run: |
-          echo "Computing HMac on results file"
-          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
-          echo "Sending results to Slab..."
-          curl -v -k \
-          -H "Content-Type: application/json" \
-          -H "X-Slab-Repository: ${{ github.repository }}" \
-          -H "X-Slab-Command: store_data_v2" \
-          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-          -d @${{ env.RESULTS_FILENAME }} \
-          ${{ secrets.SLAB_URL }}
-
-      - name: Slack Notification
-        if: ${{ always() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  cuda-core-crypto-benchmarks:
-    name: Cuda core crypto benchmarks  (RTX 4090)
-    if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
-    needs: cuda-integer-benchmarks
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_cuda_core_crypto_bench
-      cancel-in-progress: true
-    runs-on: ["self-hosted", "4090-desktop"]
-    timeout-minutes: 1440 # 24 hours
-
-    steps:
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          fetch-depth: 0
-
-      - name: Get benchmark details
-        run: |
-          {
-            echo "BENCH_DATE=$(date --iso-8601=seconds)";
-            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
-            echo "COMMIT_HASH=$(git describe --tags --dirty)";
-          } >> "${GITHUB_ENV}"
-
-      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: nightly
-
-      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Run integer benchmarks
-        run: |
-          make bench_pbs_gpu
-          make bench_ks_gpu
-
-      - name: Parse results
-        run: |
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --database tfhe_rs \
-          --hardware "rtx4090" \
-          --backend gpu \
-          --project-version "${{ env.COMMIT_HASH }}" \
-          --branch ${{ github.ref_name }} \
-          --commit-date "${{ env.COMMIT_DATE }}" \
-          --bench-date "${{ env.BENCH_DATE }}" \
-          --walk-subdirs \
-          --throughput
-
-      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_core_crypto
-          path: ${{ env.RESULTS_FILENAME }}
-
-      - name: Send data to Slab
-        shell: bash
-        run: |
-          echo "Computing HMac on results file"
-          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
-          echo "Sending results to Slab..."
-          curl -v -k \
-          -H "Content-Type: application/json" \
-          -H "X-Slab-Repository: ${{ github.repository }}" \
-          -H "X-Slab-Command: store_data_v2" \
-          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-          -d @${{ env.RESULTS_FILENAME }} \
-          ${{ secrets.SLAB_URL }}
-
-      - name: Slack Notification
-        if: ${{ !success() && !cancelled() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  remove_github_label:
-    name: Remove 4090 bench label
-    if: ${{ always() && github.event_name == 'pull_request' }}
-    needs: [cuda-integer-benchmarks, cuda-core-crypto-benchmarks]
-    runs-on: ["self-hosted", "4090-desktop"]
-    steps:
-      - uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
-        with:
-          labels: 4090_bench
-          github_token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/integer_benchmark.yml
+++ b/.github/workflows/integer_benchmark.yml
@@ -25,8 +25,6 @@ env:
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  run-integer-benchmarks:
@@ -46,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -56,13 +54,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make FAST_BENCH=TRUE bench_integer
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_integer

      - name: Parse benchmarks to csv
        run: |
@@ -70,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -91,17 +90,17 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -120,11 +119,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_full_benchmark.yml
+++ b/.github/workflows/integer_full_benchmark.yml
@@ -28,8 +28,6 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  prepare-matrix:
@@ -41,17 +39,20 @@ jobs:
      - name: Weekly benchmarks
        if: ${{ github.event.inputs.user_inputs == 'weekly_benchmarks' }}
        run: |
-          echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
+          echo "OP_FLAVOR=[\"default\", \"default_comp\", \"default_scalar\", \"default_scalar_comp\"]" >> "${GITHUB_ENV}"

      - name: Quarterly benchmarks
        if: ${{ github.event.inputs.user_inputs == 'quarterly_benchmarks' }}
        run: |
-          echo "OP_FLAVOR=[\"default\", \"smart\", \"unchecked\", \"misc\"]" >> "${GITHUB_ENV}"
+          echo "OP_FLAVOR=[\"default\", \"default_comp\", \"default_scalar\", \"default_scalar_comp\", \
+          \"smart\", \"smart_comp\", \"smart_scalar\", \"smart_parallelized\", \"smart_parallelized_comp\", \"smart_scalar_parallelized\", \"smart_scalar_parallelized_comp\", \
+          \"unchecked\", \"unchecked_comp\", \"unchecked_scalar\", \"unchecked_scalar_comp\", \
+          \"misc\"]" >> "${GITHUB_ENV}"

      -  name: Set operation flavor output
         id: set_op_flavor
         run: |
          echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"

  integer-benchmarks:
    name: Execute integer benchmarks for all operations flavor
@@ -59,7 +60,6 @@ jobs:
    runs-on: ${{ github.event.inputs.runner_name }}
    if: ${{ !cancelled() }}
    continue-on-error: true
-    timeout-minutes: 1440  # 24 hours
    strategy:
      max-parallel: 1
      matrix:
@@ -74,17 +74,15 @@ jobs:
          echo "Request ID: ${{ inputs.request_id }}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

      - name: Get benchmark details
        run: |
-          {
-            echo "BENCH_DATE=$(date --iso-8601=seconds)";
-            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
-            echo "COMMIT_HASH=$(git describe --tags --dirty)";
-          } >> "${GITHUB_ENV}"
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"

      - name: Set up home
        # "Install rust" step require root user to have a HOME directory which is not set.
@@ -92,20 +90,21 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
-          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}
+          make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}

      - name: Parse results
        run: |
@@ -121,7 +120,7 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -148,11 +147,11 @@ jobs:
    steps:
      - name: Notify
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_gpu_benchmark.yml
+++ b/.github/workflows/integer_gpu_benchmark.yml
@@ -1,195 +0,0 @@
-# Run integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
-name: Integer GPU benchmarks
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-  pull_request:
-
-env:
-  CARGO_TERM_COLOR: always
-  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
-  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-
-jobs:
-  setup-ec2:
-    name: Setup EC2 instance (cuda-benchmarks)
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: hyperstack
-          profile: gpu-bench
-
-  cuda-integer-benchmarks:
-    name: CUDA integer benchmarks
-    needs: setup-ec2
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.1
-
-    steps:
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-      - name: Get benchmark date
-        run: |
-          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
-
-      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          fetch-depth: 0
-
-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: nightly
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-          } >> "${GITHUB_ENV}"
-
-      - name: Run benchmarks with AVX512
-        run: |
-          make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu
-
-      - name: Parse benchmarks to csv
-        run: |
-          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
-            parse_integer_benches
-
-      - name: Upload csv results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_csv_integer
-          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
-
-      - name: Parse results
-        run: |
-          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
-          COMMIT_HASH="$(git describe --tags --dirty)"
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --database tfhe_rs \
-          --hardware "n2-H100x1" \
-          --backend gpu \
-          --project-version "${COMMIT_HASH}" \
-          --branch ${{ github.ref_name }} \
-          --commit-date "${COMMIT_DATE}" \
-          --bench-date "${{ env.BENCH_DATE }}" \
-          --walk-subdirs \
-          --name-suffix avx512 \
-          --throughput
-
-      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_integer
-          path: ${{ env.RESULTS_FILENAME }}
-
-      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Send data to Slab
-        shell: bash
-        run: |
-          echo "Computing HMac on results file"
-          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
-          echo "Sending results to Slab..."
-          curl -v -k \
-          -H "Content-Type: application/json" \
-          -H "X-Slab-Repository: ${{ github.repository }}" \
-          -H "X-Slab-Command: store_data_v2" \
-          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-          -d @${{ env.RESULTS_FILENAME }} \
-          ${{ secrets.SLAB_URL }}
-
-# FIXME This action needs docker to be installed on the machine beforehand.
-#      - name: Slack Notification
-#        if: ${{ !success() && !cancelled() }}
-#        continue-on-error: true
-#        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-#        env:
-#          SLACK_COLOR: ${{ job.status }}
-#          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-#          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-#          SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-#          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-#          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-  teardown-ec2:
-    name: Teardown EC2 instance (cuda-benchmarks)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, cuda-integer-benchmarks ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (cuda-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/integer_gpu_full_benchmark.yml
+++ b/.github/workflows/integer_gpu_full_benchmark.yml
@@ -1,190 +0,0 @@
-# Run all integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
-name: Integer GPU full benchmarks
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-  pull_request:
-
-env:
-  CARGO_TERM_COLOR: always
-  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-
-jobs:
-  setup-ec2:
-    name: Setup EC2 instance (cuda-full-benchmarks)
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: hyperstack
-          profile: gpu-bench
-
-  cuda-integer-full-benchmarks:
-    name: CUDA integer full benchmarks
-    needs: setup-ec2
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
-    timeout-minutes: 1440 # 24 hours
-    continue-on-error: true
-    strategy:
-      fail-fast: false
-      max-parallel: 1
-      matrix:
-        command: [integer, integer_multi_bit]
-        op_flavor: [default, unchecked]
-        # explicit include-based build matrix, of known valid options
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.1
-
-    steps:
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          fetch-depth: 0
-
-      - name: Get benchmark details
-        run: |
-          {
-            echo "BENCH_DATE=$(date --iso-8601=seconds)";
-            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
-            echo "COMMIT_HASH=$(git describe --tags --dirty)";
-          } >> "${GITHUB_ENV}"
-
-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: nightly
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-          } >> "${GITHUB_ENV}"
-
-      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Run benchmarks with AVX512
-        run: |
-          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
-
-      - name: Parse results
-        run: |
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --database tfhe_rs \
-          --hardware "n2-H100x1" \
-          --backend gpu \
-          --project-version "${{ env.COMMIT_HASH }}" \
-          --branch ${{ github.ref_name }} \
-          --commit-date "${{ env.COMMIT_DATE }}" \
-          --bench-date "${{ env.BENCH_DATE }}" \
-          --walk-subdirs \
-          --name-suffix avx512 \
-          --throughput
-
-      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
-          path: ${{ env.RESULTS_FILENAME }}
-
-      - name: Send data to Slab
-        shell: bash
-        run: |
-          echo "Computing HMac on results file"
-          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
-          echo "Sending results to Slab..."
-          curl -v -k \
-          -H "Content-Type: application/json" \
-          -H "X-Slab-Repository: ${{ github.repository }}" \
-          -H "X-Slab-Command: store_data_v2" \
-          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-          -d @${{ env.RESULTS_FILENAME }} \
-          ${{ secrets.SLAB_URL }}
-
-  # FIXME This action needs docker to be installed on the machine beforehand.
-  #      - name: Slack Notification
-  #        if: ${{ !success() && !cancelled() }}
-  #        continue-on-error: true
-  #        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-  #        env:
-  #          SLACK_COLOR: ${{ job.status }}
-  #          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  #          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  #          SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-  #          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  #          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-  teardown-ec2:
-    name: Teardown EC2 instance (cuda-full-benchmarks)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, cuda-integer-full-benchmarks ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (cuda-full-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/integer_multi_bit_benchmark.yml
+++ b/.github/workflows/integer_multi_bit_benchmark.yml
@@ -25,8 +25,6 @@ env:
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  run-integer-benchmarks:
@@ -46,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -56,13 +54,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run multi-bit benchmarks with AVX512
        run: |
-          make FAST_BENCH=TRUE bench_integer_multi_bit
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_integer_multi_bit

      - name: Parse benchmarks to csv
        run: |
@@ -70,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -91,17 +90,17 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -120,11 +119,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_multi_bit_gpu_benchmark.yml
+++ b/.github/workflows/integer_multi_bit_gpu_benchmark.yml
@@ -1,196 +0,0 @@
-# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot.
-name: Integer GPU Multi-bit benchmarks
-
-on:
-  # Allows you to run this workflow manually from the Actions tab as an alternative.
-  workflow_dispatch:
-  pull_request:
-
-env:
-  CARGO_TERM_COLOR: always
-  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
-  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
-
-jobs:
-  setup-ec2:
-    name: Setup EC2 instance (cuda-multi-bit-benchmarks)
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: hyperstack
-          profile: gpu-bench
-
-  cuda-integer-multi-bit-benchmarks:
-    name: CUDA integer multi-bit benchmarks
-    needs: setup-ec2
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
-    timeout-minutes: 1440 # 24 hours
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 11
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-      CMAKE_VERSION: 3.29.1
-
-    steps:
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y checkinstall zlib1g-dev libssl-dev
-          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
-          cd cmake-${{ env.CMAKE_VERSION }}
-          ./bootstrap
-          make -j"$(nproc)"
-          sudo make install
-
-      - name: Get benchmark date
-        run: |
-          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
-
-      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          fetch-depth: 0
-
-      - name: Set up home
-        # "Install rust" step require root user to have a HOME directory which is not set.
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: nightly
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-          } >> "${GITHUB_ENV}"
-
-      - name: Run multi-bit benchmarks with AVX512
-        run: |
-          make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_multi_bit_gpu
-
-      - name: Parse benchmarks to csv
-        run: |
-          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
-            parse_integer_benches
-
-      - name: Upload csv results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_csv_integer
-          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
-
-      - name: Parse results
-        run: |
-          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
-          COMMIT_HASH="$(git describe --tags --dirty)"
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --database tfhe_rs \
-          --hardware "n2-H100x1" \
-          --backend gpu \
-          --project-version "${COMMIT_HASH}" \
-          --branch ${{ github.ref_name }} \
-          --commit-date "${COMMIT_DATE}" \
-          --bench-date "${{ env.BENCH_DATE }}" \
-          --walk-subdirs \
-          --name-suffix avx512 \
-          --throughput
-
-      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
-        with:
-          name: ${{ github.sha }}_integer
-          path: ${{ env.RESULTS_FILENAME }}
-
-      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          repository: zama-ai/slab
-          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
-
-      - name: Send data to Slab
-        shell: bash
-        run: |
-          echo "Computing HMac on results file"
-          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
-          echo "Sending results to Slab..."
-          curl -v -k \
-          -H "Content-Type: application/json" \
-          -H "X-Slab-Repository: ${{ github.repository }}" \
-          -H "X-Slab-Command: store_data_v2" \
-          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
-          -d @${{ env.RESULTS_FILENAME }} \
-          ${{ secrets.SLAB_URL }}
-
-# FIXME This action needs docker to be installed on the machine beforehand.
-#      - name: Slack Notification
-#        if: ${{ !success() && !cancelled() }}
-#        continue-on-error: true
-#        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-#        env:
-#          SLACK_COLOR: ${{ job.status }}
-#          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-#          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-#          SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-#          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-#          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-  teardown-ec2:
-    name: Teardown EC2 instance (cuda-multi-bit-benchmarks)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, cuda-integer-multi-bit-benchmarks ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL_PRE_PROD }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (cuda-multi-bit-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -14,8 +14,6 @@ on:
 env:
  CARGO_TERM_COLOR: always
  RUSTFLAGS: "-C target-cpu=native"
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  FAST_TESTS: "TRUE"

@@ -27,16 +25,15 @@ jobs:
  cargo-builds:
    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
    runs-on: ["self-hosted", "m1mac"]
-    # 12 hours, default is 6 hours, hopefully this is more than enough
-    timeout-minutes: 720

    steps:
-      - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: stable
+          default: true

      - name: Run pcc checks
        run: |
@@ -74,10 +71,6 @@ jobs:
        run: |
          make test_concrete_csprng

-      - name: Run tfhe-zk-pok tests
-        run: |
-          make test_zk_pok
-
      - name: Run core tests
        run: |
          make test_core_crypto
@@ -137,7 +130,7 @@ jobs:
      - name: Slack Notification
        if: ${{ needs.cargo-builds.result != 'skipped' }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ needs.cargo-builds.result }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -30,7 +30,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -49,7 +49,7 @@ jobs:

      - name: Publish web package
        if: ${{ inputs.push_web_package }}
-        uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
+        uses: JS-DevTools/npm-publish@4b07b26a2f6e0a51846e1870223e545bae91c552
        with:
          token: ${{ secrets.NPM_TOKEN }}
          package: tfhe/pkg/package.json
@@ -65,7 +65,7 @@ jobs:

      - name: Publish Node package
        if: ${{ inputs.push_node_package }}
-        uses: JS-DevTools/npm-publish@19c28f1ef146469e409470805ea4279d47c3d35c
+        uses: JS-DevTools/npm-publish@4b07b26a2f6e0a51846e1870223e545bae91c552
        with:
          token: ${{ secrets.NPM_TOKEN }}
          package: tfhe/pkg/package.json
@@ -74,7 +74,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -18,7 +18,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -32,7 +32,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release_cuda.yml
+++ b/.github/workflows/make_release_cuda.yml
@@ -1,129 +0,0 @@
-# Publish new release of tfhe-cuda-backend on crates.io.
-name: Publish CUDA release
-
-on:
-  workflow_dispatch:
-    inputs:
-      dry_run:
-        description: "Dry-run"
-        type: boolean
-        default: true
-      push_to_crates:
-        description: "Push to crate"
-        type: boolean
-        default: true
-
-env:
-  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-
-jobs:
-  setup-ec2:
-    name: Setup EC2 instance (publish-cuda-release)
-    runs-on: ubuntu-latest
-    outputs:
-      runner-name: ${{ steps.start-instance.outputs.label }}
-    steps:
-      - name: Start instance
-        id: start-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: start
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          backend: aws
-          profile: gpu-test
-
-  publish-cuda-release:
-    name: Publish CUDA Release
-    needs: setup-ec2
-    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
-    strategy:
-      fail-fast: false
-      # explicit include-based build matrix, of known valid options
-      matrix:
-        include:
-          - os: ubuntu-22.04
-            cuda: "12.2"
-            gcc: 9
-    env:
-      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
-        with:
-          fetch-depth: 0
-
-      - name: Set up home
-        run: |
-          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
-
-      - name: Install latest stable
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
-        with:
-          toolchain: stable
-
-      - name: Export CUDA variables
-        if: ${{ !cancelled() }}
-        run: |
-          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
-          {
-            echo "CUDA_PATH=$CUDA_PATH";
-            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
-            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
-          } >> "${GITHUB_ENV}"
-
-      # Specify the correct host compilers
-      - name: Export gcc and g++ variables
-        if: ${{ !cancelled() }}
-        run: |
-          {
-            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
-            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
-            echo "HOME=/home/ubuntu";
-          } >> "${GITHUB_ENV}"
-
-      - name: Publish crate.io package
-        if: ${{ inputs.push_to_crates }}
-        env:
-          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
-        run: |
-          cargo publish -p tfhe-cuda-backend --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-
-  teardown-ec2:
-    name: Teardown EC2 instance (publish-release)
-    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
-    needs: [ setup-ec2, publish-cuda-release ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Stop instance
-        id: stop-instance
-        uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
-        with:
-          mode: stop
-          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
-          slab-url: ${{ secrets.SLAB_BASE_URL }}
-          job-secret: ${{ secrets.JOB_SECRET }}
-          label: ${{ needs.setup-ec2.outputs.runner-name }}
-
-      - name: Slack Notification
-        if: ${{ failure() }}
-        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
-        env:
-          SLACK_COLOR: ${{ job.status }}
-          SLACK_MESSAGE: "EC2 teardown (publish-cuda-release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -17,14 +17,13 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Checkout lattice-estimator
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: malb/lattice-estimator
          path: lattice_estimator
-          ref: '53508253629d3b5d31a2ad110e85dc69391ccb95'

      - name: Install Sage
        run: |
@@ -42,7 +41,7 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/core_crypto_benchmark.yml
+++ b/.github/workflows/core_crypto_benchmark.yml
@@ -1,5 +1,5 @@
-# Run core crypto benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: Core crypto benchmarks
+# Run PBS benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: PBS benchmarks

 on:
  workflow_dispatch:
@@ -32,12 +32,10 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
-  run-core-crypto-benchmarks:
-    name: Execute core crypto benchmarks in EC2
+  run-pbs-benchmarks:
+    name: Execute PBS benchmarks in EC2
    runs-on: ${{ github.event.inputs.runner_name }}
    if: ${{ !cancelled() }}
    steps:
@@ -53,7 +51,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -63,14 +61,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make bench_pbs
-          make bench_ks
+          make AVX512_SUPPORT=ON bench_pbs

      - name: Parse results
        run: |
@@ -88,17 +86,17 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
-          name: ${{ github.sha }}_core_crypto
+          name: ${{ github.sha }}_pbs
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -117,11 +115,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "PBS benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "PBS benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/shortint_benchmark.yml
+++ b/.github/workflows/shortint_benchmark.yml
@@ -24,8 +24,6 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  run-shortint-benchmarks:
@@ -45,7 +43,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -55,13 +53,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make bench_shortint
+          make AVX512_SUPPORT=ON bench_shortint

      - name: Parse results
        run: |
@@ -89,17 +88,17 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_shortint
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -118,11 +117,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Shortint benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Shortint benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/shortint_full_benchmark.yml
+++ b/.github/workflows/shortint_full_benchmark.yml
@@ -32,8 +32,6 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  shortint-benchmarks:
@@ -53,17 +51,15 @@ jobs:
          echo "Request ID: ${{ inputs.request_id }}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

      - name: Get benchmark details
        run: |
-          {
-            echo "BENCH_DATE=$(date --iso-8601=seconds)";
-            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
-            echo "COMMIT_HASH=$(git describe --tags --dirty)";
-          } >> "${GITHUB_ENV}"
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"

      - name: Set up home
        # "Install rust" step require root user to have a HOME directory which is not set.
@@ -71,20 +67,21 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
-          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_shortint
+          make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_shortint

      - name: Parse results
        run: |
@@ -115,7 +112,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -142,11 +139,11 @@ jobs:
    steps:
      - name: Notify
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Shortint full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Shortint full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_benchmark.yml
+++ b/.github/workflows/signed_integer_benchmark.yml
@@ -25,8 +25,6 @@ env:
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  run-integer-benchmarks:
@@ -46,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -56,13 +54,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make FAST_BENCH=TRUE bench_signed_integer
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_signed_integer

      - name: Parse benchmarks to csv
        run: |
@@ -70,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -91,17 +90,17 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -120,11 +119,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Signed integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Signed integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_full_benchmark.yml
+++ b/.github/workflows/signed_integer_full_benchmark.yml
@@ -28,8 +28,6 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  integer-benchmarks:
@@ -37,12 +35,12 @@ jobs:
    runs-on: ${{ github.event.inputs.runner_name }}
    if: ${{ !cancelled() }}
    continue-on-error: true
-    timeout-minutes: 1440  # 24 hours
    strategy:
      max-parallel: 1
      matrix:
        command: [ integer, integer_multi_bit ]
-        op_flavor: [ default, unchecked ]
+        op_flavor: [ default, default_comp, default_scalar, default_scalar_comp,
+                     unchecked, unchecked_comp, unchecked_scalar, unchecked_scalar_comp ]
    steps:
      - name: Instance configuration used
        run: |
@@ -52,17 +50,15 @@ jobs:
          echo "Request ID: ${{ inputs.request_id }}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

      - name: Get benchmark details
        run: |
-          {
-            echo "BENCH_DATE=$(date --iso-8601=seconds)";
-            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
-            echo "COMMIT_HASH=$(git describe --tags --dirty)";
-          } >> "${GITHUB_ENV}"
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"

      - name: Set up home
        # "Install rust" step require root user to have a HOME directory which is not set.
@@ -70,20 +66,21 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Run benchmarks with AVX512
        run: |
-          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_signed_${{ matrix.command }}
+          make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_signed_${{ matrix.command }}

      - name: Parse results
        run: |
@@ -99,7 +96,7 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -126,11 +123,11 @@ jobs:
    steps:
      - name: Notify
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Signed integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Signed integer full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_multi_bit_benchmark.yml
+++ b/.github/workflows/signed_integer_multi_bit_benchmark.yml
@@ -25,8 +25,6 @@ env:
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  run-integer-benchmarks:
@@ -46,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -56,13 +54,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run multi-bit benchmarks with AVX512
        run: |
-          make FAST_BENCH=TRUE bench_signed_integer_multi_bit
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_signed_integer_multi_bit

      - name: Parse benchmarks to csv
        run: |
@@ -70,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -91,17 +90,17 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -120,11 +119,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Signed integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Signed integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/start_benchmarks.yml
+++ b/.github/workflows/start_benchmarks.yml
@@ -32,12 +32,8 @@ on:
        description: "Run signed integer multi bit benches"
        type: boolean
        default: true
-      core_crypto_bench:
-        description: "Run core crypto benches"
-        type: boolean
-        default: true
-      core_crypto_gpu_bench:
-        description: "Run core crypto benches on GPU"
+      pbs_bench:
+        description: "Run PBS benches"
        type: boolean
        default: true
      wasm_client_bench:
@@ -53,18 +49,17 @@ jobs:
        command: [ boolean_bench, shortint_bench,
                   integer_bench, integer_multi_bit_bench,
                   signed_integer_bench, signed_integer_multi_bit_bench,
-                   integer_gpu_bench, integer_multi_bit_gpu_bench,
-                   core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ]
+                   pbs_bench, wasm_client_bench ]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@2d756ea4c53f7f6b397767d8723b3a10a9f35bf2
+        uses: tj-actions/changed-files@1c938490c880156b746568a518594309cfb3f66b
        with:
          files_yaml: |
            common_benches:
@@ -102,20 +97,20 @@ jobs:
              - tfhe/src/integer/**
              - tfhe/benches/integer/signed_bench.rs
              - .github/workflows/signed_integer_multi_bit_benchmark.yml
-            core_crypto_bench:
+            pbs_bench:
              - tfhe/src/core_crypto/**
              - tfhe/benches/core_crypto/**
-              - .github/workflows/core_crypto_benchmark.yml
+              - .github/workflows/pbs_benchmark.yml
            wasm_client_bench:
              - tfhe/web_wasm_parallel_tests/**
              - .github/workflows/wasm_client_benchmark.yml

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Start AWS job in Slab
        # If manually triggered check that the current bench has been requested
--- a/.github/workflows/start_full_benchmarks.yml
+++ b/.github/workflows/start_full_benchmarks.yml
@@ -24,22 +24,21 @@ jobs:
    if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
    strategy:
      matrix:
-        command: [ boolean_bench, shortint_full_bench,
-                   integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench,
-                   core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ]
+        command: [ boolean_bench, shortint_full_bench, integer_full_bench,
+                   signed_integer_full_bench, pbs_bench, wasm_client_bench ]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Set benchmarks type as weekly
        if: (github.event_name == 'workflow_dispatch' && inputs.benchmark_type == 'weekly') || github.event.schedule == '0 1 * * 6'
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -13,11 +13,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0
      - name: Save repo
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: repo-archive
          path: '.'
@@ -26,12 +26,12 @@ jobs:
        with:
          source_repo: "zama-ai/tfhe-rs"
          source_branch: "main"
-          destination_repo: "https://${{ secrets.BOT_USERNAME }}:${{ secrets.FHE_ACTIONS_TOKEN }}@github.com/${{ secrets.SYNC_DEST_REPO }}"
+          destination_repo: "https://${{ secrets.BOT_USERNAME }}:${{ secrets.CONCRETE_ACTIONS_TOKEN }}@github.com/${{ secrets.SYNC_DEST_REPO }}"
          destination_branch: "main"
      - name: git-sync tags
        uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
        with:
          source_repo: "zama-ai/tfhe-rs"
          source_branch: "refs/tags/*"
-          destination_repo: "https://${{ secrets.BOT_USERNAME }}:${{ secrets.FHE_ACTIONS_TOKEN }}@github.com/${{ secrets.SYNC_DEST_REPO }}"
+          destination_repo: "https://${{ secrets.BOT_USERNAME }}:${{ secrets.CONCRETE_ACTIONS_TOKEN }}@github.com/${{ secrets.SYNC_DEST_REPO }}"
          destination_branch: "refs/tags/*"
--- a/.github/workflows/trigger_aws_tests_on_pr.yml
+++ b/.github/workflows/trigger_aws_tests_on_pr.yml
@@ -0,0 +1,54 @@
+# Trigger an AWS build each time commits are pushed to a pull request.
+name: PR AWS build trigger
+
+on:
+  pull_request:
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  trigger-tests:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+    steps:
+      - name: Get current labels
+        uses: snnaplab/get-labels-action@f426df40304808ace3b5282d4f036515f7609576
+
+      - name: Remove approved label
+        if: ${{ github.event_name == 'pull_request' && contains(fromJSON(env.LABELS), 'approved') }}
+        uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          labels: approved
+
+      - name: Launch fast tests
+        if: ${{ github.event_name == 'pull_request' }}
+        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
+        with:
+          allow-repeats: true
+          message: |
+            @slab-ci cpu_fast_test
+
+      - name: Add approved label
+        uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf
+        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          labels: approved
+
+      # PR label 'approved' presence is checked to avoid running the full test suite several times
+      # in case of multiple approvals without new commits in between.
+      - name: Launch full tests suite
+        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
+        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
+        with:
+          allow-repeats: true
+          message: |
+            Pull Request has been approved :tada:
+            Launching full test suite...
+            @slab-ci cpu_test
+            @slab-ci cpu_unsigned_integer_test
+            @slab-ci cpu_signed_integer_test
+            @slab-ci cpu_wasm_test
+            @slab-ci csprng_randomness_testing
--- a/.github/workflows/wasm_client_benchmark.yml
+++ b/.github/workflows/wasm_client_benchmark.yml
@@ -32,8 +32,6 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  RUST_BACKTRACE: "full"
-  RUST_MIN_STACK: "8388608"

 jobs:
  run-wasm-client-benchmarks:
@@ -53,7 +51,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -63,9 +61,10 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: dtolnay/rust-toolchain@dc6353516c68da0f06325f42ad880f76a5e77ec9
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
        with:
          toolchain: nightly
+          override: true

      - name: Run benchmarks
        run: |
@@ -98,17 +97,17 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_wasm
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
-          token: ${{ secrets.FHE_ACTIONS_TOKEN }}
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Send data to Slab
        shell: bash
@@ -127,11 +126,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "WASM benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "WASM benchmarks failed. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,3 @@ dieharder_run.log

 # Coverage reports
 /coverage/
-
-# Cuda local build
-backends/tfhe-cuda-backend/cuda/cmake-build-debug/
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,13 +1,6 @@
 [workspace]
 resolver = "2"
-members = [
-    "tfhe",
-    "tfhe-zk-pok",
-    "tasks",
-    "apps/trivium",
-    "concrete-csprng",
-    "backends/tfhe-cuda-backend",
-]
+members = ["tfhe", "tasks", "apps/trivium", "concrete-csprng", "concrete-float"]

 [profile.bench]
 lto = "fat"
@@ -24,4 +17,3 @@ lto = "off"
 inherits = "dev"
 opt-level = 3
 lto = "off"
-debug-assertions = false
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2024 ZAMA.
+Copyright © 2023 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/428
+++ b/428
@@ -3,7 +3,6 @@ OS:=$(shell uname)
 RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
 CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
 TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
-CPU_COUNT=$(shell ./scripts/cpu_count.sh)
 RS_BUILD_TOOLCHAIN:=stable
 CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
 CARGO_PROFILE?=release
@@ -18,7 +17,6 @@ FAST_TESTS?=FALSE
 FAST_BENCH?=FALSE
 BENCH_OP_FLAVOR?=DEFAULT
 NODE_VERSION=20
-FORWARD_COMPAT?=OFF
 # sed: -n, do not print input stream, -e means a script/expression
 # 1,/version/ indicates from the first line, to the line matching version at the start of the line
 # p indicates to print, so we keep only the start of the Cargo.toml until we hit the first version
@@ -51,20 +49,10 @@ else
 		COVERAGE_ONLY=
 endif

-ifeq ($(FORWARD_COMPAT),ON)
-		FORWARD_COMPAT_FEATURE=forward_compatibility
-else
-		FORWARD_COMPAT_FEATURE=
-endif
-
 # Variables used only for regex_engine example
 REGEX_STRING?=''
 REGEX_PATTERN?=''

-# tfhe-cuda-backend
-TFHECUDA_SRC=backends/tfhe-cuda-backend/cuda
-TFHECUDA_BUILD=$(TFHECUDA_SRC)/build
-
 # Exclude these files from coverage reports
 define COVERAGE_EXCLUDED_FILES
 --exclude-files apps/trivium/src/trivium/* \
@@ -120,12 +108,7 @@ install_wasm_pack: install_rs_build_toolchain

 .PHONY: install_node # Install last version of NodeJS via nvm
 install_node:
-	curl -o nvm_install.sh https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh
-	@echo "2ed5e94ba12434370f0358800deb69f514e8bce90f13beb0e1b241d42c6abafd nvm_install.sh" > nvm_checksum
-	@sha256sum -c nvm_checksum
-	@rm nvm_checksum
-	$(SHELL) nvm_install.sh
-	@rm nvm_install.sh
+	curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | $(SHELL)
 	source ~/.bashrc
 	$(SHELL) -i -c 'nvm install $(NODE_VERSION)' || \
 	( echo "Unable to install node, unknown error." && exit 1 )
@@ -150,63 +133,14 @@ check_linelint_installed:
 	@printf "\n" | linelint - > /dev/null 2>&1 || \
 	( echo "Unable to locate linelint. Try installing it: https://github.com/fernandrone/linelint/releases" && exit 1 )

-.PHONY: check_actionlint_installed # Check if actionlint workflow linter is installed
-check_actionlint_installed:
-	@actionlint --version > /dev/null 2>&1 || \
-	( echo "Unable to locate actionlint. Try installing it: https://github.com/rhysd/actionlint/releases" && exit 1 )
-
-.PHONY: check_nvm_installed # Check if Node Version Manager is installed
-check_nvm_installed:
-	@source ~/.nvm/nvm.sh && nvm --version > /dev/null 2>&1 || \
-	( echo "Unable to locate Node. Run 'make install_node'" && exit 1 )
-
 .PHONY: fmt # Format rust code
 fmt: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt

-.PHONY: fmt_js # Format javascript code
-fmt_js: check_nvm_installed
-	source ~/.nvm/nvm.sh && \
-	nvm install $(NODE_VERSION) && \
-	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests fmt
-
-.PHONY: fmt_gpu # Format rust and cuda code
-fmt_gpu: install_rs_check_toolchain
-	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
-	cd "$(TFHECUDA_SRC)" && ./format_tfhe_cuda_backend.sh
-
-.PHONY: fmt_c_tests # Format c tests
-fmt_c_tests:
-	find tfhe/c_api_tests/ -regex '.*\.\(cpp\|hpp\|cu\|c\|h\)' -exec clang-format -style=file -i {} \;
-
 .PHONY: check_fmt # Check rust code format
 check_fmt: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check

-.PHONY: check_fmt_c_tests  # Check C tests format
-check_fmt_c_tests:
-	find tfhe/c_api_tests/ -regex '.*\.\(cpp\|hpp\|cu\|c\|h\)' -exec clang-format --dry-run --Werror -style=file {} \;
-
-.PHONY: check_fmt_gpu # Check rust and cuda code format
-check_fmt_gpu: install_rs_check_toolchain
-	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check
-	cd "$(TFHECUDA_SRC)" && ./format_tfhe_cuda_backend.sh -c
-
-.PHONY: check_fmt_js # Check javascript code format
-check_fmt_js: check_nvm_installed
-	source ~/.nvm/nvm.sh && \
-	nvm install $(NODE_VERSION) && \
-	nvm use $(NODE_VERSION) && \
-	$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt
-
-.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
-clippy_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
-		--all-targets \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
-
 .PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
 fix_newline: check_linelint_installed
 	linelint -a .
@@ -215,9 +149,10 @@ fix_newline: check_linelint_installed
 check_newline: check_linelint_installed
 	linelint .

-.PHONY: lint_workflow # Run static linter on GitHub workflows
-lint_workflow: check_actionlint_installed
-	actionlint
+.PHONY: clippy_float # Run clippy lints on concrete-float
+clippy_float: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		-p concrete-float -- --no-deps -D warnings

 .PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
 clippy_core: install_rs_check_toolchain
@@ -227,12 +162,6 @@ clippy_core: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),experimental \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings
-	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings
-	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
-		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_boolean # Run clippy lints enabling the boolean features
 clippy_boolean: install_rs_check_toolchain
@@ -261,7 +190,7 @@ clippy: install_rs_check_toolchain
 .PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
 clippy_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
@@ -277,13 +206,13 @@ clippy_tasks:

 .PHONY: clippy_trivium # Run clippy lints on Trivium app
 clippy_trivium: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		-p tfhe-trivium -- --no-deps -D warnings

 .PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
 clippy_all_targets:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok-experimental \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,safe-deserialization \
 		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_concrete_csprng # Run clippy lints on concrete-csprng
@@ -292,24 +221,14 @@ clippy_concrete_csprng:
 		--features=$(TARGET_ARCH_FEATURE) \
 		-p concrete-csprng -- --no-deps -D warnings

-.PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
-clippy_zk_pok:
-	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		-p tfhe-zk-pok -- --no-deps -D warnings
-
 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets clippy_c_api \
-clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_zk_pok clippy_trivium
+clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_trivium

 .PHONY: clippy_fast # Run main clippy targets
 clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core \
 clippy_concrete_csprng

-.PHONY: clippy_cuda_backend # Run clippy lints on the tfhe-cuda-backend
-clippy_cuda_backend: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-		-p tfhe-cuda-backend -- --no-deps -D warnings
-
 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
@@ -348,44 +267,24 @@ build_tfhe_full: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets

-.PHONY: build_tfhe_coverage # Build with test coverage enabled
-build_tfhe_coverage: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
-
-.PHONY: symlink_c_libs_without_fingerprint # Link the .a and .so files without the changing hash part in target
-symlink_c_libs_without_fingerprint:
-	@./scripts/symlink_c_libs_without_fingerprint.sh \
-		--cargo-profile "$(CARGO_PROFILE)" \
-		--lib-name tfhe-c-api-dynamic-buffer
-
 .PHONY: build_c_api # Build the C API for boolean, shortint and integer
 build_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok-experimental,$(FORWARD_COMPAT_FEATURE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,safe-deserialization \
 		-p $(TFHE_SPEC)
-	@"$(MAKE)" symlink_c_libs_without_fingerprint
-
-.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
-build_c_api_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok-experimental,gpu \
-		-p $(TFHE_SPEC)
-	@"$(MAKE)" symlink_c_libs_without_fingerprint

 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
 build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,experimental-force_fft_algo_dif4,$(FORWARD_COMPAT_FEATURE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,safe-deserialization,experimental-force_fft_algo_dif4 \
 		-p $(TFHE_SPEC)
-	@"$(MAKE)" symlink_c_libs_without_fingerprint

 .PHONY: build_web_js_api # Build the js API targeting the web browser
 build_web_js_api: install_rs_build_toolchain install_wasm_pack
 	cd tfhe && \
 	RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
 		wasm-pack build --release --target=web \
-		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok-experimental
+		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api

 .PHONY: build_web_js_api_parallel # Build the js API targeting the web browser with parallelism support
 build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
@@ -393,7 +292,7 @@ build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
 	rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
 	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
 		wasm-pack build --release --target=web \
-		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok-experimental \
+		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api \
 		-Z build-std=panic_abort,std

 .PHONY: build_node_js_api # Build the js API targeting nodejs
@@ -401,7 +300,7 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
 	cd tfhe && \
 	RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
 		wasm-pack build --release --target=nodejs \
-		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok-experimental
+		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api

 .PHONY: build_concrete_csprng # Build concrete_csprng
 build_concrete_csprng: install_rs_build_toolchain
@@ -411,10 +310,10 @@ build_concrete_csprng: install_rs_build_toolchain
 .PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
 test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok-experimental -p $(TFHE_SPEC) -- core_crypto::
+		--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok-experimental,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- core_crypto::; \
+			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- core_crypto::; \
 	fi

 .PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
@@ -422,41 +321,16 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
 		--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache \
+		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,__coverage \
 		-p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 			--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
 			--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,$(AVX512_FEATURE) \
-			-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
+			--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,__coverage,$(AVX512_FEATURE) \
+			-p $(TFHE_SPEC) -- core_crypto::; \
 	fi

-.PHONY: test_cuda_backend # Run the internal tests of the CUDA backend
-test_cuda_backend:
-	mkdir -p "$(TFHECUDA_BUILD)" && \
-		cd "$(TFHECUDA_BUILD)" && \
-		cmake .. -DCMAKE_BUILD_TYPE=Release -DTFHE_CUDA_BACKEND_BUILD_TESTS=ON && \
-		make -j "$(CPU_COUNT)" && \
-		make test
-
-.PHONY: test_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
-test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
-
-.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
-test_core_crypto_gpu: install_rs_build_toolchain install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
-
-.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
-test_integer_gpu: install_rs_build_toolchain install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
-
 .PHONY: test_boolean # Run the tests of the boolean module
 test_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
@@ -467,13 +341,13 @@ test_boolean_cov: install_rs_check_toolchain install_tarpaulin
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache \
-		-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::
+		--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- boolean::

 .PHONY: test_c_api_rs # Run the rust tests for the C API
 test_c_api_rs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,safe-deserialization \
 		-p $(TFHE_SPEC) \
 		c_api

@@ -484,23 +358,19 @@ test_c_api_c: build_c_api
 .PHONY: test_c_api # Run all the tests for the C API
 test_c_api: test_c_api_rs test_c_api_c

-.PHONY: test_c_api_gpu # Run the C tests for the C API
-test_c_api_gpu: build_c_api_gpu
-	./scripts/c_api_tests.sh --gpu
-
 .PHONY: test_shortint_ci # Run the tests for shortint ci
 test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "$(TFHE_SPEC)"
+		--cargo-profile "$(CARGO_PROFILE)"

 .PHONY: test_shortint_multi_bit_ci # Run the tests for shortint ci running only multibit tests
 test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "$(TFHE_SPEC)"
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit

 .PHONY: test_shortint # Run all the tests for shortint
 test_shortint: install_rs_build_toolchain
@@ -512,16 +382,15 @@ test_shortint_cov: install_rs_check_toolchain install_tarpaulin
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
 		--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
 		$(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
-		-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- shortint::

 .PHONY: test_integer_ci # Run the tests for integer ci
 test_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)"
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)"

 .PHONY: test_unsigned_integer_ci # Run the tests for unsigned integer ci
 test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -529,7 +398,7 @@ test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+		--unsigned-only

 .PHONY: test_signed_integer_ci # Run the tests for signed integer ci
 test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -537,15 +406,14 @@ test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
-		--signed-only --tfhe-package "$(TFHE_SPEC)"
+		--signed-only

 .PHONY: test_integer_multi_bit_ci # Run the tests for integer ci running only multibit tests
 test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
-		--tfhe-package "$(TFHE_SPEC)"
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)"

 .PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for nsigned integer ci running only multibit tests
 test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -553,7 +421,7 @@ test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nex
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
-		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+		--unsigned-only

 .PHONY: test_signed_integer_multi_bit_ci # Run the tests for nsigned integer ci running only multibit tests
 test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -561,57 +429,30 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
 		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
-		--signed-only --tfhe-package "$(TFHE_SPEC)"
+		--signed-only

 .PHONY: test_safe_deserialization # Run the tests for safe deserialization
 test_safe_deserialization: install_rs_build_toolchain install_cargo_nextest
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_deserialization::
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,safe-deserialization -p $(TFHE_SPEC) -- safe_deserialization::

 .PHONY: test_integer # Run all the tests for integer
 test_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::

-.PHONY: test_integer_cov # Run the tests of the integer module with code coverage
-test_integer_cov: install_rs_check_toolchain install_tarpaulin
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
-		--out xml --output-dir coverage/integer --line --engine llvm --timeout 500 \
-		--implicit-test-threads \
-		--exclude-files $(COVERAGE_EXCLUDED_FILES) \
-		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
-		-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::
-
 .PHONY: test_high_level_api # Run all the tests for high_level_api
 test_high_level_api: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok-experimental -p $(TFHE_SPEC) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
 		-- high_level_api::

-test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) \
-		-E "test(/high_level_api::.*gpu.*/)"
-
 .PHONY: test_user_doc # Run tests from the .md documentation
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok-experimental \
-		-p $(TFHE_SPEC) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
 		-- test_user_docs::

-.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
-test_user_doc_gpu: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok-experimental -p $(TFHE_SPEC) \
-		-- test_user_docs::
-
-.PHONY: test_fhe_strings # Run tests for fhe_strings example
-test_fhe_strings: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--example fhe_strings \
-		--features=$(TARGET_ARCH_FEATURE),integer
-
 .PHONY: test_regex_engine # Run tests for regex_engine example
 test_regex_engine: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
@@ -642,50 +483,90 @@ test_concrete_csprng:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng

-.PHONY: test_zk_pok # Run tfhe-zk-pok-experimental tests
-test_zk_pok:
+.PHONY: test_float # Run all floating-point tests (per-operation tests and the minifloat test)
+test_float: test_float_add test_float_sub test_float_mul test_float_div test_float_cos test_float_sin test_float_relu test_float_sigmoid test_minifloat
+
+.PHONY: test_minifloat # Run minifloat bivariate test
+test_minifloat:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		-p tfhe-zk-pok
+		--features=$(TARGET_ARCH_FEATURE),shortint -p tfhe float_wopbs_bivariate -- --nocapture
+
+.PHONY: test_float_cos # Run floating points cosine test
+test_float_cos:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::float_cos" -- --exact --nocapture
+
+.PHONY: test_float_sin # Run floating points sine test
+test_float_sin:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::float_sin" -- --exact --nocapture
+
+.PHONY: test_float_mul # Run floating points multiplication test
+test_float_mul:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::test_float_mul" -- --exact --nocapture
+
+.PHONY: test_float_add # Run floating points addition test
+test_float_add:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::test_float_add" -- --exact --nocapture
+
+.PHONY: test_float_sub # Run floating points subtraction test
+test_float_sub:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::test_float_sub" -- --exact --nocapture
+
+.PHONY: test_float_div # Run floating points division test
+test_float_div:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::test_float_div" -- --exact --nocapture
+
+.PHONY: test_float_relu # Run floating points relu test
+test_float_relu:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::test_float_relu" -- --exact --nocapture
+
+.PHONY: test_float_sigmoid # Run floating points sigmoid test
+test_float_sigmoid:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::test_float_sigmoid" -- --exact --nocapture
+
+.PHONY: test_float_depth_test # Run floating points depth test
+test_float_depth_test:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-float "server_key::tests::depth_test_parallelized" -- --exact --nocapture

 .PHONY: doc # Build rust doc
 doc: install_rs_check_toolchain
-	@# Even though we are not in docs.rs, this allows to "just" build the doc
-	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental --no-deps -p $(TFHE_SPEC)
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer --no-deps

 .PHONY: docs # Build rust doc alias for doc
 docs: doc

 .PHONY: lint_doc # Build rust doc with linting enabled
 lint_doc: install_rs_check_toolchain
-	@# Even though we are not in docs.rs, this allows to "just" build the doc
-	DOCS_RS=1 \
 	RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental -p $(TFHE_SPEC) --no-deps
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer --no-deps

 .PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
 lint_docs: lint_doc

 .PHONY: format_doc_latex # Format the documentation latex equations to avoid broken rendering.
 format_doc_latex:
-	RUSTFLAGS="" cargo xtask format_latex_doc
+	cargo xtask format_latex_doc
 	@"$(MAKE)" --no-print-directory fmt
 	@printf "\n===============================\n\n"
 	@printf "Please manually inspect changes made by format_latex_doc, rustfmt can break equations \
 	if the line length is exceeded\n"
 	@printf "\n===============================\n"

-.PHONY: check_md_docs_are_tested # Checks that the rust codeblocks in our .md files are tested
-check_md_docs_are_tested:
-	RUSTFLAGS="" cargo xtask check_tfhe_docs_are_tested
-
 .PHONY: check_compile_tests # Build tests in debug without running them
 check_compile_tests:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
-		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
+		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,safe-deserialization \
 		-p $(TFHE_SPEC)

 	@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
@@ -693,16 +574,6 @@ check_compile_tests:
 		./scripts/c_api_tests.sh --build-only; \
 	fi

-.PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
-check_compile_tests_benches_gpu: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
-		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,gpu \
-		-p $(TFHE_SPEC)
-	mkdir -p "$(TFHECUDA_BUILD)" && \
-		cd "$(TFHECUDA_BUILD)" && \
-		cmake .. -DCMAKE_BUILD_TYPE=Debug -DTFHE_CUDA_BACKEND_BUILD_TESTS=ON -DTFHE_CUDA_BACKEND_BUILD_BENCHMARKS=ON && \
-		make -j "$(CPU_COUNT)"
-
 .PHONY: build_nodejs_test_docker # Build a docker image with tools to run nodejs tests for wasm API
 build_nodejs_test_docker:
 	DOCKER_BUILDKIT=1 docker build --build-arg RUST_TOOLCHAIN="$(RS_BUILD_TOOLCHAIN)" \
@@ -755,21 +626,7 @@ bench_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
-
-.PHONY: bench_signed_integer # Run benchmarks for signed integer
-bench_signed_integer: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench integer-signed-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
-
-.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
-bench_integer_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --

 .PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
 bench_integer_multi_bit: install_rs_check_toolchain
@@ -777,7 +634,14 @@ bench_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
+
+.PHONY: bench_signed_integer # Run benchmarks for signed integer
+bench_signed_integer: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-signed-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --

 .PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
 bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -785,35 +649,14 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-signed-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
-
-.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
-bench_integer_multi_bit_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
-	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --

 .PHONY: bench_shortint # Run benchmarks for shortint
 bench_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-
-.PHONY: bench_oprf # Run benchmarks for shortint
-bench_oprf: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench oprf-shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-	RUSTFLAGS="$(RUSTFLAGS)" \
-	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench oprf-integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-
-
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)

 .PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
 bench_shortint_multi_bit: install_rs_check_toolchain
@@ -821,38 +664,20 @@ bench_shortint_multi_bit: install_rs_check_toolchain
 	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --


 .PHONY: bench_boolean # Run benchmarks for boolean
 bench_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench boolean-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)

 .PHONY: bench_pbs # Run benchmarks for PBS
 bench_pbs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-
-.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
-bench_pbs_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench pbs-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-
-.PHONY: bench_ks # Run benchmarks for keyswitch
-bench_ks: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench ks-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
-
-.PHONY: bench_ks_gpu # Run benchmarks for PBS on GPU backend
-bench_ks_gpu: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
-	--bench ks-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)

 .PHONY: bench_web_js_api_parallel # Run benchmarks for the web wasm api
 bench_web_js_api_parallel: build_web_js_api_parallel
@@ -864,12 +689,44 @@ ci_bench_web_js_api_parallel: build_web_js_api_parallel
 	nvm use node && \
 	$(MAKE) -C tfhe/web_wasm_parallel_tests bench-ci

+.PHONY: bench_float # Run benchmarks for the floating points
+bench_float: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench float-bench
+
+.PHONY: bench_float_8bit # Run benchmarks for 8-bit floating points only
+bench_float_8bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench float-bench -- PARAM_8
+
+
+.PHONY: bench_float_16bit # Run benchmarks for 16-bit floating points only
+bench_float_16bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench float-bench -- PARAM_16
+
+
+.PHONY: bench_float_32bit # Run benchmarks for 32-bit floating points only
+bench_float_32bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench float-bench -- PARAM_32
+
+.PHONY: bench_float_64bit # Run benchmarks for 64-bit floating points only
+bench_float_64bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench float-bench -- PARAM_64
+
+.PHONY: bench_minifloat # Run benchmarks for Wopbs floating points
+bench_minifloat: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench float-wopbs-bench
+
 #
 # Utility tools
 #
 .PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
 gen_key_cache: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 		--example generates_test_keys \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache -- \
 		$(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
@@ -941,16 +798,11 @@ sha256_bool: install_rs_check_toolchain
 	--example sha256_bool \
 	--features=$(TARGET_ARCH_FEATURE),boolean

-.PHONY: pcc # pcc stands for pre commit checks (except GPU)
-pcc: no_tfhe_typo no_dbg_log check_fmt lint_doc check_md_docs_are_tested clippy_all \
-check_compile_tests
-
-.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
-pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu
+.PHONY: pcc # pcc stands for pre commit checks
+pcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_all check_compile_tests

 .PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
-fpcc: no_tfhe_typo no_dbg_log check_fmt lint_doc check_md_docs_are_tested clippy_fast \
-check_compile_tests
+fpcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_fast check_compile_tests

 .PHONY: conformance # Automatically fix problems that can be fixed
 conformance: fix_newline fmt
--- a/README.md
+++ b/README.md
@@ -1,264 +1,160 @@
-<p align="center">
-<!-- product name logo -->
-<picture>
-  <source media="(prefers-color-scheme: dark)" srcset="https://github.com/zama-ai/tfhe-rs/assets/157474013/5283e0ba-da1e-43af-9f2a-c5221367a12b">
-  <source media="(prefers-color-scheme: light)" srcset="https://github.com/zama-ai/tfhe-rs/assets/157474013/b94a8c96-7595-400b-9311-70765c706955">
-  <img width=600 alt="Zama TFHE-rs">
-</picture>
-</p>
-
-<hr/>
-
-<p align="center">
-  <a href="https://docs.zama.ai/tfhe-rs"> 📒 Documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
-</p>
+# Artifact: TFHE Gets Real: An Efficient and Flexible Homomorphic Floating-Point Arithmetic


-<p align="center">
-  <a href="https://github.com/zama-ai/tfhe-rs/releases"><img src="https://img.shields.io/github/v/release/zama-ai/tfhe-rs?style=flat-square"></a>
-  <a href="LICENSE"><img src="https://img.shields.io/badge/License-BSD--3--Clause--Clear-%23ffb243?style=flat-square"></a>
-  <a href="https://github.com/zama-ai/bounty-program"><img src="https://img.shields.io/badge/Contribute-Zama%20Bounty%20Program-%23ffd208?style=flat-square"></a>
-</p>
+## Description

-## About

-### What is TFHE-rs
+In what follows, we provide instructions on how to run the benchmarks from the paper entitled **TFHE Gets Real: An Efficient and Flexible Homomorphic Floating-Point Arithmetic**.
+In particular, the benchmarks presented in **Table 5**, **Table 6**, **Table 7**, and the experiments shown in **Table 8** can be easily reproduced using this code. The implementation of the techniques described in the aforementioned paper has been integrated into the **TFHE-rs** library, version 0.5.0. The modified or added source files are organized into two different paths.

-**TFHE-rs** is a pure Rust implementation of TFHE for boolean and integer arithmetics over encrypted data.
+The Minifloats (Section 3.1) are located in *tfhe/src/float_wopbs*
+- Test files are located in *tfhe/src/float_wopbs/server_key/tests.rs*
+- Benchmarks are located in *tfhe/benches/float_wopbs/bench.rs*

-It includes:
- a **Rust** API
- a **C** API
- and a **client-side WASM** API

-TFHE-rs is designed for developers and researchers who want full control over
-what they can do with TFHE, while not having to worry about the low-level
-implementation. The goal is to have a stable, simple, high-performance, and
-production-ready library for all the advanced features of TFHE.
-<br></br>
+The homomorphic floating points (Section 3.2) are located in *tfhe/concrete-float/*
+- Test files are located in *tfhe/concrete-float/src/server_key/tests.rs*
+- Benchmarks are located in *tfhe/concrete-float/benches/bench.rs*

-### Main features

- **Low-level cryptographic library** that implements Zama’s variant of TFHE, including programmable bootstrapping
- **Implementation of the original TFHE boolean API** that can be used as a drop-in replacement for other TFHE libraries
- **Short integer API** that enables exact, unbounded FHE integer arithmetics with up to 8 bits of message space
- **Size-efficient public key encryption**
- **Ciphertext and server key compression** for efficient data transfer
- **Full Rust API, C bindings to the Rust High-Level API, and client-side Javascript API using WASM**.
+## Dependencies

-*Learn more about TFHE-rs features in the [documentation](https://docs.zama.ai/tfhe-rs/readme).*
-<br></br>
+Tested on Linux and macOS with Rust version >= 1.80 (see [here](https://www.rust-lang.org/tools/install) for a guide to installing Rust).
+The complete list of dependencies and a guide on how to install TFHE-rs can be found in the online documentation [here](https://docs.zama.ai/tfhe-rs/0.5-3/getting-started/installation) or in the local file [here](./README_TFHE-rs.md).

-## Table of Contents
- **[Getting Started](#getting-started)**
-   - [Cargo.toml configuration](#cargotoml-configuration)
-   - [A simple example](#a-simple-example)
- **[Resources](#resources)**
-   - [TFHE deep dive](#tfhe-deep-dive)
-   - [Tutorials](#tutorials)
-   - [Documentation](#documentation)
- **[Working with TFHE-rs](#working-with-tfhe-rs)**
-   - [Disclaimers](#disclaimers)
-   - [Citations](#citations)
-   - [Contributing](#contributing)
-   - [License](#license)
- **[Support](#support)**
-<br></br>
+## How to run benchmarks
+At the root of the project (i.e., in the TFHE-rs folder), enter the following commands to run the benchmarks:

-## Getting Started
+- ```make bench_minifloat```: returns the timings associated with the Minifloats (**Table 6**).
+- ```make bench_float```: returns the timings associated with the HFP (**Table 5**, **Table 7**).
+These benchmarks first run the parallelized experiments and then the sequential ones,
+and output the timings for each input precision.
+**Running ```make bench_float``` takes more than 6 hours**.

-### Cargo.toml configuration
-To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:
+To run benchmarks for a specific precision over homomorphic floating points, here are the dedicated commands:
+- ```make bench_float_8bit```: Runs benchmarks for only 8-bit floating point *(around 15 min)*.
+- ```make bench_float_16bit```: Runs benchmarks for only 16-bit floating point *(around 30 min)*.
+- ```make bench_float_32bit```: Runs benchmarks for only 32-bit floating point *(around 1h40)*.
+- ```make bench_float_64bit```: Runs benchmarks for only 64-bit floating point *(around 6h30)*.

-+ For x86_64-based machines running Unix-like OSes:

-```toml
-tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-unix"] }
+We recall that the benchmarks were performed on AWS using an **m6i.metal** instance with an Intel Xeon 8375C (Ice Lake) processor running at 3.5 GHz, 128 vCPUs, and 512 GiB of memory.
+
+### Understanding Benchmark Output (Criterion.rs)
+
+This project uses [Criterion.rs](https://docs.rs/criterion/latest/criterion/) for benchmarking. Criterion is a powerful and statistically robust benchmarking framework for Rust, and it may produce outputs that are unfamiliar at first glance. This section explains how to interpret them.
+
+#### Sample Output Structure
+
+A typical benchmark result looks like this:
+
+```
+test_float             time:   [53.2 µs 54.0 µs 54.8 µs]
+                        change: [+0.2% +1.0% +1.8%] (p = 0.002)
+Found 3 outliers among 100 measurements (3.00%)
+  3 (3.00%) high mild
 ```

-+ For Apple Silicon or aarch64-based machines running Unix-like OSes:
+**Here's what this means:**

-```toml
-tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
+- `time: [lower bound  best estimate  upper bound]`: A confidence interval for the execution time of one iteration of the function; the middle value is Criterion's best estimate.
+- `change`: The performance change compared to a previous run (if available).
+- `outliers`: Some runs deviated from the typical time. Criterion detects and accounts for these using statistical methods.
+
+---
+
+####  Common Warnings and What They Mean
+
+##### `Found X outliers among Y measurements`
+
+Criterion runs each benchmark many times (default: 100) to get statistically significant results.
+An *outlier* is a run that was significantly faster or slower than the others.
+
+- **Why does this happen?** Often, it's due to **other processes on the machine** (e.g., background services, OS interrupts, or CPU scheduling) affecting performance temporarily.
+- **Why it doesn't invalidate results:** Criterion uses statistical techniques to minimize the impact of these outliers when estimating performance.
+- **Best practice to reduce outliers:** Run the benchmarks on a **freshly rebooted machine**, with as few background processes as possible. Ideally, let the system idle for a minute after boot to stabilize before running benchmarks.
+
+##### `Unable to complete 100 samples in 5.0s.`
+
+The benchmark took longer than the expected 5 seconds.
+This is merely a warning indicating that the full set of 100 samples could not be collected within the default 5-second measurement window.
+
+- **No action is required**: Criterion will still proceed to run all 100 samples, and the results remain statistically valid.
+- **Why the warning appears**: It's there to inform you that benchmarking is taking longer than expected and to help you tune settings if needed.
+- **Optional**: If you're constrained by time (e.g., running in CI), you can:
+  - Reduce the sample size (e.g., to 10 or 20 samples).
+  - Or increase the measurement time using:
+    ```bash
+    cargo bench -- --measurement-time 30
+    ```
+
+## How to run the tests
+### MiniFloats
+
+To run the tests related to the **minifloats**, run the following command:
+- ```make test_minifloat```: Runs a bivariate operation between two minifloats.
+
+
+The **minifloat** test is available in the file *tfhe/src/float_wopbs/server_key/tests.rs*.
+
+
+
+### Homomorphic Floating Points 
+At the root of the project (i.e., in the TFHE-rs folder), enter the following commands to run the tests per operation on the **homomorphic floating points**:
+- ```make test_float_add```: Runs a 32-bit floating-point addition with two random inputs.
+- ```make test_float_sub```: Runs a 32-bit floating-point subtraction with two random inputs.
+- ```make test_float_mul```: Runs a 32-bit floating-point multiplication with two random inputs.
+- ```make test_float_div```: Runs a 32-bit floating-point division with two random inputs.
+- ```make test_float_cos```: Runs the experiment from **Table 8** with a random input value.
+- ```make test_float_sin```: Runs the experiment from **Table 8** with a random input value.
+- ```make test_float_relu```: Runs a 32-bit floating-point relu with a random input.
+- ```make test_float_sigmoid```: Runs a 32-bit floating-point sigmoid with a random input.
+- ```make test_float```: Runs all previous tests for operations on 32-bit floating-points.
+- ```make test_float_depth_test```: This command runs the following experiment:
+  - **Step 1**: Create 3 blocks, each composed of a clear 32-bit floating point, a clear 64-bit floating point, and a 32-bit homomorphic floating point.
+  - **Step 2**: Choose two blocks randomly among the 3 blocks and randomly select a parallelized operation (addition, subtraction, or multiplication).
+  - **Step 3**: Compute the selected operation between the two selected blocks and store the result randomly in one of the two selected blocks.  
+  (The operation is performed respectively between the two 64-bit floating points, the two 32-bit floating points, and homomorphically between the two 32-bit homomorphic floating points.) 
+  - Repeat Steps 2 and 3 for 50 iterations.
+  - To avoid reaching + or - infinity, or **NaN**, when the clear 64-bit floating point reaches a fixed bound, compute a multiplication to rescale the value close to 1.  
+  This operation is also performed homomorphically for the encrypted data. This test takes several minutes.
+
+The tests are located in the file *tfhe/concrete-float/src/server_key/tests.rs*.
+
+Because the homomorphic floating-point representation is close to, but not exactly the same as, the corresponding clear floating-point representation, the obtained result is not identical to the one computed in the clear.
+To consider a test as "passed", we accept a difference of less than 0.1% compared to the 64-bit floating-point clear results.
+Note that using 8 or 16-bit homomorphic floating points might return errors due to a lack of precision and due to the comparisons with clear 64-bit floating points.
+
+In each test, the different results are presented in the following format:
+``` 
+--------------------
+"Name":
+
+Result       : 
+Clear 32-bits: 
+Clear 64-bits: 
+
+--------------------
+```
+where `"Name"` stands for the name of the ciphertext or the name of the operation, `Result` always corresponds to the decryption of a homomorphic floating point, and `Clear 32-bits` and `Clear 64-bits` correspond to the clear floating-point witnesses.
+
+All tests in *tfhe/concrete-float/src/server_key/tests.rs* are conducted for 32-bit floating-point precision, as it provides the best ratio between execution time and precision.  
+To change the parameter set used, uncomment the desired entries (and comment out the others) in the following `const`, and update the array length so that it matches the number of active entries (lines 79 to 87 in the file *tfhe/concrete-float/src/server_key/tests.rs*).
+
+
+```rust 
+const PARAMS: [(&str, Parameters); 1] =
+[
+//named_param!(PARAM_FP_64_BITS),
+named_param!(PARAM_FP_32_BITS),
+//named_param!(PARAM_FP_16_BITS),
+//named_param!(PARAM_FP_8_BITS),
+];
 ```

-+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:
-
-```toml
-tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
-```
-
-> [!Note]
-> Note: You need to use a Rust version >= 1.73 to compile TFHE-rs.
-
-> [!Note]
-> Note: aarch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs.
-
-<p align="right">
-  <a href="#about" > ↑ Back to top </a> 
-</p>
-
-### A simple example
-
-Here is a full example:
-
-``` rust
-use tfhe::prelude::*;
-use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint32, FheUint8};
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // Basic configuration to use homomorphic integers
-    let config = ConfigBuilder::default().build();
-
-    // Key generation
-    let (client_key, server_keys) = generate_keys(config);
-
-    let clear_a = 1344u32;
-    let clear_b = 5u32;
-    let clear_c = 7u8;
-
-    // Encrypting the input data using the (private) client_key
-    // FheUint32: Encrypted equivalent to u32
-    let mut encrypted_a = FheUint32::try_encrypt(clear_a, &client_key)?;
-    let encrypted_b = FheUint32::try_encrypt(clear_b, &client_key)?;
-
-    // FheUint8: Encrypted equivalent to u8
-    let encrypted_c = FheUint8::try_encrypt(clear_c, &client_key)?;
-
-    // On the server side:
-    set_server_key(server_keys);
-
-    // Clear equivalent computations: 1344 * 5 = 6720
-    let encrypted_res_mul = &encrypted_a * &encrypted_b;
-
-    // Clear equivalent computations: 6720 >> 5 = 210
-    encrypted_a = &encrypted_res_mul >> &encrypted_b;
-
-    // Clear equivalent computations: let casted_a = a as u8;
-    let casted_a: FheUint8 = encrypted_a.cast_into();
-
-    // Clear equivalent computations: min(210, 7) = 7
-    let encrypted_res_min = &casted_a.min(&encrypted_c);
-
-    // Operation between clear and encrypted data:
-    // Clear equivalent computations: 7 & 1 = 1
-    let encrypted_res = encrypted_res_min & 1_u8;
-
-    // Decrypting on the client side:
-    let clear_res: u8 = encrypted_res.decrypt(&client_key);
-    assert_eq!(clear_res, 1_u8);
-
-    Ok(())
-}
-```
-
-To run this code, use the following command: 
-<p align="center"> <code> cargo run --release </code> </p>
-
-> [!Note]
-> Note that when running code that uses `TFHE-rs`, it is highly recommended
-to run in release mode with cargo's `--release` flag to have the best performances possible.
-
-*Find an example with more explanations in [this part of the documentation](https://docs.zama.ai/tfhe-rs/getting-started/quick_start)*
-
-<p align="right">
-  <a href="#about" > ↑ Back to top </a> 
-</p>
+Note that the number in `[(&str, Parameters); 1]` must match the number of tested parameter sets, e.g., if one more parameter set is uncommented, this line becomes `[(&str, Parameters); 2]`.
+The parameter ```PARAM_X``` corresponds to the parameters used in **Table 5**, and ```PARAM_TCHES_X``` corresponds to the parameters used in **Table 7**.



-## Resources 

-### TFHE deep dive
- [TFHE Deep Dive - Part I - Ciphertext types](https://www.zama.ai/post/tfhe-deep-dive-part-1)
- [TFHE Deep Dive - Part II - Encodings and linear leveled operations](https://www.zama.ai/post/tfhe-deep-dive-part-2)
- [TFHE Deep Dive - Part III - Key switching and leveled multiplications](https://www.zama.ai/post/tfhe-deep-dive-part-3)
- [TFHE Deep Dive - Part IV - Programmable Bootstrapping](https://www.zama.ai/post/tfhe-deep-dive-part-4)
-<br></br>
-
-### Tutorials
- [[Video tutorial] Implement signed integers using TFHE-rs ](https://www.zama.ai/post/video-tutorial-implement-signed-integers-ssing-tfhe-rs)
- [Homomorphic parity bit](https://docs.zama.ai/tfhe-rs/tutorials/parity_bit)
- [Homomorphic case changing on Ascii string](https://docs.zama.ai/tfhe-rs/tutorials/ascii_fhe_string)
- [Boolean SHA256 with TFHE-rs](https://www.zama.ai/post/boolean-sha256-tfhe-rs)
- [Dark market with TFHE-rs](https://www.zama.ai/post/dark-market-tfhe-rs)
- [Regular expression engine with TFHE-rs](https://www.zama.ai/post/regex-engine-tfhe-rs)
-
-*Explore more useful resources in [TFHE-rs tutorials](https://docs.zama.ai/tfhe-rs/tutorials) and [Awesome Zama repo](https://github.com/zama-ai/awesome-zama)*
-<br></br>
-### Documentation
-
-Full, comprehensive documentation is available here: [https://docs.zama.ai/tfhe-rs](https://docs.zama.ai/tfhe-rs).
-<p align="right">
-  <a href="#about" > ↑ Back to top </a> 
-</p>
-
-
-## Working with TFHE-rs
-
-### Disclaimers
-
-#### Security Estimation
-
-Security estimations are done using the
-[Lattice Estimator](https://github.com/malb/lattice-estimator)
-with `red_cost_model = reduction.RC.BDGL16`.
-
-When a new update is published in the Lattice Estimator, we update parameters accordingly.
-
-### Security Model
-
-The default parameters for the TFHE-rs library are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at p_error = $2^{-40}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [1]. 
-
-[1] Li, Baiyu, et al. "Securing approximate homomorphic encryption using differential privacy." Annual International Cryptology Conference. Cham: Springer Nature Switzerland, 2022. https://eprint.iacr.org/2022/816.pdf
-
-#### Side-Channel Attacks
-
-Mitigation for side-channel attacks has not yet been implemented in TFHE-rs,
-and will be released in upcoming versions.
-<br></br>
-
-### Citations
-To cite TFHE-rs in academic papers, please use the following entry:
-
-```text
-@Misc{TFHE-rs,
-  title={{TFHE-rs: A Pure Rust Implementation of the TFHE Scheme for Boolean and Integer Arithmetics Over Encrypted Data}},
-  author={Zama},
-  year={2022},
-  note={\url{https://github.com/zama-ai/tfhe-rs}},
-}
-```
-
-### Contributing
-
-There are two ways to contribute to TFHE-rs:
-
- [Open issues](https://github.com/zama-ai/tfhe-rs/issues/new/choose) to report bugs and typos, or to suggest new ideas
- Request to become an official contributor by emailing [hello@zama.ai](mailto:hello@zama.ai).
-
-Becoming an approved contributor involves signing our Contributor License Agreement (CLA). Only approved contributors can send pull requests, so please make sure to get in touch before you do!
-<br></br>
-
-### License
-This software is distributed under the **BSD-3-Clause-Clear** license. If you have any questions, please contact us at hello@zama.ai.
-<p align="right">
-  <a href="#about" > ↑ Back to top </a> 
-</p>
-
-
-## Support
-
-<a target="_blank" href="https://community.zama.ai">
-<picture>
-  <source media="(prefers-color-scheme: dark)" srcset="https://github.com/zama-ai/tfhe-rs/assets/157474013/08656d0a-3f44-4126-b8b6-8c601dff5380">
-  <source media="(prefers-color-scheme: light)" srcset="https://github.com/zama-ai/tfhe-rs/assets/157474013/1c9c9308-50ac-4aab-a4b9-469bb8c536a4">
-  <img alt="Support">
-</picture>
-</a>
-
-🌟 If you find this project helpful or interesting, please consider giving it a star on GitHub! Your support helps to grow the community and motivates further development. 
-
-<p align="right">
-  <a href="#about" > ↑ Back to top </a> 
-</p>
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -15,6 +15,7 @@ Example of a Rust main below:
 ```rust
 use tfhe::{ConfigBuilder, generate_keys, FheBool};
 use tfhe::prelude::*;
+
 use tfhe_trivium::TriviumStream;

 fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
@@ -138,8 +139,10 @@ Example code:
 ```rust
 use tfhe::shortint::prelude::*;
 use tfhe::shortint::CastingKey;
+
 use tfhe::{ConfigBuilder, generate_keys, FheUint64};
 use tfhe::prelude::*;
+
 use tfhe_trivium::TriviumStreamShortint;

 fn test_shortint() {
--- a/apps/trivium/benches/kreyvium_bool.rs
+++ b/apps/trivium/benches/kreyvium_bool.rs
@@ -1,8 +1,10 @@
-use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheBool};
+
 use tfhe_trivium::KreyviumStream;

+use criterion::Criterion;
+
 pub fn kreyvium_bool_gen(c: &mut Criterion) {
    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);
--- a/apps/trivium/benches/kreyvium_byte.rs
+++ b/apps/trivium/benches/kreyvium_byte.rs
@@ -1,8 +1,10 @@
-use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64, FheUint8};
+
 use tfhe_trivium::{KreyviumStreamByte, TransCiphering};

+use criterion::Criterion;
+
 pub fn kreyvium_byte_gen(c: &mut Criterion) {
    let config = ConfigBuilder::default()
        .enable_function_evaluation()
--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -1,9 +1,12 @@
-use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::shortint::prelude::*;
+use tfhe::shortint::KeySwitchingKey;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
+
 use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};

+use criterion::Criterion;
+
 pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
--- a/apps/trivium/benches/trivium_bool.rs
+++ b/apps/trivium/benches/trivium_bool.rs
@@ -1,8 +1,10 @@
-use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheBool};
+
 use tfhe_trivium::TriviumStream;

+use criterion::Criterion;
+
 pub fn trivium_bool_gen(c: &mut Criterion) {
    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);
--- a/apps/trivium/benches/trivium_byte.rs
+++ b/apps/trivium/benches/trivium_byte.rs
@@ -1,8 +1,10 @@
-use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64, FheUint8};
+
 use tfhe_trivium::{TransCiphering, TriviumStreamByte};

+use criterion::Criterion;
+
 pub fn trivium_byte_gen(c: &mut Criterion) {
    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -1,9 +1,12 @@
-use criterion::Criterion;
 use tfhe::prelude::*;
 use tfhe::shortint::prelude::*;
+use tfhe::shortint::KeySwitchingKey;
 use tfhe::{generate_keys, ConfigBuilder, FheUint64};
+
 use tfhe_trivium::{TransCiphering, TriviumStreamShortint};

+use criterion::Criterion;
+
 pub fn trivium_shortint_warmup(c: &mut Criterion) {
    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
--- a/apps/trivium/src/kreyvium/kreyvium.rs
+++ b/apps/trivium/src/kreyvium/kreyvium.rs
@@ -2,10 +2,12 @@
 //! for the representation of the inner bits.

 use crate::static_deque::StaticDeque;
-use rayon::prelude::*;
+
 use tfhe::prelude::*;
 use tfhe::{set_server_key, unset_server_key, FheBool, ServerKey};

+use rayon::prelude::*;
+
 /// Internal trait specifying which operations are necessary for KreyviumStream generic type
 pub trait KreyviumBoolInput<OpOutput>:
    Sized
--- a/apps/trivium/src/kreyvium/kreyvium_byte.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_byte.rs
@@ -2,10 +2,12 @@
 //! for the representation of the inner bits.

 use crate::static_deque::{StaticByteDeque, StaticByteDequeInput};
-use rayon::prelude::*;
+
 use tfhe::prelude::*;
 use tfhe::{set_server_key, unset_server_key, FheUint8, ServerKey};

+use rayon::prelude::*;
+
 /// Internal trait specifying which operations are necessary for KreyviumStreamByte generic type
 pub trait KreyviumByteInput<OpOutput>:
    Sized
--- a/apps/trivium/src/kreyvium/kreyvium_shortint.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_shortint.rs
@@ -1,7 +1,9 @@
 use crate::static_deque::StaticDeque;
-use rayon::prelude::*;
+
 use tfhe::shortint::prelude::*;

+use rayon::prelude::*;
+
 /// KreyviumStreamShortint: a struct implementing the Kreyvium stream cipher, using a generic
 /// Ciphertext for the internal representation of bits (intended to represent a single bit). To be
 /// able to compute FHE operations, it also owns a ServerKey.
@@ -34,7 +36,7 @@ impl KreyviumStreamShortint {
        let mut c_register: [Ciphertext; 111] = [0; 111].map(|x| sk.create_trivial(x));

        for i in 0..93 {
-            a_register[i].clone_from(&key[128 - 93 + i]);
+            a_register[i] = key[128 - 93 + i].clone();
        }
        for i in 0..84 {
            b_register[i] = sk.create_trivial(iv[128 - 84 + i]);
--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -1,7 +1,8 @@
-use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
 use tfhe::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};

+use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
+
 // Values for these tests come from the github repo renaud1239/Kreyvium,
 // commit fd6828f68711276c25f55e605935028f5e843f43

--- a/apps/trivium/src/static_deque/mod.rs
+++ b/apps/trivium/src/static_deque/mod.rs
@@ -1,6 +1,5 @@
 #[allow(clippy::module_inception)]
 mod static_deque;
 pub use static_deque::StaticDeque;
-
 mod static_byte_deque;
 pub use static_byte_deque::{StaticByteDeque, StaticByteDequeInput};
--- a/apps/trivium/src/static_deque/static_byte_deque.rs
+++ b/apps/trivium/src/static_deque/static_byte_deque.rs
@@ -4,6 +4,7 @@
 //! This is pretending to store bits, and allows accessing bits in chunks of 8 consecutive.

 use crate::static_deque::StaticDeque;
+
 use tfhe::FheUint8;

 /// Internal trait specifying which operations are needed by StaticByteDeque
--- a/apps/trivium/src/trans_ciphering/mod.rs
+++ b/apps/trivium/src/trans_ciphering/mod.rs
@@ -2,11 +2,13 @@
 //! when trans ciphering is available to them.

 use crate::{KreyviumStreamByte, KreyviumStreamShortint, TriviumStreamByte, TriviumStreamShortint};
-use rayon::prelude::*;
-use tfhe::prelude::*;
 use tfhe::shortint::Ciphertext;
+
+use tfhe::prelude::*;
 use tfhe::{set_server_key, unset_server_key, FheUint64, FheUint8, ServerKey};

+use rayon::prelude::*;
+
 /// Triat specifying the interface for trans ciphering a FheUint64 object. Since it is meant
 /// to be used with stream ciphers, encryption and decryption are by default the same.
 pub trait TransCiphering {
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -1,7 +1,8 @@
-use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
 use tfhe::prelude::*;
 use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};

+use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
+
 // Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
 // file testvectors/trivium-80.80.test-vectors

--- a/apps/trivium/src/trivium/trivium_bool.rs
+++ b/apps/trivium/src/trivium/trivium_bool.rs
@@ -2,10 +2,12 @@
 //! for the representation of the inner bits.

 use crate::static_deque::StaticDeque;
-use rayon::prelude::*;
+
 use tfhe::prelude::*;
 use tfhe::{set_server_key, unset_server_key, FheBool, ServerKey};

+use rayon::prelude::*;
+
 /// Internal trait specifying which operations are necessary for TriviumStream generic type
 pub trait TriviumBoolInput<OpOutput>:
    Sized
--- a/apps/trivium/src/trivium/trivium_byte.rs
+++ b/apps/trivium/src/trivium/trivium_byte.rs
@@ -2,10 +2,12 @@
 //! for the representation of the inner bits.

 use crate::static_deque::{StaticByteDeque, StaticByteDequeInput};
-use rayon::prelude::*;
+
 use tfhe::prelude::*;
 use tfhe::{set_server_key, unset_server_key, FheUint8, ServerKey};

+use rayon::prelude::*;
+
 /// Internal trait specifying which operations are necessary for TriviumStreamByte generic type
 pub trait TriviumByteInput<OpOutput>:
    Sized
--- a/apps/trivium/src/trivium/trivium_shortint.rs
+++ b/apps/trivium/src/trivium/trivium_shortint.rs
@@ -1,7 +1,9 @@
 use crate::static_deque::StaticDeque;
-use rayon::prelude::*;
+
 use tfhe::shortint::prelude::*;

+use rayon::prelude::*;
+
 /// TriviumStreamShortint: a struct implementing the Trivium stream cipher, using a generic
 /// Ciphertext for the internal representation of bits (intended to represent a single bit). To be
 /// able to compute FHE operations, it also owns a ServerKey.
@@ -32,7 +34,7 @@ impl TriviumStreamShortint {
        let mut c_register: [Ciphertext; 111] = [0; 111].map(|x| sk.create_trivial(x));

        for i in 0..80 {
-            a_register[93 - 80 + i].clone_from(&key[i]);
+            a_register[93 - 80 + i] = key[i].clone();
            b_register[84 - 80 + i] = sk.create_trivial(iv[i]);
        }

--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -1,18 +0,0 @@
-[package]
-name = "tfhe-cuda-backend"
-version = "0.2.0"
-edition = "2021"
-authors = ["Zama team"]
-license = "BSD-3-Clause-Clear"
-description = "Cuda implementation of TFHE-rs primitives."
-homepage = "https://www.zama.ai/"
-documentation = "https://docs.zama.ai/tfhe-rs"
-repository = "https://github.com/zama-ai/tfhe-rs"
-readme = "README.md"
-keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
-
-[build-dependencies]
-cmake = { version = "0.1" }
-
-[dependencies]
-thiserror = "1.0"
--- a/backends/tfhe-cuda-backend/LICENSE
+++ b/backends/tfhe-cuda-backend/LICENSE
@@ -1,28 +0,0 @@
-BSD 3-Clause Clear License
-
-Copyright © 2024 ZAMA.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this
-list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice, this
-list of conditions and the following disclaimer in the documentation and/or other
-materials provided with the distribution.
-
-3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
-or promote products derived from this software without specific prior written permission.
-
-NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
-THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
-IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
-ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
-OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/backends/tfhe-cuda-backend/README.md
+++ b/backends/tfhe-cuda-backend/README.md
@@ -1,52 +0,0 @@
-# TFHE Cuda backend
-
-## Introduction
-
-The `tfhe-cuda-backend` holds the code for GPU acceleration of Zama's variant of TFHE.
-It implements CUDA/C++ functions to perform homomorphic operations on LWE ciphertexts.
-
-It provides functions to allocate memory on the GPU, to copy data back 
-and forth between the CPU and the GPU, to create and destroy Cuda streams, etc.:
- `cuda_create_stream`, `cuda_destroy_stream`
- `cuda_malloc`, `cuda_check_valid_malloc`
- `cuda_memcpy_async_to_cpu`, `cuda_memcpy_async_to_gpu`
- `cuda_get_number_of_gpus`
- `cuda_synchronize_device`
-The cryptographic operations it provides are:
- an amortized implementation of the TFHE programmable bootstrap: `cuda_bootstrap_amortized_lwe_ciphertext_vector_32` and `cuda_bootstrap_amortized_lwe_ciphertext_vector_64`
- a low latency implementation of the TFHE programmable bootstrap: `cuda_bootstrap_low latency_lwe_ciphertext_vector_32` and `cuda_bootstrap_low_latency_lwe_ciphertext_vector_64`
- the keyswitch: `cuda_keyswitch_lwe_ciphertext_vector_32` and `cuda_keyswitch_lwe_ciphertext_vector_64`
- the larger precision programmable bootstrap (wop PBS, which supports up to 16 bits of message while the classical PBS only supports up to 8 bits of message) and its sub-components: `cuda_wop_pbs_64`, `cuda_extract_bits_64`, `cuda_circuit_bootstrap_64`, `cuda_cmux_tree_64`, `cuda_blind_rotation_sample_extraction_64`
- acceleration for leveled operations: `cuda_negate_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_plaintext_vector_64`, `cuda_mult_lwe_ciphertext_vector_cleartext_vector`.
-
-## Dependencies
-
-**Disclaimer**: Compilation on Windows/Mac is not supported yet. Only Nvidia GPUs are supported. 
-
- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation
- [nvcc](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) >= 10.0
- [gcc](https://gcc.gnu.org/) >= 8.0 - check this [page](https://gist.github.com/ax3l/9489132) for more details about nvcc/gcc compatible versions
- [cmake](https://cmake.org/) >= 3.24
-
-## Build
-
-The Cuda project held in `tfhe-cuda-backend` can be compiled independently from TFHE-rs in the following way:
-```
-git clone git@github.com:zama-ai/tfhe-rs
-cd backends/tfhe-cuda-backend/cuda
-mkdir build
-cd build
-cmake ..
-make
-```
-The compute capability is detected automatically (with the first GPU information) and set accordingly.
-If your machine does not have an available Nvidia GPU, the compilation will work if you have the nvcc compiler installed. The generated executable will target a 7.0 compute capability (sm_70).
-
-## Links
-
- [TFHE](https://eprint.iacr.org/2018/421.pdf)
-
-## License
-
-This software is distributed under the BSD-3-Clause-Clear license. If you have any questions,
-please contact us at `hello@zama.ai`.
--- a/backends/tfhe-cuda-backend/build.rs
+++ b/backends/tfhe-cuda-backend/build.rs
@@ -1,34 +0,0 @@
-use std::env;
-use std::process::Command;
-
-fn main() {
-    if let Ok(val) = env::var("DOCS_RS") {
-        if val.parse::<u32>() == Ok(1) {
-            return;
-        }
-    }
-
-    println!("Build tfhe-cuda-backend");
-    if env::consts::OS == "linux" {
-        let output = Command::new("./get_os_name.sh").output().unwrap();
-        let distribution = String::from_utf8(output.stdout).unwrap();
-        if distribution != "Ubuntu\n" {
-            println!(
-                "cargo:warning=This Linux distribution is not officially supported. \
-                Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
-            );
-        }
-        let dest = cmake::build("cuda");
-        println!("cargo:rustc-link-search=native={}", dest.display());
-        println!("cargo:rustc-link-lib=static=tfhe_cuda_backend");
-        println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
-        println!("cargo:rustc-link-lib=gomp");
-        println!("cargo:rustc-link-lib=cudart");
-        println!("cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu/");
-        println!("cargo:rustc-link-lib=stdc++");
-    } else {
-        panic!(
-            "Error: platform not supported, tfhe-cuda-backend not built (only Linux is supported)"
-        );
-    }
-}
--- a/backends/tfhe-cuda-backend/cuda/.cmake-format-config.py
+++ b/backends/tfhe-cuda-backend/cuda/.cmake-format-config.py
@@ -1,10 +0,0 @@
-# -----------------------------
-# Options effecting formatting.
-# -----------------------------
-with section("format"):
-
-  # How wide to allow formatted cmake files
-  line_width = 120
-  
-  # How many spaces to tab for indent
-  tab_size = 2
--- a/backends/tfhe-cuda-backend/cuda/.gitignore
+++ b/backends/tfhe-cuda-backend/cuda/.gitignore
@@ -1,2 +0,0 @@
-/build/
-include/cuda_config.h
--- a/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
@@ -1,96 +0,0 @@
-cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
-project(tfhe_cuda_backend LANGUAGES CXX)
-
-# See if the minimum CUDA version is available. If not, only enable documentation building.
-set(MINIMUM_SUPPORTED_CUDA_VERSION 10.0)
-include(CheckLanguage)
-# See if CUDA is available
-check_language(CUDA)
-# If so, enable CUDA to check the version.
-if(CMAKE_CUDA_COMPILER)
-  enable_language(CUDA)
-endif()
-# If CUDA is not available, or the minimum version is too low do not build
-if(NOT CMAKE_CUDA_COMPILER)
-  message(FATAL_ERROR "Cuda compiler not found.")
-endif()
-
-if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION})
-  message(FATAL_ERROR "CUDA ${MINIMUM_SUPPORTED_CUDA_VERSION} or greater is required for compilation.")
-endif()
-# Get CUDA compute capability
-set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/cuda_script) # No suffix required
-set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/check_cuda.cu)
-execute_process(COMMAND nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE})
-execute_process(
-  COMMAND ${OUTPUTFILE}
-  RESULT_VARIABLE CUDA_RETURN_CODE
-  OUTPUT_VARIABLE ARCH)
-file(REMOVE ${OUTPUTFILE})
-
-if(${CUDA_RETURN_CODE} EQUAL 0)
-  set(CUDA_SUCCESS "TRUE")
-else()
-  set(CUDA_SUCCESS "FALSE")
-endif()
-
-if(${CUDA_SUCCESS})
-  message(STATUS "CUDA Architecture: ${ARCH}")
-  message(STATUS "CUDA Version: ${CUDA_VERSION_STRING}")
-  message(STATUS "CUDA Path: ${CUDA_TOOLKIT_ROOT_DIR}")
-  message(STATUS "CUDA Libraries: ${CUDA_LIBRARIES}")
-  message(STATUS "CUDA Performance Primitives: ${CUDA_npp_LIBRARY}")
-else()
-  message(WARNING ${ARCH})
-endif()
-
-if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE Release)
-endif()
-
-# Add OpenMP support
-find_package(OpenMP REQUIRED)
-
-set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -g")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${OpenMP_CXX_FLAGS}")
-if(${CUDA_SUCCESS})
-  set(CMAKE_CUDA_ARCHITECTURES native)
-  string(REPLACE "-arch=sm_" "" CUDA_ARCH "${ARCH}")
-  set(CUDA_ARCH "${CUDA_ARCH}0")
-else()
-  set(CMAKE_CUDA_ARCHITECTURES 70)
-  set(CUDA_ARCH "700")
-endif()
-
-add_compile_definitions(CUDA_ARCH=${CUDA_ARCH})
-
-# in production, should use -arch=sm_70 --ptxas-options=-v to see register spills -lineinfo for better debugging
-set(CMAKE_CUDA_FLAGS
-    "${CMAKE_CUDA_FLAGS} -ccbin ${CMAKE_CXX_COMPILER} -O3 \
-  -std=c++17 --no-exceptions  --expt-relaxed-constexpr -rdc=true \
-  --use_fast_math -Xcompiler -fPIC")
-
-set(INCLUDE_DIR include)
-
-add_subdirectory(src)
-enable_testing()
-add_subdirectory(tests_and_benchmarks)
-target_include_directories(tfhe_cuda_backend PRIVATE ${INCLUDE_DIR})
-
-# This is required for rust cargo build
-install(TARGETS tfhe_cuda_backend DESTINATION .)
-
-install(TARGETS tfhe_cuda_backend DESTINATION lib)
-
-# Define a function to add a lint target.
-find_file(CPPLINT NAMES cpplint cpplint.exe)
-if(CPPLINT)
-  # Add a custom target to lint all child projects. Dependencies are specified in child projects.
-  add_custom_target(all_lint)
-  # Don't trigger this target on ALL_BUILD or Visual Studio 'Rebuild Solution'
-  set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_ALL TRUE)
-  # set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE)
-endif()
--- a/backends/tfhe-cuda-backend/cuda/CPPLINT.cfg
+++ b/backends/tfhe-cuda-backend/cuda/CPPLINT.cfg
@@ -1,3 +0,0 @@
-set noparent 
-linelength=240
-filter=-legal/copyright,-readability/todo,-runtime/references,-build/c++17
--- a/backends/tfhe-cuda-backend/cuda/check_cuda.cu
+++ b/backends/tfhe-cuda-backend/cuda/check_cuda.cu
@@ -1,22 +0,0 @@
-#include <stdio.h>
-
-int main(int argc, char **argv) {
-  cudaDeviceProp dP;
-  float min_cc = 3.0;
-
-  int rc = cudaGetDeviceProperties(&dP, 0);
-  if (rc != cudaSuccess) {
-    cudaError_t error = cudaGetLastError();
-    printf("CUDA error: %s", cudaGetErrorString(error));
-    return rc; /* Failure */
-  }
-  if ((dP.major + (dP.minor / 10)) < min_cc) {
-    printf("Min Compute Capability of %2.1f required:  %d.%d found\n Not "
-           "Building CUDA Code",
-           min_cc, dP.major, dP.minor);
-    return 1; /* Failure */
-  } else {
-    printf("-arch=sm_%d%d", dP.major, dP.minor);
-    return 0; /* Success */
-  }
-}
--- a/backends/tfhe-cuda-backend/cuda/format_tfhe_cuda_backend.sh
+++ b/backends/tfhe-cuda-backend/cuda/format_tfhe_cuda_backend.sh
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-set -e
-
-while getopts ":c" option; do
-  case $option in
-    c)
-      # code to execute when flag1 is provided
-      find ./{include,src,tests_and_benchmarks/include,tests_and_benchmarks/tests,tests_and_benchmarks/benchmarks} -iregex '^.*\.\(cpp\|cu\|h\|cuh\)$' -print | xargs clang-format-15 -i -style='file' --dry-run --Werror
-      cmake-format -i CMakeLists.txt -c .cmake-format-config.py
-      find ./{include,src,tests_and_benchmarks/include,tests_and_benchmarks/tests,tests_and_benchmarks/benchmarks} -type f -name "CMakeLists.txt" | xargs -I % sh -c 'cmake-format -i % -c .cmake-format-config.py'
-      git diff --exit-code
-      exit
-      ;;
-  esac
-done
-find ./{include,src,tests_and_benchmarks/include,tests_and_benchmarks/tests,tests_and_benchmarks/benchmarks} -iregex '^.*\.\(cpp\|cu\|h\|cuh\)$' -print | xargs clang-format-15 -i -style='file'
-cmake-format -i CMakeLists.txt -c .cmake-format-config.py
-find ./{include,src,tests_and_benchmarks/include,tests_and_benchmarks/tests,tests_and_benchmarks/benchmarks} -type f -name "CMakeLists.txt" | xargs -I % sh -c 'cmake-format -i % -c .cmake-format-config.py'
--- a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
@@ -1,18 +0,0 @@
-#ifndef CUDA_CIPHERTEXT_H
-#define CUDA_CIPHERTEXT_H
-
-#include <cstdint>
-
-extern "C" {
-void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
-                                                  void *v_stream,
-                                                  uint32_t gpu_index,
-                                                  uint32_t number_of_cts,
-                                                  uint32_t lwe_dimension);
-void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
-                                                  void *v_stream,
-                                                  uint32_t gpu_index,
-                                                  uint32_t number_of_cts,
-                                                  uint32_t lwe_dimension);
-};
-#endif
--- a/backends/tfhe-cuda-backend/cuda/include/device.h
+++ b/backends/tfhe-cuda-backend/cuda/include/device.h
@@ -1,94 +0,0 @@
-#ifndef DEVICE_H
-#define DEVICE_H
-
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <cuda_runtime.h>
-
-#define synchronize_threads_in_block() __syncthreads()
-
-extern "C" {
-
-#define check_cuda_error(ans)                                                  \
-  { cuda_error((ans), __FILE__, __LINE__); }
-inline void cuda_error(cudaError_t code, const char *file, int line) {
-  if (code != cudaSuccess) {
-    std::fprintf(stderr, "Cuda error: %s %s %d\n", cudaGetErrorString(code),
-                 file, line);
-    std::abort();
-  }
-}
-#define PANIC(format, ...)                                                     \
-  {                                                                            \
-    std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__,         \
-                 __LINE__, __func__, ##__VA_ARGS__);                           \
-    std::abort();                                                              \
-  }
-
-struct cuda_stream_t {
-  cudaStream_t stream;
-  uint32_t gpu_index;
-
-  cuda_stream_t(uint32_t gpu_index) {
-    this->gpu_index = gpu_index;
-
-    check_cuda_error(cudaStreamCreate(&stream));
-  }
-
-  void release() {
-    check_cuda_error(cudaSetDevice(gpu_index));
-    check_cuda_error(cudaStreamDestroy(stream));
-  }
-
-  void synchronize() { check_cuda_error(cudaStreamSynchronize(stream)); }
-};
-
-cuda_stream_t *cuda_create_stream(uint32_t gpu_index);
-
-void cuda_destroy_stream(cuda_stream_t *stream);
-
-void *cuda_malloc(uint64_t size, uint32_t gpu_index);
-
-void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream);
-
-void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
-
-bool cuda_check_support_cooperative_groups();
-
-void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
-                              cuda_stream_t *stream);
-
-void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
-                                  cuda_stream_t *stream);
-
-void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
-                              cuda_stream_t *stream);
-
-void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
-                       cuda_stream_t *stream);
-
-int cuda_get_number_of_gpus();
-
-void cuda_synchronize_device(uint32_t gpu_index);
-
-void cuda_drop(void *ptr, uint32_t gpu_index);
-
-void cuda_drop_async(void *ptr, cuda_stream_t *stream);
-
-int cuda_get_max_shared_memory(uint32_t gpu_index);
-
-void cuda_synchronize_stream(cuda_stream_t *stream);
-
-void cuda_stream_add_callback(cuda_stream_t *stream,
-                              cudaStreamCallback_t callback, void *user_data);
-
-void host_free_on_stream_callback(cudaStream_t stream, cudaError_t status,
-                                  void *host_pointer);
-}
-
-template <typename Torus>
-void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
-                          Torus n);
-#endif
--- a/backends/tfhe-cuda-backend/cuda/include/helper_debug.cuh
+++ b/backends/tfhe-cuda-backend/cuda/include/helper_debug.cuh
@@ -1,100 +0,0 @@
-#include "cuComplex.h"
-#include "thrust/complex.h"
-#include <iostream>
-#include <string>
-#include <type_traits>
-
-#define PRINT_VARS
-#ifdef PRINT_VARS
-#define PRINT_DEBUG_5(var, begin, end, step, cond)                             \
-  _print_debug(var, #var, begin, end, step, cond, "", false)
-#define PRINT_DEBUG_6(var, begin, end, step, cond, text)                       \
-  _print_debug(var, #var, begin, end, step, cond, text, true)
-#define CAT(A, B) A##B
-#define PRINT_SELECT(NAME, NUM) CAT(NAME##_, NUM)
-#define GET_COUNT(_1, _2, _3, _4, _5, _6, COUNT, ...) COUNT
-#define VA_SIZE(...) GET_COUNT(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
-#define PRINT_DEBUG(...)                                                       \
-  PRINT_SELECT(PRINT_DEBUG, VA_SIZE(__VA_ARGS__))(__VA_ARGS__)
-#else
-#define PRINT_DEBUG(...)
-#endif
-
-template <typename T>
-__device__ typename std::enable_if<std::is_unsigned<T>::value, void>::type
-_print_debug(T *var, const char *var_name, int start, int end, int step,
-             bool cond, const char *text, bool has_text) {
-  __syncthreads();
-  if (cond) {
-    if (has_text)
-      printf("%s\n", text);
-    for (int i = start; i < end; i += step) {
-      printf("%s[%u]: %u\n", var_name, i, var[i]);
-    }
-  }
-  __syncthreads();
-}
-
-template <typename T>
-__device__ typename std::enable_if<std::is_signed<T>::value, void>::type
-_print_debug(T *var, const char *var_name, int start, int end, int step,
-             bool cond, const char *text, bool has_text) {
-  __syncthreads();
-  if (cond) {
-    if (has_text)
-      printf("%s\n", text);
-    for (int i = start; i < end; i += step) {
-      printf("%s[%u]: %d\n", var_name, i, var[i]);
-    }
-  }
-  __syncthreads();
-}
-
-template <typename T>
-__device__ typename std::enable_if<std::is_floating_point<T>::value, void>::type
-_print_debug(T *var, const char *var_name, int start, int end, int step,
-             bool cond, const char *text, bool has_text) {
-  __syncthreads();
-  if (cond) {
-    if (has_text)
-      printf("%s\n", text);
-    for (int i = start; i < end; i += step) {
-      printf("%s[%u]: %.15f\n", var_name, i, var[i]);
-    }
-  }
-  __syncthreads();
-}
-
-template <typename T>
-__device__
-    typename std::enable_if<std::is_same<T, thrust::complex<double>>::value,
-                            void>::type
-    _print_debug(T *var, const char *var_name, int start, int end, int step,
-                 bool cond, const char *text, bool has_text) {
-  __syncthreads();
-  if (cond) {
-    if (has_text)
-      printf("%s\n", text);
-    for (int i = start; i < end; i += step) {
-      printf("%s[%u]: %.15f , %.15f\n", var_name, i, var[i].real(),
-             var[i].imag());
-    }
-  }
-  __syncthreads();
-}
-
-template <typename T>
-__device__
-    typename std::enable_if<std::is_same<T, cuDoubleComplex>::value, void>::type
-    _print_debug(T *var, const char *var_name, int start, int end, int step,
-                 bool cond, const char *text, bool has_text) {
-  __syncthreads();
-  if (cond) {
-    if (has_text)
-      printf("%s\n", text);
-    for (int i = start; i < end; i += step) {
-      printf("%s[%u]: %.15f , %.15f\n", var_name, i, var[i].x, var[i].y);
-    }
-  }
-  __syncthreads();
-}
--- a/backends/tfhe-cuda-backend/cuda/include/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer.h
--- a/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
+++ b/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
@@ -1,21 +0,0 @@
-#ifndef CNCRT_KS_H_
-#define CNCRT_KS_H_
-
-#include <cstdint>
-
-extern "C" {
-
-void cuda_keyswitch_lwe_ciphertext_vector_32(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
-    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples);
-
-void cuda_keyswitch_lwe_ciphertext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
-    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples);
-}
-
-#endif // CNCRT_KS_H_
--- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
+++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
@@ -1,50 +0,0 @@
-#ifndef CUDA_LINALG_H_
-#define CUDA_LINALG_H_
-
-#include "programmable_bootstrap.h"
-#include <cstdint>
-#include <device.h>
-
-extern "C" {
-
-void cuda_negate_lwe_ciphertext_vector_32(cuda_stream_t *stream,
-                                          void *lwe_array_out,
-                                          void *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count);
-void cuda_negate_lwe_ciphertext_vector_64(cuda_stream_t *stream,
-                                          void *lwe_array_out,
-                                          void *lwe_array_in,
-                                          uint32_t input_lwe_dimension,
-                                          uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_32(cuda_stream_t *stream,
-                                       void *lwe_array_out,
-                                       void *lwe_array_in_1,
-                                       void *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_64(cuda_stream_t *stream,
-                                       void *lwe_array_out,
-                                       void *lwe_array_in_1,
-                                       void *lwe_array_in_2,
-                                       uint32_t input_lwe_dimension,
-                                       uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
-    void *plaintext_array_in, uint32_t input_lwe_dimension,
-    uint32_t input_lwe_ciphertext_count);
-void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
-    void *plaintext_array_in, uint32_t input_lwe_dimension,
-    uint32_t input_lwe_ciphertext_count);
-void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
-    void *cleartext_array_in, uint32_t input_lwe_dimension,
-    uint32_t input_lwe_ciphertext_count);
-void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
-    void *cleartext_array_in, uint32_t input_lwe_dimension,
-    uint32_t input_lwe_ciphertext_count);
-}
-
-#endif // CUDA_LINALG_H_
--- a/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap.h
+++ b/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap.h
@@ -1,320 +0,0 @@
-#ifndef CUDA_BOOTSTRAP_H
-#define CUDA_BOOTSTRAP_H
-
-#include "device.h"
-#include <cstdint>
-
-enum PBS_TYPE { MULTI_BIT = 0, CLASSICAL = 1 };
-enum PBS_VARIANT { DEFAULT = 0, CG = 1 };
-
-extern "C" {
-void cuda_fourier_polynomial_mul(void *input1, void *input2, void *output,
-                                 cuda_stream_t *stream,
-                                 uint32_t polynomial_size,
-                                 uint32_t total_polynomials);
-
-void cuda_convert_lwe_programmable_bootstrap_key_32(
-    void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
-    uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size);
-
-void cuda_convert_lwe_programmable_bootstrap_key_64(
-    void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
-    uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size);
-
-void scratch_cuda_programmable_bootstrap_amortized_32(
-    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
-    uint32_t max_shared_memory, bool allocate_gpu_memory);
-
-void scratch_cuda_programmable_bootstrap_amortized_64(
-    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
-    uint32_t max_shared_memory, bool allocate_gpu_memory);
-
-void cuda_programmable_bootstrap_amortized_lwe_ciphertext_vector_32(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
-    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
-
-void cuda_programmable_bootstrap_amortized_lwe_ciphertext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
-    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
-
-void cleanup_cuda_programmable_bootstrap_amortized(cuda_stream_t *stream,
-                                                   int8_t **pbs_buffer);
-
-void scratch_cuda_programmable_bootstrap_32(
-    cuda_stream_t *stream, int8_t **buffer, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
-    bool allocate_gpu_memory);
-
-void scratch_cuda_programmable_bootstrap_64(
-    cuda_stream_t *stream, int8_t **buffer, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
-    bool allocate_gpu_memory);
-
-void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
-    void *lwe_input_indexes, void *bootstrapping_key, int8_t *buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
-
-void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
-    void *lwe_input_indexes, void *bootstrapping_key, int8_t *buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
-
-void cleanup_cuda_programmable_bootstrap(cuda_stream_t *stream,
-                                         int8_t **pbs_buffer);
-
-uint64_t get_buffer_size_programmable_bootstrap_amortized_64(
-    uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
-
-uint64_t get_buffer_size_programmable_bootstrap_64(
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
-}
-
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_full_sm_programmable_bootstrap_step_one(
-    uint32_t polynomial_size) {
-  return sizeof(Torus) * polynomial_size +      // accumulator_rotated
-         sizeof(double2) * polynomial_size / 2; // accumulator fft
-}
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_full_sm_programmable_bootstrap_step_two(
-    uint32_t polynomial_size) {
-  return sizeof(Torus) * polynomial_size +      // accumulator
-         sizeof(double2) * polynomial_size / 2; // accumulator fft
-}
-
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_partial_sm_programmable_bootstrap(uint32_t polynomial_size) {
-  return sizeof(double2) * polynomial_size / 2; // accumulator fft
-}
-
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_full_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
-  return sizeof(Torus) * polynomial_size +      // accumulator_rotated
-         sizeof(Torus) * polynomial_size +      // accumulator
-         sizeof(double2) * polynomial_size / 2; // accumulator fft
-}
-
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
-  return sizeof(double2) * polynomial_size / 2; // accumulator fft mask & body
-}
-
-template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;
-
-template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
-  int8_t *d_mem;
-
-  Torus *global_accumulator;
-  double2 *global_accumulator_fft;
-
-  PBS_VARIANT pbs_variant;
-
-  pbs_buffer(cuda_stream_t *stream, uint32_t glwe_dimension,
-             uint32_t polynomial_size, uint32_t level_count,
-             uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
-             bool allocate_gpu_memory) {
-    this->pbs_variant = pbs_variant;
-
-    auto max_shared_memory = cuda_get_max_shared_memory(stream->gpu_index);
-
-    if (allocate_gpu_memory) {
-      switch (pbs_variant) {
-      case PBS_VARIANT::DEFAULT: {
-        uint64_t full_sm_step_one =
-            get_buffer_size_full_sm_programmable_bootstrap_step_one<Torus>(
-                polynomial_size);
-        uint64_t full_sm_step_two =
-            get_buffer_size_full_sm_programmable_bootstrap_step_two<Torus>(
-                polynomial_size);
-        uint64_t partial_sm =
-            get_buffer_size_partial_sm_programmable_bootstrap<Torus>(
-                polynomial_size);
-
-        uint64_t partial_dm_step_one = full_sm_step_one - partial_sm;
-        uint64_t partial_dm_step_two = full_sm_step_two - partial_sm;
-        uint64_t full_dm = full_sm_step_one;
-
-        uint64_t device_mem = 0;
-        if (max_shared_memory < partial_sm) {
-          device_mem = full_dm * input_lwe_ciphertext_count * level_count *
-                       (glwe_dimension + 1);
-        } else if (max_shared_memory < full_sm_step_two) {
-          device_mem =
-              (partial_dm_step_two + partial_dm_step_one * level_count) *
-              input_lwe_ciphertext_count * (glwe_dimension + 1);
-        } else if (max_shared_memory < full_sm_step_one) {
-          device_mem = partial_dm_step_one * input_lwe_ciphertext_count *
-                       level_count * (glwe_dimension + 1);
-        }
-        // Otherwise, both kernels run all in shared memory
-        d_mem = (int8_t *)cuda_malloc_async(device_mem, stream);
-
-        global_accumulator_fft = (double2 *)cuda_malloc_async(
-            (glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
-                (polynomial_size / 2) * sizeof(double2),
-            stream);
-
-        global_accumulator = (Torus *)cuda_malloc_async(
-            (glwe_dimension + 1) * input_lwe_ciphertext_count *
-                polynomial_size * sizeof(Torus),
-            stream);
-      } break;
-      case PBS_VARIANT::CG: {
-        uint64_t full_sm =
-            get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(
-                polynomial_size);
-        uint64_t partial_sm =
-            get_buffer_size_partial_sm_programmable_bootstrap_cg<Torus>(
-                polynomial_size);
-
-        uint64_t partial_dm = full_sm - partial_sm;
-        uint64_t full_dm = full_sm;
-        uint64_t device_mem = 0;
-
-        if (max_shared_memory < partial_sm) {
-          device_mem = full_dm * input_lwe_ciphertext_count * level_count *
-                       (glwe_dimension + 1);
-        } else if (max_shared_memory < full_sm) {
-          device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
-                       (glwe_dimension + 1);
-        }
-
-        // Otherwise, both kernels run all in shared memory
-        d_mem = (int8_t *)cuda_malloc_async(device_mem, stream);
-
-        global_accumulator_fft = (double2 *)cuda_malloc_async(
-            (glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
-                polynomial_size / 2 * sizeof(double2),
-            stream);
-      } break;
-      default:
-        PANIC("Cuda error (PBS): unsupported implementation variant.")
-      }
-    }
-  }
-
-  void release(cuda_stream_t *stream) {
-    cuda_drop_async(d_mem, stream);
-    cuda_drop_async(global_accumulator_fft, stream);
-
-    if (pbs_variant == DEFAULT)
-      cuda_drop_async(global_accumulator, stream);
-  }
-};
-
-template <typename Torus>
-__host__ __device__ uint64_t get_buffer_size_programmable_bootstrap_cg(
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
-
-  uint64_t full_sm =
-      get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
-  uint64_t partial_sm =
-      get_buffer_size_partial_sm_programmable_bootstrap_cg<Torus>(
-          polynomial_size);
-  uint64_t partial_dm = full_sm - partial_sm;
-  uint64_t full_dm = full_sm;
-  uint64_t device_mem = 0;
-  if (max_shared_memory < partial_sm) {
-    device_mem = full_dm * input_lwe_ciphertext_count * level_count *
-                 (glwe_dimension + 1);
-  } else if (max_shared_memory < full_sm) {
-    device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
-                 (glwe_dimension + 1);
-  }
-  uint64_t buffer_size = device_mem + (glwe_dimension + 1) * level_count *
-                                          input_lwe_ciphertext_count *
-                                          polynomial_size / 2 * sizeof(double2);
-  return buffer_size + buffer_size % sizeof(double2);
-}
-
-template <typename Torus>
-bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
-                                                   uint32_t polynomial_size,
-                                                   uint32_t level_count,
-                                                   uint32_t num_samples,
-                                                   uint32_t max_shared_memory);
-
-template <typename Torus>
-void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
-    Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
-    Torus *lwe_input_indexes, double2 *bootstrapping_key,
-    pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples, uint32_t num_luts,
-    uint32_t lwe_idx, uint32_t max_shared_memory);
-
-template <typename Torus>
-void cuda_programmable_bootstrap_lwe_ciphertext_vector(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
-    Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
-    Torus *lwe_input_indexes, double2 *bootstrapping_key,
-    pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples, uint32_t num_luts,
-    uint32_t lwe_idx, uint32_t max_shared_memory);
-
-template <typename Torus, typename STorus>
-void scratch_cuda_programmable_bootstrap_cg(
-    cuda_stream_t *stream, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
-    bool allocate_gpu_memory);
-
-template <typename Torus, typename STorus>
-void scratch_cuda_programmable_bootstrap(
-    cuda_stream_t *stream, pbs_buffer<Torus, CLASSICAL> **buffer,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
-    bool allocate_gpu_memory);
-
-#ifdef __CUDACC__
-__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
-                                         int glwe_dimension,
-                                         uint32_t level_count);
-
-template <typename T>
-__device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level,
-                                     uint32_t polynomial_size,
-                                     int glwe_dimension, uint32_t level_count);
-
-template <typename T>
-__device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level,
-                                     uint32_t polynomial_size,
-                                     int glwe_dimension, uint32_t level_count);
-
-template <typename T>
-__device__ T *get_multi_bit_ith_lwe_gth_group_kth_block(
-    T *ptr, int g, int i, int k, int level, uint32_t grouping_factor,
-    uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count);
-
-#endif
-
-#endif // CUDA_BOOTSTRAP_H
--- a/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap_multibit.h
+++ b/backends/tfhe-cuda-backend/cuda/include/programmable_bootstrap_multibit.h
@@ -1,241 +0,0 @@
-#ifndef CUDA_MULTI_BIT_H
-#define CUDA_MULTI_BIT_H
-
-#include "programmable_bootstrap.h"
-#include <cstdint>
-
-extern "C" {
-
-bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t num_samples, uint32_t max_shared_memory);
-
-void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
-    void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
-    uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size,
-    uint32_t grouping_factor);
-
-void scratch_cuda_multi_bit_programmable_bootstrap_64(
-    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
-    uint32_t max_shared_memory, bool allocate_gpu_memory,
-    uint32_t chunk_size = 0);
-
-void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
-    void *lwe_input_indexes, void *bootstrapping_key, int8_t *buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
-    uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
-    uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0);
-
-void scratch_cuda_generic_multi_bit_programmable_bootstrap_64(
-    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
-    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
-    uint32_t max_shared_memory, bool allocate_gpu_memory,
-    uint32_t lwe_chunk_size = 0);
-
-void cuda_generic_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
-    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
-    uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
-    uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0);
-
-void cleanup_cuda_multi_bit_programmable_bootstrap(cuda_stream_t *stream,
-                                                   int8_t **pbs_buffer);
-}
-
-template <typename Torus, typename STorus>
-void scratch_cuda_cg_multi_bit_programmable_bootstrap(
-    cuda_stream_t *stream, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t level_count, uint32_t grouping_factor,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
-    bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0);
-
-template <typename Torus>
-void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
-    Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
-    Torus *lwe_input_indexes, Torus *bootstrapping_key,
-    pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
-    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory,
-    uint32_t lwe_chunk_size = 0);
-
-template <typename Torus, typename STorus>
-void scratch_cuda_multi_bit_programmable_bootstrap(
-    cuda_stream_t *stream, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
-    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
-    uint32_t level_count, uint32_t grouping_factor,
-    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
-    bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0);
-
-template <typename Torus>
-void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
-    Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
-    Torus *lwe_input_indexes, Torus *bootstrapping_key,
-    pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
-    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
-    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
-    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory,
-    uint32_t lwe_chunk_size = 0);
-
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle(
-    uint32_t polynomial_size);
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_full_sm_multibit_programmable_bootstrap_step_one(
-    uint32_t polynomial_size);
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_full_sm_multibit_programmable_bootstrap_step_two(
-    uint32_t polynomial_size);
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_partial_sm_multibit_programmable_bootstrap_step_one(
-    uint32_t polynomial_size);
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_full_sm_cg_multibit_programmable_bootstrap(
-    uint32_t polynomial_size);
-template <typename Torus>
-__host__ __device__ uint64_t
-get_buffer_size_partial_sm_cg_multibit_programmable_bootstrap(
-    uint32_t polynomial_size);
-
-template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
-  int8_t *d_mem_keybundle = NULL;
-  int8_t *d_mem_acc_step_one = NULL;
-  int8_t *d_mem_acc_step_two = NULL;
-  int8_t *d_mem_acc_cg = NULL;
-
-  double2 *keybundle_fft;
-  Torus *global_accumulator;
-  double2 *global_accumulator_fft;
-
-  PBS_VARIANT pbs_variant;
-
-  pbs_buffer(cuda_stream_t *stream, uint32_t glwe_dimension,
-             uint32_t polynomial_size, uint32_t level_count,
-             uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
-             PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
-    this->pbs_variant = pbs_variant;
-    auto max_shared_memory = cuda_get_max_shared_memory(stream->gpu_index);
-
-    uint64_t full_sm_keybundle =
-        get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle<
-            Torus>(polynomial_size);
-    uint64_t full_sm_accumulate_step_one =
-        get_buffer_size_full_sm_multibit_programmable_bootstrap_step_one<Torus>(
-            polynomial_size);
-    uint64_t partial_sm_accumulate_step_one =
-        get_buffer_size_partial_sm_multibit_programmable_bootstrap_step_one<
-            Torus>(polynomial_size);
-    uint64_t full_sm_accumulate_step_two =
-        get_buffer_size_full_sm_multibit_programmable_bootstrap_step_two<Torus>(
-            polynomial_size);
-    uint64_t full_sm_cg_accumulate =
-        get_buffer_size_full_sm_cg_multibit_programmable_bootstrap<Torus>(
-            polynomial_size);
-    uint64_t partial_sm_cg_accumulate =
-        get_buffer_size_partial_sm_cg_multibit_programmable_bootstrap<Torus>(
-            polynomial_size);
-
-    auto num_blocks_keybundle = input_lwe_ciphertext_count * lwe_chunk_size *
-                                (glwe_dimension + 1) * (glwe_dimension + 1) *
-                                level_count;
-    auto num_blocks_acc_step_one =
-        level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
-    auto num_blocks_acc_step_two =
-        input_lwe_ciphertext_count * (glwe_dimension + 1);
-    auto num_blocks_acc_cg =
-        level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
-
-    if (allocate_gpu_memory) {
-      // Keybundle
-      if (max_shared_memory < full_sm_keybundle)
-        d_mem_keybundle = (int8_t *)cuda_malloc_async(
-            num_blocks_keybundle * full_sm_keybundle, stream);
-
-      switch (pbs_variant) {
-      case DEFAULT:
-        // Accumulator step one
-        if (max_shared_memory < partial_sm_accumulate_step_one)
-          d_mem_acc_step_one = (int8_t *)cuda_malloc_async(
-              num_blocks_acc_step_one * full_sm_accumulate_step_one, stream);
-        else if (max_shared_memory < full_sm_accumulate_step_one)
-          d_mem_acc_step_one = (int8_t *)cuda_malloc_async(
-              num_blocks_acc_step_one * partial_sm_accumulate_step_one, stream);
-
-        // Accumulator step two
-        if (max_shared_memory < full_sm_accumulate_step_two)
-          d_mem_acc_step_two = (int8_t *)cuda_malloc_async(
-              num_blocks_acc_step_two * full_sm_accumulate_step_two, stream);
-        break;
-      case CG:
-        // Accumulator CG
-        if (max_shared_memory < partial_sm_cg_accumulate)
-          d_mem_acc_cg = (int8_t *)cuda_malloc_async(
-              num_blocks_acc_cg * full_sm_cg_accumulate, stream);
-        else if (max_shared_memory < full_sm_cg_accumulate)
-          d_mem_acc_cg = (int8_t *)cuda_malloc_async(
-              num_blocks_acc_cg * partial_sm_cg_accumulate, stream);
-        break;
-      default:
-        PANIC("Cuda error (PBS): unsupported implementation variant.")
-      }
-
-      keybundle_fft = (double2 *)cuda_malloc_async(
-          num_blocks_keybundle * (polynomial_size / 2) * sizeof(double2),
-          stream);
-      global_accumulator = (Torus *)cuda_malloc_async(
-          num_blocks_acc_step_two * polynomial_size * sizeof(Torus), stream);
-      global_accumulator_fft = (double2 *)cuda_malloc_async(
-          num_blocks_acc_step_one * (polynomial_size / 2) * sizeof(double2),
-          stream);
-    }
-  }
-
-  void release(cuda_stream_t *stream) {
-
-    if (d_mem_keybundle)
-      cuda_drop_async(d_mem_keybundle, stream);
-    switch (pbs_variant) {
-    case DEFAULT:
-      if (d_mem_acc_step_one)
-        cuda_drop_async(d_mem_acc_step_one, stream);
-      if (d_mem_acc_step_two)
-        cuda_drop_async(d_mem_acc_step_two, stream);
-      break;
-    case CG:
-      if (d_mem_acc_cg)
-        cuda_drop_async(d_mem_acc_cg, stream);
-      break;
-    default:
-      PANIC("Cuda error (PBS): unsupported implementation variant.")
-    }
-
-    cuda_drop_async(keybundle_fft, stream);
-    cuda_drop_async(global_accumulator, stream);
-    cuda_drop_async(global_accumulator_fft, stream);
-  }
-};
-
-#ifdef __CUDACC__
-
-__host__ uint32_t get_lwe_chunk_size(uint32_t ct_count);
-
-#endif
-
-#endif // CUDA_MULTI_BIT_H
--- a/backends/tfhe-cuda-backend/cuda/src/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/src/CMakeLists.txt
@@ -1,18 +0,0 @@
-set(SOURCES
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bit_extraction.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bitwise_ops.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap_multibit.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/ciphertext.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/circuit_bootstrap.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/device.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/integer.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/keyswitch.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/linear_algebra.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/shifts.h
-    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/vertical_packing.h)
-file(GLOB_RECURSE SOURCES "*.cu")
-add_library(tfhe_cuda_backend STATIC ${SOURCES})
-set_target_properties(tfhe_cuda_backend PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-target_link_libraries(tfhe_cuda_backend PUBLIC cudart OpenMP::OpenMP_CXX)
-target_include_directories(tfhe_cuda_backend PRIVATE .)
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
@@ -1 +0,0 @@
-#include "ciphertext.cuh"
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
@@ -1,44 +0,0 @@
-#ifndef CUDA_CIPHERTEXT_CUH
-#define CUDA_CIPHERTEXT_CUH
-
-#include "ciphertext.h"
-#include "device.h"
-#include <cstdint>
-
-template <typename T>
-void cuda_convert_lwe_ciphertext_vector_to_gpu(T *dest, T *src,
-                                               cuda_stream_t *stream,
-                                               uint32_t number_of_cts,
-                                               uint32_t lwe_dimension) {
-  cudaSetDevice(stream->gpu_index);
-  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
-  cuda_memcpy_async_to_gpu(dest, src, size, stream);
-}
-
-void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
-                                                  cuda_stream_t *stream,
-                                                  uint32_t number_of_cts,
-                                                  uint32_t lwe_dimension) {
-  cuda_convert_lwe_ciphertext_vector_to_gpu<uint64_t>(
-      (uint64_t *)dest, (uint64_t *)src, stream, number_of_cts, lwe_dimension);
-}
-
-template <typename T>
-void cuda_convert_lwe_ciphertext_vector_to_cpu(T *dest, T *src,
-                                               cuda_stream_t *stream,
-                                               uint32_t number_of_cts,
-                                               uint32_t lwe_dimension) {
-  cudaSetDevice(stream->gpu_index);
-  uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
-  cuda_memcpy_async_to_cpu(dest, src, size, stream);
-}
-
-void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
-                                                  cuda_stream_t *stream,
-                                                  uint32_t number_of_cts,
-                                                  uint32_t lwe_dimension) {
-  cuda_convert_lwe_ciphertext_vector_to_cpu<uint64_t>(
-      (uint64_t *)dest, (uint64_t *)src, stream, number_of_cts, lwe_dimension);
-}
-
-#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/gadget.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/gadget.cuh
@@ -1,162 +0,0 @@
-#ifndef CNCRT_CRYPTO_CUH
-#define CNCRT_CRPYTO_CUH
-
-#include "device.h"
-#include <cstdint>
-
-/**
- * GadgetMatrix implements the iterator design pattern to decompose a set of
- * num_poly consecutive polynomials with degree params::degree. A total of
- * level_count levels is expected and each call to decompose_and_compress_next()
- * writes to the result the next level. It is also possible to advance an
- * arbitrary amount of levels by using decompose_and_compress_level().
- *
- * This class always decomposes the entire set of num_poly polynomials.
- * By default, it works on a single polynomial.
- */
-#pragma once
-template <typename T, class params> class GadgetMatrix {
-private:
-  uint32_t level_count;
-  uint32_t base_log;
-  uint32_t mask;
-  uint32_t halfbg;
-  uint32_t num_poly;
-  T offset;
-  int current_level;
-  T mask_mod_b;
-  T *state;
-
-public:
-  __device__ GadgetMatrix(uint32_t base_log, uint32_t level_count, T *state,
-                          uint32_t num_poly = 1)
-      : base_log(base_log), level_count(level_count), num_poly(num_poly),
-        state(state) {
-
-    mask_mod_b = (1ll << base_log) - 1ll;
-    current_level = level_count;
-    int tid = threadIdx.x;
-    for (int i = 0; i < num_poly * params::opt; i++) {
-      state[tid] >>= (sizeof(T) * 8 - base_log * level_count);
-      tid += params::degree / params::opt;
-    }
-    synchronize_threads_in_block();
-  }
-
-  // Decomposes all polynomials at once
-  __device__ void decompose_and_compress_next(double2 *result) {
-    for (int j = 0; j < num_poly; j++) {
-      auto result_slice = result + j * params::degree / 2;
-      decompose_and_compress_next_polynomial(result_slice, j);
-    }
-  }
-
-  // Decomposes a single polynomial
-  __device__ void decompose_and_compress_next_polynomial(double2 *result,
-                                                         int j) {
-    if (j == 0)
-      current_level -= 1;
-
-    int tid = threadIdx.x;
-    auto state_slice = state + j * params::degree;
-    for (int i = 0; i < params::opt / 2; i++) {
-      T res_re = state_slice[tid] & mask_mod_b;
-      T res_im = state_slice[tid + params::degree / 2] & mask_mod_b;
-      state_slice[tid] >>= base_log;
-      state_slice[tid + params::degree / 2] >>= base_log;
-      T carry_re = ((res_re - 1ll) | state_slice[tid]) & res_re;
-      T carry_im =
-          ((res_im - 1ll) | state_slice[tid + params::degree / 2]) & res_im;
-      carry_re >>= (base_log - 1);
-      carry_im >>= (base_log - 1);
-      state_slice[tid] += carry_re;
-      state_slice[tid + params::degree / 2] += carry_im;
-      res_re -= carry_re << base_log;
-      res_im -= carry_im << base_log;
-
-      result[tid].x = (int32_t)res_re;
-      result[tid].y = (int32_t)res_im;
-
-      tid += params::degree / params::opt;
-    }
-    synchronize_threads_in_block();
-  }
-
-  // Decomposes a single polynomial
-  __device__ void
-  decompose_and_compress_next_polynomial_elements(double2 *result, int j) {
-    if (j == 0)
-      current_level -= 1;
-
-    int tid = threadIdx.x;
-    auto state_slice = state + j * params::degree;
-    for (int i = 0; i < params::opt / 2; i++) {
-      T res_re = state_slice[tid] & mask_mod_b;
-      T res_im = state_slice[tid + params::degree / 2] & mask_mod_b;
-      state_slice[tid] >>= base_log;
-      state_slice[tid + params::degree / 2] >>= base_log;
-      T carry_re = ((res_re - 1ll) | state_slice[tid]) & res_re;
-      T carry_im =
-          ((res_im - 1ll) | state_slice[tid + params::degree / 2]) & res_im;
-      carry_re >>= (base_log - 1);
-      carry_im >>= (base_log - 1);
-      state_slice[tid] += carry_re;
-      state_slice[tid + params::degree / 2] += carry_im;
-      res_re -= carry_re << base_log;
-      res_im -= carry_im << base_log;
-
-      result[i].x = (int32_t)res_re;
-      result[i].y = (int32_t)res_im;
-
-      tid += params::degree / params::opt;
-    }
-    synchronize_threads_in_block();
-  }
-
-  __device__ void decompose_and_compress_level(double2 *result, int level) {
-    for (int i = 0; i < level_count - level; i++)
-      decompose_and_compress_next(result);
-  }
-};
-
-template <typename T> class GadgetMatrixSingle {
-private:
-  uint32_t level_count;
-  uint32_t base_log;
-  uint32_t mask;
-  uint32_t halfbg;
-  T offset;
-
-public:
-  __device__ GadgetMatrixSingle(uint32_t base_log, uint32_t level_count)
-      : base_log(base_log), level_count(level_count) {
-    uint32_t bg = 1 << base_log;
-    this->halfbg = bg / 2;
-    this->mask = bg - 1;
-    T temp = 0;
-    for (int i = 0; i < this->level_count; i++) {
-      temp += 1ULL << (sizeof(T) * 8 - (i + 1) * this->base_log);
-    }
-    this->offset = temp * this->halfbg;
-  }
-
-  __device__ T decompose_one_level_single(T element, uint32_t level) {
-    T s = element + this->offset;
-    uint32_t decal = (sizeof(T) * 8 - (level + 1) * this->base_log);
-    T temp1 = (s >> decal) & this->mask;
-    return (T)(temp1 - this->halfbg);
-  }
-};
-
-template <typename Torus>
-__device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) {
-  Torus res = state & mask_mod_b;
-  state >>= base_log;
-  Torus carry = ((res - 1ll) | state) & res;
-  carry >>= base_log - 1;
-  state += carry;
-  res -= carry << base_log;
-  return res;
-}
-
-#endif // CNCRT_CRPYTO_H
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
@@ -1,74 +0,0 @@
-#ifndef CNCRT_GGSW_CUH
-#define CNCRT_GGSW_CUH
-
-#include "device.h"
-#include "fft/bnsmfft.cuh"
-#include "polynomial/parameters.cuh"
-
-template <typename T, typename ST, class params, sharedMemDegree SMD>
-__global__ void device_batch_fft_ggsw_vector(double2 *dest, T *src,
-                                             int8_t *device_mem) {
-
-  extern __shared__ int8_t sharedmem[];
-  double2 *selected_memory;
-
-  if constexpr (SMD == FULLSM)
-    selected_memory = (double2 *)sharedmem;
-  else
-    selected_memory = (double2 *)device_mem[blockIdx.x * params::degree];
-
-  // Compression
-  int offset = blockIdx.x * blockDim.x;
-
-  int tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    ST x = src[(tid) + params::opt * offset];
-    ST y = src[(tid + params::degree / 2) + params::opt * offset];
-    selected_memory[tid].x = x / (double)std::numeric_limits<T>::max();
-    selected_memory[tid].y = y / (double)std::numeric_limits<T>::max();
-    tid += params::degree / params::opt;
-  }
-  synchronize_threads_in_block();
-
-  // Switch to the FFT space
-  NSMFFT_direct<HalfDegree<params>>(selected_memory);
-  synchronize_threads_in_block();
-
-  // Write the output to global memory
-  tid = threadIdx.x;
-#pragma unroll
-  for (int j = 0; j < params::opt / 2; j++) {
-    dest[tid + (params::opt >> 1) * offset] = selected_memory[tid];
-    tid += params::degree / params::opt;
-  }
-}
-
-/**
- * Applies the FFT transform on sequence of GGSW ciphertexts already in the
- * global memory
- */
-template <typename T, typename ST, class params>
-void batch_fft_ggsw_vector(cuda_stream_t *stream, double2 *dest, T *src,
-                           int8_t *d_mem, uint32_t r, uint32_t glwe_dim,
-                           uint32_t polynomial_size, uint32_t level_count,
-                           uint32_t gpu_index, uint32_t max_shared_memory) {
-  cudaSetDevice(stream->gpu_index);
-
-  int shared_memory_size = sizeof(double) * polynomial_size;
-
-  int gridSize = r * (glwe_dim + 1) * (glwe_dim + 1) * level_count;
-  int blockSize = polynomial_size / params::opt;
-
-  if (max_shared_memory < shared_memory_size) {
-    device_batch_fft_ggsw_vector<T, ST, params, NOSM>
-        <<<gridSize, blockSize, 0, stream->stream>>>(dest, src, d_mem);
-  } else {
-    device_batch_fft_ggsw_vector<T, ST, params, FULLSM>
-        <<<gridSize, blockSize, shared_memory_size, stream->stream>>>(dest, src,
-                                                                      d_mem);
-  }
-  check_cuda_error(cudaGetLastError());
-}
-
-#endif // CNCRT_GGSW_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
@@ -1,48 +0,0 @@
-#include "keyswitch.cuh"
-#include "keyswitch.h"
-#include <cstdint>
-
-/* Perform keyswitch on a batch of 32 bits input LWE ciphertexts.
- * Head out to the equivalent operation on 64 bits for more details.
- */
-void cuda_keyswitch_lwe_ciphertext_vector_32(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
-    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples) {
-  cuda_keyswitch_lwe_ciphertext_vector(
-      stream, static_cast<uint32_t *>(lwe_array_out),
-      static_cast<uint32_t *>(lwe_output_indexes),
-      static_cast<uint32_t *>(lwe_array_in),
-      static_cast<uint32_t *>(lwe_input_indexes), static_cast<uint32_t *>(ksk),
-      lwe_dimension_in, lwe_dimension_out, base_log, level_count, num_samples);
-}
-
-/* Perform keyswitch on a batch of 64 bits input LWE ciphertexts.
- *
- * - `v_stream` is a void pointer to the Cuda stream to be used in the kernel
- * launch
- * - `gpu_index` is the index of the GPU to be used in the kernel launch
- *  - lwe_array_out: output batch of num_samples keyswitched ciphertexts c =
- * (a0,..an-1,b) where n is the output LWE dimension (lwe_dimension_out)
- *  - lwe_array_in: input batch of num_samples LWE ciphertexts, containing
- * lwe_dimension_in mask values + 1 body value
- *  - ksk: the keyswitch key to be used in the operation
- *  - base log: the log of the base used in the decomposition (should be the one
- * used to create the ksk)
- *
- * This function calls a wrapper to a device kernel that performs the keyswitch
- * 	- num_samples blocks of threads are launched
- */
-void cuda_keyswitch_lwe_ciphertext_vector_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
-    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
-    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples) {
-  cuda_keyswitch_lwe_ciphertext_vector(
-      stream, static_cast<uint64_t *>(lwe_array_out),
-      static_cast<uint64_t *>(lwe_output_indexes),
-      static_cast<uint64_t *>(lwe_array_in),
-      static_cast<uint64_t *>(lwe_input_indexes), static_cast<uint64_t *>(ksk),
-      lwe_dimension_in, lwe_dimension_out, base_log, level_count, num_samples);
-}
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
@@ -1,140 +0,0 @@
-#ifndef CNCRT_KS_CUH
-#define CNCRT_KS_CUH
-
-#include "device.h"
-#include "gadget.cuh"
-#include "polynomial/polynomial_math.cuh"
-#include "torus.cuh"
-#include <thread>
-#include <vector>
-
-template <typename Torus>
-__device__ Torus *get_ith_block(Torus *ksk, int i, int level,
-                                uint32_t lwe_dimension_out,
-                                uint32_t level_count) {
-  int pos = i * level_count * (lwe_dimension_out + 1) +
-            level * (lwe_dimension_out + 1);
-  Torus *ptr = &ksk[pos];
-  return ptr;
-}
-
-/*
- * keyswitch kernel
- * Each thread handles a piece of the following equation:
- * $$GLWE_s2(\Delta.m+e) = (0,0,..,0,b) - \sum_{i=0,k-1} <Dec(a_i),
- * (GLWE_s2(s1_i q/beta),..,GLWE(s1_i q/beta^l)>$$ where k is the dimension of
- * the GLWE ciphertext. If the polynomial dimension in GLWE is > 1, this
- * equation is solved for each polynomial coefficient. where Dec denotes the
- * decomposition with base beta and l levels and the inner product is done
- * between the decomposition of a_i and l GLWE encryptions of s1_i q/\beta^j,
- * with j in [1,l] We obtain a GLWE encryption of Delta.m (with Delta the
- * scaling factor) under key s2 instead of s1, with an increased noise
- *
- */
-template <typename Torus>
-__global__ void
-keyswitch(Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lwe_array_in,
-          Torus *lwe_input_indexes, Torus *ksk, uint32_t lwe_dimension_in,
-          uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
-          int lwe_lower, int lwe_upper, int cutoff) {
-  int tid = threadIdx.x;
-
-  extern __shared__ int8_t sharedmem[];
-
-  Torus *local_lwe_array_out = (Torus *)sharedmem;
-
-  auto block_lwe_array_in = get_chunk(
-      lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);
-  auto block_lwe_array_out = get_chunk(
-      lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1);
-
-  auto gadget = GadgetMatrixSingle<Torus>(base_log, level_count);
-
-  int lwe_part_per_thd;
-  if (tid < cutoff) {
-    lwe_part_per_thd = lwe_upper;
-  } else {
-    lwe_part_per_thd = lwe_lower;
-  }
-  __syncthreads();
-
-  for (int k = 0; k < lwe_part_per_thd; k++) {
-    int idx = tid + k * blockDim.x;
-    local_lwe_array_out[idx] = 0;
-  }
-  __syncthreads();
-
-  if (tid == 0) {
-    local_lwe_array_out[lwe_dimension_out] =
-        block_lwe_array_in[lwe_dimension_in];
-  }
-
-  for (int i = 0; i < lwe_dimension_in; i++) {
-
-    __syncthreads();
-
-    Torus a_i =
-        round_to_closest_multiple(block_lwe_array_in[i], base_log, level_count);
-
-    Torus state = a_i >> (sizeof(Torus) * 8 - base_log * level_count);
-    Torus mask_mod_b = (1ll << base_log) - 1ll;
-
-    for (int j = 0; j < level_count; j++) {
-      auto ksk_block = get_ith_block(ksk, i, j, lwe_dimension_out, level_count);
-      Torus decomposed = decompose_one<Torus>(state, mask_mod_b, base_log);
-      for (int k = 0; k < lwe_part_per_thd; k++) {
-        int idx = tid + k * blockDim.x;
-        local_lwe_array_out[idx] -= (Torus)ksk_block[idx] * decomposed;
-      }
-    }
-  }
-
-  for (int k = 0; k < lwe_part_per_thd; k++) {
-    int idx = tid + k * blockDim.x;
-    block_lwe_array_out[idx] = local_lwe_array_out[idx];
-  }
-}
-
-/// assume lwe_array_in in the gpu
-template <typename Torus>
-__host__ void cuda_keyswitch_lwe_ciphertext_vector(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
-    Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *ksk,
-    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
-    uint32_t level_count, uint32_t num_samples) {
-
-  cudaSetDevice(stream->gpu_index);
-  constexpr int ideal_threads = 128;
-
-  int lwe_size = lwe_dimension_out + 1;
-  int lwe_lower, lwe_upper, cutoff;
-  if (lwe_size % ideal_threads == 0) {
-    lwe_lower = lwe_size / ideal_threads;
-    lwe_upper = lwe_size / ideal_threads;
-    cutoff = 0;
-  } else {
-    int y = ceil((double)lwe_size / (double)ideal_threads) * ideal_threads -
-            lwe_size;
-    cutoff = ideal_threads - y;
-    lwe_lower = lwe_size / ideal_threads;
-    lwe_upper = (int)ceil((double)lwe_size / (double)ideal_threads);
-  }
-
-  int lwe_size_after = lwe_size * num_samples;
-
-  int shared_mem = sizeof(Torus) * lwe_size;
-
-  cuda_memset_async(lwe_array_out, 0, sizeof(Torus) * lwe_size_after, stream);
-  check_cuda_error(cudaGetLastError());
-
-  dim3 grid(num_samples, 1, 1);
-  dim3 threads(ideal_threads, 1, 1);
-
-  keyswitch<Torus><<<grid, threads, shared_mem, stream->stream>>>(
-      lwe_array_out, lwe_output_indexes, lwe_array_in, lwe_input_indexes, ksk,
-      lwe_dimension_in, lwe_dimension_out, base_log, level_count, lwe_lower,
-      lwe_upper, cutoff);
-  check_cuda_error(cudaGetLastError());
-}
-
-#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
@@ -1,74 +0,0 @@
-#ifndef CNCRT_TORUS_CUH
-#define CNCRT_TORUS_CUH
-
-#include "types/int128.cuh"
-#include <limits>
-
-template <typename T>
-__device__ inline void typecast_double_to_torus(double x, T &r) {
-  r = T(x);
-}
-
-template <>
-__device__ inline void typecast_double_to_torus<uint32_t>(double x,
-                                                          uint32_t &r) {
-  r = __double2uint_rn(x);
-}
-
-template <>
-__device__ inline void typecast_double_to_torus<uint64_t>(double x,
-                                                          uint64_t &r) {
-  // The ull intrinsic does not behave in the same way on all architectures and
-  // on some platforms this causes the cmux tree test to fail
-  // Hence the intrinsic is not used here
-  uint128 nnnn = make_uint128_from_float(x);
-  uint64_t lll = nnnn.lo_;
-  r = lll;
-}
-
-template <typename T>
-__device__ inline T round_to_closest_multiple(T x, uint32_t base_log,
-                                              uint32_t level_count) {
-  T shift = sizeof(T) * 8 - level_count * base_log;
-  T mask = 1ll << (shift - 1);
-  T b = (x & mask) >> (shift - 1);
-  T res = x >> shift;
-  res += b;
-  res <<= shift;
-  return res;
-}
-
-template <typename T>
-__device__ __forceinline__ void rescale_torus_element(T element, T &output,
-                                                      uint32_t log_shift) {
-  output =
-      round((double)element / (double(std::numeric_limits<T>::max()) + 1.0) *
-            (double)log_shift);
-}
-
-template <typename T>
-__device__ __forceinline__ T rescale_torus_element(T element,
-                                                   uint32_t log_shift) {
-  return round((double)element / (double(std::numeric_limits<T>::max()) + 1.0) *
-               (double)log_shift);
-}
-
-template <>
-__device__ __forceinline__ void
-rescale_torus_element<uint32_t>(uint32_t element, uint32_t &output,
-                                uint32_t log_shift) {
-  output =
-      round(__uint2double_rn(element) /
-            (__uint2double_rn(std::numeric_limits<uint32_t>::max()) + 1.0) *
-            __uint2double_rn(log_shift));
-}
-
-template <>
-__device__ __forceinline__ void
-rescale_torus_element<uint64_t>(uint64_t element, uint64_t &output,
-                                uint32_t log_shift) {
-  output = round(__ull2double_rn(element) /
-                 (__ull2double_rn(std::numeric_limits<uint64_t>::max()) + 1.0) *
-                 __uint2double_rn(log_shift));
-}
-#endif // CNCRT_TORUS_H
--- a/backends/tfhe-cuda-backend/cuda/src/device.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/device.cu
@@ -1,238 +0,0 @@
-#include "device.h"
-#include <cstdint>
-#include <cuda_runtime.h>
-
-/// Unsafe function to create a CUDA stream, must check first that GPU exists
-cuda_stream_t *cuda_create_stream(uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-  cuda_stream_t *stream = new cuda_stream_t(gpu_index);
-  return stream;
-}
-
-/// Unsafe function to destroy CUDA stream, must check first the GPU exists
-void cuda_destroy_stream(cuda_stream_t *stream) { stream->release(); }
-
-/// Unsafe function that will try to allocate even if gpu_index is invalid
-/// or if there's not enough memory. A safe wrapper around it must call
-/// cuda_check_valid_malloc() first
-void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-  void *ptr;
-  check_cuda_error(cudaMalloc((void **)&ptr, size));
-
-  return ptr;
-}
-
-/// Allocates a size-byte array at the device memory. Tries to do it
-/// asynchronously.
-void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream) {
-  check_cuda_error(cudaSetDevice(stream->gpu_index));
-  void *ptr;
-
-#ifndef CUDART_VERSION
-#error CUDART_VERSION Undefined!
-#elif (CUDART_VERSION >= 11020)
-  int support_async_alloc;
-  check_cuda_error(cudaDeviceGetAttribute(&support_async_alloc,
-                                          cudaDevAttrMemoryPoolsSupported,
-                                          stream->gpu_index));
-
-  if (support_async_alloc) {
-    check_cuda_error(cudaMallocAsync((void **)&ptr, size, stream->stream));
-  } else {
-    check_cuda_error(cudaMalloc((void **)&ptr, size));
-  }
-#else
-  check_cuda_error(cudaMalloc((void **)&ptr, size));
-#endif
-  return ptr;
-}
-
-/// Check that allocation is valid
-void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-  size_t total_mem, free_mem;
-  check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
-  if (size > free_mem) {
-    PANIC("Cuda error: not enough memory on device. "
-          "Available: %zu vs Requested: %lu",
-          free_mem, size)
-  }
-}
-
-/// Returns
-///  false if Cooperative Groups is not supported.
-///  true otherwise
-bool cuda_check_support_cooperative_groups() {
-  int cooperative_groups_supported = 0;
-  check_cuda_error(cudaDeviceGetAttribute(&cooperative_groups_supported,
-                                          cudaDevAttrCooperativeLaunch, 0));
-
-  return cooperative_groups_supported > 0;
-}
-
-/// Copy memory to the GPU asynchronously
-void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
-                              cuda_stream_t *stream) {
-  if (size == 0)
-    return;
-  cudaPointerAttributes attr;
-  check_cuda_error(cudaPointerGetAttributes(&attr, dest));
-  if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid device pointer in async copy to GPU.")
-  }
-
-  check_cuda_error(cudaSetDevice(stream->gpu_index));
-  check_cuda_error(
-      cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream->stream));
-}
-
-/// Copy memory within a GPU asynchronously
-void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
-                                  cuda_stream_t *stream) {
-  if (size == 0)
-    return;
-  cudaPointerAttributes attr_dest;
-  check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
-  if (attr_dest.device != stream->gpu_index &&
-      attr_dest.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.")
-  }
-  cudaPointerAttributes attr_src;
-  check_cuda_error(cudaPointerGetAttributes(&attr_src, src));
-  if (attr_src.device != stream->gpu_index &&
-      attr_src.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
-  }
-  if (attr_src.device != attr_dest.device) {
-    PANIC("Cuda error: different devices specified in copy from GPU to GPU.")
-  }
-
-  check_cuda_error(cudaSetDevice(stream->gpu_index));
-  check_cuda_error(cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice,
-                                   stream->stream));
-}
-
-/// Synchronizes device
-void cuda_synchronize_device(uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-  check_cuda_error(cudaDeviceSynchronize());
-}
-
-void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
-                       cuda_stream_t *stream) {
-  if (size == 0)
-    return;
-  cudaPointerAttributes attr;
-  check_cuda_error(cudaPointerGetAttributes(&attr, dest));
-  if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in cuda memset.")
-  }
-  check_cuda_error(cudaSetDevice(stream->gpu_index));
-  check_cuda_error(cudaMemsetAsync(dest, val, size, stream->stream));
-}
-
-template <typename Torus>
-__global__ void cuda_set_value_kernel(Torus *array, Torus value, Torus n) {
-  int index = threadIdx.x + blockIdx.x * blockDim.x;
-  if (index < n)
-    array[index] = value;
-}
-
-template <typename Torus>
-void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
-                          Torus n) {
-  cudaPointerAttributes attr;
-  check_cuda_error(cudaPointerGetAttributes(&attr, d_array));
-  if (attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid dest device pointer in cuda set value.")
-  }
-  int block_size = 256;
-  int num_blocks = (n + block_size - 1) / block_size;
-
-  // Launch the kernel
-  cuda_set_value_kernel<<<num_blocks, block_size, 0, *stream>>>(d_array, value,
-                                                                n);
-  check_cuda_error(cudaGetLastError());
-}
-
-/// Explicitly instantiate cuda_set_value_async for 32 and 64 bits
-template void cuda_set_value_async(cudaStream_t *stream, uint64_t *d_array,
-                                   uint64_t value, uint64_t n);
-template void cuda_set_value_async(cudaStream_t *stream, uint32_t *d_array,
-                                   uint32_t value, uint32_t n);
-
-/// Copy memory to the CPU asynchronously
-void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
-                              cuda_stream_t *stream) {
-  if (size == 0)
-    return;
-  cudaPointerAttributes attr;
-  check_cuda_error(cudaPointerGetAttributes(&attr, src));
-  if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
-    PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
-  }
-
-  check_cuda_error(cudaSetDevice(stream->gpu_index));
-  check_cuda_error(
-      cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream->stream));
-}
-
-/// Return number of GPUs available
-int cuda_get_number_of_gpus() {
-  int num_gpus;
-  check_cuda_error(cudaGetDeviceCount(&num_gpus));
-  return num_gpus;
-}
-
-/// Drop a cuda array
-void cuda_drop(void *ptr, uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-  check_cuda_error(cudaFree(ptr));
-}
-
-/// Drop a cuda array asynchronously, if supported on the device
-void cuda_drop_async(void *ptr, cuda_stream_t *stream) {
-
-  check_cuda_error(cudaSetDevice(stream->gpu_index));
-#ifndef CUDART_VERSION
-#error CUDART_VERSION Undefined!
-#elif (CUDART_VERSION >= 11020)
-  int support_async_alloc;
-  check_cuda_error(cudaDeviceGetAttribute(&support_async_alloc,
-                                          cudaDevAttrMemoryPoolsSupported,
-                                          stream->gpu_index));
-
-  if (support_async_alloc) {
-    check_cuda_error(cudaFreeAsync(ptr, stream->stream));
-  } else {
-    check_cuda_error(cudaFree(ptr));
-  }
-#else
-  check_cuda_error(cudaFree(ptr));
-#endif
-}
-
-/// Get the maximum size for the shared memory
-int cuda_get_max_shared_memory(uint32_t gpu_index) {
-  check_cuda_error(cudaSetDevice(gpu_index));
-  int max_shared_memory = 0;
-  cudaDeviceGetAttribute(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock,
-                         gpu_index);
-  check_cuda_error(cudaGetLastError());
-  return max_shared_memory;
-}
-
-void cuda_synchronize_stream(cuda_stream_t *stream) { stream->synchronize(); }
-
-void cuda_stream_add_callback(cuda_stream_t *stream,
-                              cudaStreamCallback_t callback, void *user_data) {
-
-  check_cuda_error(
-      cudaStreamAddCallback(stream->stream, callback, user_data, 0));
-}
-
-void host_free_on_stream_callback(cudaStream_t stream, cudaError_t status,
-                                  void *host_pointer) {
-  free(host_pointer);
-}
--- a/backends/tfhe-cuda-backend/cuda/src/fft/bnsmfft.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/bnsmfft.cuh
@@ -1,725 +0,0 @@
-#ifndef GPU_BOOTSTRAP_FFT_CUH
-#define GPU_BOOTSTRAP_FFT_CUH
-
-#include "polynomial/functions.cuh"
-#include "polynomial/parameters.cuh"
-#include "twiddles.cuh"
-#include "types/complex/operations.cuh"
-
-/*
- * Direct negacyclic FFT:
- *   - before the FFT the N real coefficients are stored into a
- *     N/2 sized complex with the even coefficients in the real part
- *     and the odd coefficients in the imaginary part. This is referred to
- *     as the half-size FFT
- *   - when calling NSMFFT_direct for the forward negacyclic FFT of PBS,
- *     opt is divided by 2 because the butterfly pattern is always applied
- *     between pairs of coefficients
- *   - instead of twisting each coefficient A_j before the FFT by
- *     multiplying by the w^j roots of unity (aka twiddles, w=exp(-i pi /N)),
- *     the FFT is modified, and for each level k of the FFT the twiddle:
- *     w_j,k = exp(-i pi j/2^k)
- *     is replaced with:
- *     \zeta_j,k = exp(-i pi (2j-1)/2^k)
- */
-template <class params> __device__ void NSMFFT_direct(double2 *A) {
-
-  /* We don't make bit reverse here, since twiddles are already reversed
-   *  Each thread is always in charge of "opt/2" pairs of coefficients,
-   *  which is why we always loop through N/2 by N/opt strides
-   *  The pragma unroll instruction tells the compiler to unroll the
-   *  full loop, which should increase performance
-   */
-
-  size_t tid = threadIdx.x;
-  size_t twid_id;
-  size_t i1, i2;
-  double2 u, v, w;
-  // level 1
-  // we don't make actual complex multiplication on level1 since we have only
-  // one twiddle, its real and imaginary parts are equal, so we can multiply
-  // it with simpler operations
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    i1 = tid;
-    i2 = tid + params::degree / 2;
-
-    u = A[i1];
-    v = A[i2] * (double2){0.707106781186547461715008466854,
-                          0.707106781186547461715008466854};
-
-    A[i1] += v;
-    A[i2] = u - v;
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 2
-  // from this level there is more than one twiddle and none of them has equal
-  // real and imag parts, so complete complex multiplication is needed
-  // for each level params::degree / 2^level represents number of coefficients
-  // inside divided chunk of specific level
-  //
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 4);
-    i1 = 2 * (params::degree / 4) * twid_id + (tid & (params::degree / 4 - 1));
-    i2 = i1 + params::degree / 4;
-
-    w = negtwiddles[twid_id + 2];
-    u = A[i1];
-    v = A[i2] * w;
-
-    A[i1] += v;
-    A[i2] = u - v;
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 3
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 8);
-    i1 = 2 * (params::degree / 8) * twid_id + (tid & (params::degree / 8 - 1));
-    i2 = i1 + params::degree / 8;
-
-    w = negtwiddles[twid_id + 4];
-    u = A[i1];
-    v = A[i2] * w;
-
-    A[i1] += v;
-    A[i2] = u - v;
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 4
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 16);
-    i1 =
-        2 * (params::degree / 16) * twid_id + (tid & (params::degree / 16 - 1));
-    i2 = i1 + params::degree / 16;
-
-    w = negtwiddles[twid_id + 8];
-    u = A[i1];
-    v = A[i2] * w;
-
-    A[i1] += v;
-    A[i2] = u - v;
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 5
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 32);
-    i1 =
-        2 * (params::degree / 32) * twid_id + (tid & (params::degree / 32 - 1));
-    i2 = i1 + params::degree / 32;
-
-    w = negtwiddles[twid_id + 16];
-    u = A[i1];
-    v = A[i2] * w;
-
-    A[i1] += v;
-    A[i2] = u - v;
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 6
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 64);
-    i1 =
-        2 * (params::degree / 64) * twid_id + (tid & (params::degree / 64 - 1));
-    i2 = i1 + params::degree / 64;
-
-    w = negtwiddles[twid_id + 32];
-    u = A[i1];
-    v = A[i2] * w;
-
-    A[i1] += v;
-    A[i2] = u - v;
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 7
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 128);
-    i1 = 2 * (params::degree / 128) * twid_id +
-         (tid & (params::degree / 128 - 1));
-    i2 = i1 + params::degree / 128;
-
-    w = negtwiddles[twid_id + 64];
-    u = A[i1];
-    v = A[i2] * w;
-
-    A[i1] += v;
-    A[i2] = u - v;
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // from level 8, we need to check size of params degree, because we support
-  // minimum actual polynomial size = 256,  when compressed size is halved and
-  // minimum supported compressed size is 128, so we always need first 7
-  // levels of butterfly operation, since butterfly levels are hardcoded
-  // we need to check if polynomial size is big enough to require specific level
-  // of butterfly.
-  if constexpr (params::degree >= 256) {
-    // level 8
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 256);
-      i1 = 2 * (params::degree / 256) * twid_id +
-           (tid & (params::degree / 256 - 1));
-      i2 = i1 + params::degree / 256;
-
-      w = negtwiddles[twid_id + 128];
-      u = A[i1];
-      v = A[i2] * w;
-
-      A[i1] += v;
-      A[i2] = u - v;
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 512) {
-    // level 9
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 512);
-      i1 = 2 * (params::degree / 512) * twid_id +
-           (tid & (params::degree / 512 - 1));
-      i2 = i1 + params::degree / 512;
-
-      w = negtwiddles[twid_id + 256];
-      u = A[i1];
-      v = A[i2] * w;
-
-      A[i1] += v;
-      A[i2] = u - v;
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 1024) {
-    // level 10
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 1024);
-      i1 = 2 * (params::degree / 1024) * twid_id +
-           (tid & (params::degree / 1024 - 1));
-      i2 = i1 + params::degree / 1024;
-
-      w = negtwiddles[twid_id + 512];
-      u = A[i1];
-      v = A[i2] * w;
-
-      A[i1] += v;
-      A[i2] = u - v;
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 2048) {
-    // level 11
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 2048);
-      i1 = 2 * (params::degree / 2048) * twid_id +
-           (tid & (params::degree / 2048 - 1));
-      i2 = i1 + params::degree / 2048;
-
-      w = negtwiddles[twid_id + 1024];
-      u = A[i1];
-      v = A[i2] * w;
-
-      A[i1] += v;
-      A[i2] = u - v;
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 4096) {
-    // level 12
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 4096);
-      i1 = 2 * (params::degree / 4096) * twid_id +
-           (tid & (params::degree / 4096 - 1));
-      i2 = i1 + params::degree / 4096;
-
-      w = negtwiddles[twid_id + 2048];
-      u = A[i1];
-      v = A[i2] * w;
-
-      A[i1] += v;
-      A[i2] = u - v;
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  // compressed size = 8192 is actual polynomial size = 16384.
-  // from this size, twiddles can't fit in constant memory,
-  // so from here, the butterfly operation accesses device memory.
-  if constexpr (params::degree >= 8192) {
-    // level 13
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 8192);
-      i1 = 2 * (params::degree / 8192) * twid_id +
-           (tid & (params::degree / 8192 - 1));
-      i2 = i1 + params::degree / 8192;
-
-      w = negtwiddles13[twid_id];
-      u = A[i1];
-      v = A[i2] * w;
-
-      A[i1] += v;
-      A[i2] = u - v;
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-}
-
-/*
- * negacyclic inverse fft
- */
-template <class params> __device__ void NSMFFT_inverse(double2 *A) {
-
-  /* We don't make bit reverse here, since twiddles are already reversed
-   *  Each thread is always in charge of "opt/2" pairs of coefficients,
-   *  which is why we always loop through N/2 by N/opt strides
-   *  The pragma unroll instruction tells the compiler to unroll the
-   *  full loop, which should increase performance
-   */
-
-  size_t tid = threadIdx.x;
-  size_t twid_id;
-  size_t i1, i2;
-  double2 u, w;
-
-  // divide input by compressed polynomial size
-  tid = threadIdx.x;
-  for (size_t i = 0; i < params::opt; ++i) {
-    A[tid] /= params::degree;
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // none of the twiddles have equal real and imag part, so
-  // complete complex multiplication has to be done
-  // here we have more than one twiddle
-  // mapping in backward fft is reversed
-  // butterfly operation is started from last level
-
-  // compressed size = 8192 is actual polynomial size = 16384.
-  // twiddles for this size can't fit in constant memory so
-  // butterfly operation for this level accesses device memory to fetch
-  // twiddles
-  if constexpr (params::degree >= 8192) {
-    // level 13
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 8192);
-      i1 = 2 * (params::degree / 8192) * twid_id +
-           (tid & (params::degree / 8192 - 1));
-      i2 = i1 + params::degree / 8192;
-
-      w = negtwiddles13[twid_id];
-      u = A[i1] - A[i2];
-
-      A[i1] += A[i2];
-      A[i2] = u * conjugate(w);
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 4096) {
-    // level 12
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 4096);
-      i1 = 2 * (params::degree / 4096) * twid_id +
-           (tid & (params::degree / 4096 - 1));
-      i2 = i1 + params::degree / 4096;
-
-      w = negtwiddles[twid_id + 2048];
-      u = A[i1] - A[i2];
-
-      A[i1] += A[i2];
-      A[i2] = u * conjugate(w);
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 2048) {
-    // level 11
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 2048);
-      i1 = 2 * (params::degree / 2048) * twid_id +
-           (tid & (params::degree / 2048 - 1));
-      i2 = i1 + params::degree / 2048;
-
-      w = negtwiddles[twid_id + 1024];
-      u = A[i1] - A[i2];
-
-      A[i1] += A[i2];
-      A[i2] = u * conjugate(w);
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 1024) {
-    // level 10
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 1024);
-      i1 = 2 * (params::degree / 1024) * twid_id +
-           (tid & (params::degree / 1024 - 1));
-      i2 = i1 + params::degree / 1024;
-
-      w = negtwiddles[twid_id + 512];
-      u = A[i1] - A[i2];
-
-      A[i1] += A[i2];
-      A[i2] = u * conjugate(w);
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 512) {
-    // level 9
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 512);
-      i1 = 2 * (params::degree / 512) * twid_id +
-           (tid & (params::degree / 512 - 1));
-      i2 = i1 + params::degree / 512;
-
-      w = negtwiddles[twid_id + 256];
-      u = A[i1] - A[i2];
-
-      A[i1] += A[i2];
-      A[i2] = u * conjugate(w);
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  if constexpr (params::degree >= 256) {
-    // level 8
-    tid = threadIdx.x;
-#pragma unroll
-    for (size_t i = 0; i < params::opt / 2; ++i) {
-      twid_id = tid / (params::degree / 256);
-      i1 = 2 * (params::degree / 256) * twid_id +
-           (tid & (params::degree / 256 - 1));
-      i2 = i1 + params::degree / 256;
-
-      w = negtwiddles[twid_id + 128];
-      u = A[i1] - A[i2];
-
-      A[i1] += A[i2];
-      A[i2] = u * conjugate(w);
-
-      tid += params::degree / params::opt;
-    }
-    __syncthreads();
-  }
-
-  // below level 8, we don't need to check size of params degree, because we
-  // support minimum actual polynomial size = 256,  when compressed size is
-  // halved and minimum supported compressed size is 128, so we always need
-  // last 7 levels of butterfly operation, since butterfly levels are hardcoded
-  // we don't need to check if polynomial size is big enough to require
-  // specific level of butterfly.
-  // level 7
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 128);
-    i1 = 2 * (params::degree / 128) * twid_id +
-         (tid & (params::degree / 128 - 1));
-    i2 = i1 + params::degree / 128;
-
-    w = negtwiddles[twid_id + 64];
-    u = A[i1] - A[i2];
-
-    A[i1] += A[i2];
-    A[i2] = u * conjugate(w);
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 6
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 64);
-    i1 =
-        2 * (params::degree / 64) * twid_id + (tid & (params::degree / 64 - 1));
-    i2 = i1 + params::degree / 64;
-
-    w = negtwiddles[twid_id + 32];
-    u = A[i1] - A[i2];
-
-    A[i1] += A[i2];
-    A[i2] = u * conjugate(w);
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 5
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 32);
-    i1 =
-        2 * (params::degree / 32) * twid_id + (tid & (params::degree / 32 - 1));
-    i2 = i1 + params::degree / 32;
-
-    w = negtwiddles[twid_id + 16];
-    u = A[i1] - A[i2];
-
-    A[i1] += A[i2];
-    A[i2] = u * conjugate(w);
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 4
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 16);
-    i1 =
-        2 * (params::degree / 16) * twid_id + (tid & (params::degree / 16 - 1));
-    i2 = i1 + params::degree / 16;
-
-    w = negtwiddles[twid_id + 8];
-    u = A[i1] - A[i2];
-
-    A[i1] += A[i2];
-    A[i2] = u * conjugate(w);
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 3
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 8);
-    i1 = 2 * (params::degree / 8) * twid_id + (tid & (params::degree / 8 - 1));
-    i2 = i1 + params::degree / 8;
-
-    w = negtwiddles[twid_id + 4];
-    u = A[i1] - A[i2];
-
-    A[i1] += A[i2];
-    A[i2] = u * conjugate(w);
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 2
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 4);
-    i1 = 2 * (params::degree / 4) * twid_id + (tid & (params::degree / 4 - 1));
-    i2 = i1 + params::degree / 4;
-
-    w = negtwiddles[twid_id + 2];
-    u = A[i1] - A[i2];
-
-    A[i1] += A[i2];
-    A[i2] = u * conjugate(w);
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // level 1
-  tid = threadIdx.x;
-#pragma unroll
-  for (size_t i = 0; i < params::opt / 2; ++i) {
-    twid_id = tid / (params::degree / 2);
-    i1 = 2 * (params::degree / 2) * twid_id + (tid & (params::degree / 2 - 1));
-    i2 = i1 + params::degree / 2;
-
-    w = negtwiddles[twid_id + 1];
-    u = A[i1] - A[i2];
-
-    A[i1] += A[i2];
-    A[i2] = u * conjugate(w);
-
-    tid += params::degree / params::opt;
-  }
-  __syncthreads();
-}
-
-/*
- * global batch fft
- * does fft in half size
- * unrolling half size fft results in half size + 1 elements
- * this function must be called with actual degree
- * function takes as input already compressed input
- */
-template <class params, sharedMemDegree SMD>
-__global__ void batch_NSMFFT(double2 *d_input, double2 *d_output,
-                             double2 *buffer) {
-  extern __shared__ double2 sharedMemoryFFT[];
-  double2 *fft = (SMD == NOSM) ? &buffer[blockIdx.x * params::degree / 2]
-                               : sharedMemoryFFT;
-  int tid = threadIdx.x;
-
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    fft[tid] = d_input[blockIdx.x * (params::degree / 2) + tid];
-    tid = tid + params::degree / params::opt;
-  }
-  __syncthreads();
-  NSMFFT_direct<HalfDegree<params>>(fft);
-  __syncthreads();
-
-  tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    d_output[blockIdx.x * (params::degree / 2) + tid] = fft[tid];
-    tid = tid + params::degree / params::opt;
-  }
-}
-
-/*
- * global batch polynomial multiplication
- * only used for fft tests
- * d_input1 and d_output must not have the same pointer
- * d_input1 can be modified inside the function
- */
-template <class params, sharedMemDegree SMD>
-__global__ void batch_polynomial_mul(double2 *d_input1, double2 *d_input2,
-                                     double2 *d_output, double2 *buffer) {
-  extern __shared__ double2 sharedMemoryFFT[];
-  double2 *fft = (SMD == NOSM) ? &buffer[blockIdx.x * params::degree / 2]
-                               : sharedMemoryFFT;
-
-  // Move first polynomial into shared memory(if possible otherwise it will
-  // be moved in device buffer)
-  int tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    fft[tid] = d_input1[blockIdx.x * (params::degree / 2) + tid];
-    tid = tid + params::degree / params::opt;
-  }
-
-  // Perform direct negacyclic fourier transform
-  __syncthreads();
-  NSMFFT_direct<HalfDegree<params>>(fft);
-  __syncthreads();
-
-  // Put the result of direct fft inside input1
-  tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    d_input1[blockIdx.x * (params::degree / 2) + tid] = fft[tid];
-    tid = tid + params::degree / params::opt;
-  }
-  __syncthreads();
-
-  // Move second polynomial into shared memory(if possible otherwise it will
-  // be moved in device buffer)
-  tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    fft[tid] = d_input2[blockIdx.x * (params::degree / 2) + tid];
-    tid = tid + params::degree / params::opt;
-  }
-
-  // Perform direct negacyclic fourier transform on the second polynomial
-  __syncthreads();
-  NSMFFT_direct<HalfDegree<params>>(fft);
-  __syncthreads();
-
-  // calculate pointwise multiplication inside fft buffer
-  tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    fft[tid] *= d_input1[blockIdx.x * (params::degree / 2) + tid];
-    tid = tid + params::degree / params::opt;
-  }
-
-  // Perform backward negacyclic fourier transform
-  __syncthreads();
-  NSMFFT_inverse<HalfDegree<params>>(fft);
-  __syncthreads();
-
-  // copy results in output buffer
-  tid = threadIdx.x;
-#pragma unroll
-  for (int i = 0; i < params::opt / 2; i++) {
-    d_output[blockIdx.x * (params::degree / 2) + tid] = fft[tid];
-    tid = tid + params::degree / params::opt;
-  }
-}
-
-#endif // GPU_BOOTSTRAP_FFT_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
--- a/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cuh
@@ -1,13 +0,0 @@
-#ifndef GPU_BOOTSTRAP_TWIDDLES_CUH
-#define GPU_BOOTSTRAP_TWIDDLES_CUH
-
-/*
- * 'negtwiddles' are stored in constant memory for faster access times
- * because of its limited size, only twiddles for up to 2^12 polynomial size
- * can be stored there, twiddles for 2^13 are stored in device memory
- * 'negtwiddles13'
- */
-
-extern __constant__ double2 negtwiddles[4096];
-extern __device__ double2 negtwiddles13[4096];
-#endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
@@ -1,51 +0,0 @@
-#include "integer/bitwise_ops.cuh"
-
-void scratch_cuda_integer_radix_bitop_kb_64(
-    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t big_lwe_dimension,
-    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
-    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
-    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
-    uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
-    bool allocate_gpu_memory) {
-
-  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
-                          big_lwe_dimension, small_lwe_dimension, ks_level,
-                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
-                          message_modulus, carry_modulus);
-
-  scratch_cuda_integer_radix_bitop_kb<uint64_t>(
-      stream, (int_bitop_buffer<uint64_t> **)mem_ptr, lwe_ciphertext_count,
-      params, op_type, allocate_gpu_memory);
-}
-
-void cuda_bitop_integer_radix_ciphertext_kb_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1,
-    void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk,
-    uint32_t lwe_ciphertext_count) {
-
-  host_integer_radix_bitop_kb<uint64_t>(
-      stream, static_cast<uint64_t *>(lwe_array_out),
-      static_cast<uint64_t *>(lwe_array_1),
-      static_cast<uint64_t *>(lwe_array_2),
-      (int_bitop_buffer<uint64_t> *)mem_ptr, bsk, static_cast<uint64_t *>(ksk),
-      lwe_ciphertext_count);
-}
-
-void cuda_bitnot_integer_radix_ciphertext_kb_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
-    int8_t *mem_ptr, void *bsk, void *ksk, uint32_t lwe_ciphertext_count) {
-
-  host_integer_radix_bitnot_kb<uint64_t>(
-      stream, static_cast<uint64_t *>(lwe_array_out),
-      static_cast<uint64_t *>(lwe_array_in),
-      (int_bitop_buffer<uint64_t> *)mem_ptr, bsk, static_cast<uint64_t *>(ksk),
-      lwe_ciphertext_count);
-}
-
-void cleanup_cuda_integer_bitop(cuda_stream_t *stream, int8_t **mem_ptr_void) {
-
-  int_bitop_buffer<uint64_t> *mem_ptr =
-      (int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
-  mem_ptr->release(stream);
-}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
@@ -1,52 +0,0 @@
-#ifndef CUDA_INTEGER_BITWISE_OPS_CUH
-#define CUDA_INTEGER_BITWISE_OPS_CUH
-
-#include "crypto/keyswitch.cuh"
-#include "device.h"
-#include "integer.cuh"
-#include "integer.h"
-#include "pbs/programmable_bootstrap_classic.cuh"
-#include "pbs/programmable_bootstrap_multibit.cuh"
-#include "polynomial/functions.cuh"
-#include "utils/kernel_dimensions.cuh"
-#include <omp.h>
-
-template <typename Torus>
-__host__ void
-host_integer_radix_bitop_kb(cuda_stream_t *stream, Torus *lwe_array_out,
-                            Torus *lwe_array_1, Torus *lwe_array_2,
-                            int_bitop_buffer<Torus> *mem_ptr, void *bsk,
-                            Torus *ksk, uint32_t num_radix_blocks) {
-
-  auto lut = mem_ptr->lut;
-
-  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
-      stream, lwe_array_out, lwe_array_1, lwe_array_2, bsk, ksk,
-      num_radix_blocks, lut);
-}
-
-template <typename Torus>
-__host__ void
-host_integer_radix_bitnot_kb(cuda_stream_t *stream, Torus *lwe_array_out,
-                             Torus *lwe_array_in,
-                             int_bitop_buffer<Torus> *mem_ptr, void *bsk,
-                             Torus *ksk, uint32_t num_radix_blocks) {
-
-  auto lut = mem_ptr->lut;
-
-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      stream, lwe_array_out, lwe_array_in, bsk, ksk, num_radix_blocks, lut);
-}
-
-template <typename Torus>
-__host__ void scratch_cuda_integer_radix_bitop_kb(
-    cuda_stream_t *stream, int_bitop_buffer<Torus> **mem_ptr,
-    uint32_t num_radix_blocks, int_radix_params params, BITOP_TYPE op,
-    bool allocate_gpu_memory) {
-
-  cudaSetDevice(stream->gpu_index);
-  *mem_ptr = new int_bitop_buffer<Torus>(stream, op, params, num_radix_blocks,
-                                         allocate_gpu_memory);
-}
-
-#endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
@@ -1,45 +0,0 @@
-#include "integer/cmux.cuh"
-
-void scratch_cuda_integer_radix_cmux_kb_64(
-    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t big_lwe_dimension,
-    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
-    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
-    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
-    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
-
-  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
-                          big_lwe_dimension, small_lwe_dimension, ks_level,
-                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
-                          message_modulus, carry_modulus);
-
-  std::function<uint64_t(uint64_t)> predicate_lut_f =
-      [](uint64_t x) -> uint64_t { return x == 1; };
-
-  scratch_cuda_integer_radix_cmux_kb(
-      stream, (int_cmux_buffer<uint64_t> **)mem_ptr, predicate_lut_f,
-      lwe_ciphertext_count, params, allocate_gpu_memory);
-}
-
-void cuda_cmux_integer_radix_ciphertext_kb_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_condition,
-    void *lwe_array_true, void *lwe_array_false, int8_t *mem_ptr, void *bsk,
-    void *ksk, uint32_t lwe_ciphertext_count) {
-
-  host_integer_radix_cmux_kb<uint64_t>(
-      stream, static_cast<uint64_t *>(lwe_array_out),
-      static_cast<uint64_t *>(lwe_condition),
-      static_cast<uint64_t *>(lwe_array_true),
-      static_cast<uint64_t *>(lwe_array_false),
-      (int_cmux_buffer<uint64_t> *)mem_ptr, bsk, static_cast<uint64_t *>(ksk),
-
-      lwe_ciphertext_count);
-}
-
-void cleanup_cuda_integer_radix_cmux(cuda_stream_t *stream,
-                                     int8_t **mem_ptr_void) {
-
-  int_cmux_buffer<uint64_t> *mem_ptr =
-      (int_cmux_buffer<uint64_t> *)(*mem_ptr_void);
-  mem_ptr->release(stream);
-}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
@@ -1,102 +0,0 @@
-#ifndef CUDA_INTEGER_CMUX_CUH
-#define CUDA_INTEGER_CMUX_CUH
-
-#include "integer.cuh"
-#include <omp.h>
-
-template <typename Torus>
-__host__ void zero_out_if(cuda_stream_t *stream, Torus *lwe_array_out,
-                          Torus *lwe_array_input, Torus *lwe_condition,
-                          int_zero_out_if_buffer<Torus> *mem_ptr,
-                          int_radix_lut<Torus> *predicate, void *bsk,
-                          Torus *ksk, uint32_t num_radix_blocks) {
-  cudaSetDevice(stream->gpu_index);
-  auto params = mem_ptr->params;
-
-  int big_lwe_size = params.big_lwe_dimension + 1;
-
-  // Left message is shifted
-  int num_blocks = 0, num_threads = 0;
-  int num_entries = (params.big_lwe_dimension + 1);
-  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
-
-  // We can't use integer_radix_apply_bivariate_lookup_table_kb since the
-  // second operand is fixed
-  auto tmp_lwe_array_input = mem_ptr->tmp;
-  for (int i = 0; i < num_radix_blocks; i++) {
-    auto lwe_array_out_block = tmp_lwe_array_input + i * big_lwe_size;
-    auto lwe_array_input_block = lwe_array_input + i * big_lwe_size;
-
-    device_pack_bivariate_blocks<<<num_blocks, num_threads, 0,
-                                   stream->stream>>>(
-        lwe_array_out_block, predicate->lwe_indexes_in, lwe_array_input_block,
-        lwe_condition, predicate->lwe_indexes_in, params.big_lwe_dimension,
-        params.message_modulus, 1);
-    check_cuda_error(cudaGetLastError());
-  }
-
-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      stream, lwe_array_out, tmp_lwe_array_input, bsk, ksk, num_radix_blocks,
-      predicate);
-}
-
-template <typename Torus>
-__host__ void
-host_integer_radix_cmux_kb(cuda_stream_t *stream, Torus *lwe_array_out,
-                           Torus *lwe_condition, Torus *lwe_array_true,
-                           Torus *lwe_array_false,
-                           int_cmux_buffer<Torus> *mem_ptr, void *bsk,
-                           Torus *ksk, uint32_t num_radix_blocks) {
-
-  auto params = mem_ptr->params;
-
-  // Since our CPU threads will be working on different streams we shall assert
-  // the work in the main stream is completed
-  stream->synchronize();
-  auto true_stream = mem_ptr->zero_if_true_buffer->local_stream;
-  auto false_stream = mem_ptr->zero_if_false_buffer->local_stream;
-
-#pragma omp parallel sections
-  {
-    // Both sections may be executed in parallel
-#pragma omp section
-    {
-      auto mem_true = mem_ptr->zero_if_true_buffer;
-      zero_out_if(true_stream, mem_ptr->tmp_true_ct, lwe_array_true,
-                  lwe_condition, mem_true, mem_ptr->inverted_predicate_lut, bsk,
-                  ksk, num_radix_blocks);
-    }
-#pragma omp section
-    {
-      auto mem_false = mem_ptr->zero_if_false_buffer;
-      zero_out_if(false_stream, mem_ptr->tmp_false_ct, lwe_array_false,
-                  lwe_condition, mem_false, mem_ptr->predicate_lut, bsk, ksk,
-                  num_radix_blocks);
-    }
-  }
-  cuda_synchronize_stream(true_stream);
-  cuda_synchronize_stream(false_stream);
-
-  // If the condition was true, true_ct will have kept its value and false_ct
-  // will be 0. If the condition was false, true_ct will be 0 and false_ct will
-  // have kept its value.
-  auto added_cts = mem_ptr->tmp_true_ct;
-  host_addition(stream, added_cts, mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
-                params.big_lwe_dimension, num_radix_blocks);
-
-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      stream, lwe_array_out, added_cts, bsk, ksk, num_radix_blocks,
-      mem_ptr->message_extract_lut);
-}
-
-template <typename Torus>
-__host__ void scratch_cuda_integer_radix_cmux_kb(
-    cuda_stream_t *stream, int_cmux_buffer<Torus> **mem_ptr,
-    std::function<Torus(Torus)> predicate_lut_f, uint32_t num_radix_blocks,
-    int_radix_params params, bool allocate_gpu_memory) {
-
-  cudaSetDevice(stream->gpu_index);
-  *mem_ptr = new int_cmux_buffer<Torus>(stream, predicate_lut_f, params,
-                                        num_radix_blocks, allocate_gpu_memory);
-}
-#endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cu
@@ -1,83 +0,0 @@
-#include "integer/comparison.cuh"
-
-void scratch_cuda_integer_radix_comparison_kb_64(
-    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
-    uint32_t polynomial_size, uint32_t big_lwe_dimension,
-    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
-    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
-    uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
-    PBS_TYPE pbs_type, COMPARISON_TYPE op_type, bool is_signed,
-    bool allocate_gpu_memory) {
-
-  int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
-                          big_lwe_dimension, small_lwe_dimension, ks_level,
-                          ks_base_log, pbs_level, pbs_base_log, grouping_factor,
-                          message_modulus, carry_modulus);
-
-  switch (op_type) {
-  case EQ:
-  case NE:
-    scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
-        stream, (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks,
-        params, op_type, false, allocate_gpu_memory);
-    break;
-  case GT:
-  case GE:
-  case LT:
-  case LE:
-  case MAX:
-  case MIN:
-    scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
-        stream, (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks,
-        params, op_type, is_signed, allocate_gpu_memory);
-    break;
-  }
-}
-
-void cuda_comparison_integer_radix_ciphertext_kb_64(
-    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1,
-    void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk,
-    uint32_t num_radix_blocks) {
-
-  int_comparison_buffer<uint64_t> *buffer =
-      (int_comparison_buffer<uint64_t> *)mem_ptr;
-  switch (buffer->op) {
-  case EQ:
-  case NE:
-    host_integer_radix_equality_check_kb<uint64_t>(
-        stream, static_cast<uint64_t *>(lwe_array_out),
-        static_cast<uint64_t *>(lwe_array_1),
-        static_cast<uint64_t *>(lwe_array_2), buffer, bsk,
-        static_cast<uint64_t *>(ksk), num_radix_blocks);
-    break;
-  case GT:
-  case GE:
-  case LT:
-  case LE:
-    host_integer_radix_difference_check_kb<uint64_t>(
-        stream, static_cast<uint64_t *>(lwe_array_out),
-        static_cast<uint64_t *>(lwe_array_1),
-        static_cast<uint64_t *>(lwe_array_2), buffer,
-        buffer->diff_buffer->operator_f, bsk, static_cast<uint64_t *>(ksk),
-        num_radix_blocks);
-    break;
-  case MAX:
-  case MIN:
-    host_integer_radix_maxmin_kb<uint64_t>(
-        stream, static_cast<uint64_t *>(lwe_array_out),
-        static_cast<uint64_t *>(lwe_array_1),
-        static_cast<uint64_t *>(lwe_array_2), buffer, bsk,
-        static_cast<uint64_t *>(ksk), num_radix_blocks);
-    break;
-  default:
-    PANIC("Cuda error: integer operation not supported")
-  }
-}
-
-void cleanup_cuda_integer_comparison(cuda_stream_t *stream,
-                                     int8_t **mem_ptr_void) {
-
-  int_comparison_buffer<uint64_t> *mem_ptr =
-      (int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
-  mem_ptr->release(stream);
-}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/comparison.cuh
@@ -1,584 +0,0 @@
-#ifndef CUDA_INTEGER_COMPARISON_OPS_CUH
-#define CUDA_INTEGER_COMPARISON_OPS_CUH
-
-#include "crypto/keyswitch.cuh"
-#include "device.h"
-#include "integer.cuh"
-#include "integer.h"
-#include "integer/cmux.cuh"
-#include "integer/negation.cuh"
-#include "integer/scalar_addition.cuh"
-#include "pbs/programmable_bootstrap_classic.cuh"
-#include "pbs/programmable_bootstrap_multibit.cuh"
-#include "types/complex/operations.cuh"
-#include "utils/kernel_dimensions.cuh"
-
-// lwe_dimension + 1 threads
-// todo: This kernel MUST be refactored to a binary reduction
-template <typename Torus>
-__global__ void device_accumulate_all_blocks(Torus *output, Torus *input_block,
-                                             uint32_t lwe_dimension,
-                                             uint32_t num_blocks) {
-  int idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (idx < lwe_dimension + 1) {
-    auto block = &input_block[idx];
-
-    Torus sum = block[0];
-    for (int i = 1; i < num_blocks; i++) {
-      sum += block[i * (lwe_dimension + 1)];
-    }
-
-    output[idx] = sum;
-  }
-}
-
-template <typename Torus>
-__host__ void accumulate_all_blocks(cuda_stream_t *stream, Torus *output,
-                                    Torus *input, uint32_t lwe_dimension,
-                                    uint32_t num_radix_blocks) {
-
-  cudaSetDevice(stream->gpu_index);
-  int num_blocks = 0, num_threads = 0;
-  int num_entries = (lwe_dimension + 1);
-  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
-  // Add all blocks and store in sum
-  device_accumulate_all_blocks<<<num_blocks, num_threads, 0, stream->stream>>>(
-      output, input, lwe_dimension, num_radix_blocks);
-  check_cuda_error(cudaGetLastError());
-}
-
-/* This takes an array of lwe ciphertexts, where each is an encryption of
- * either 0 or 1.
- *
- * It writes in lwe_array_out a single lwe ciphertext encrypting 1 if all input
- * blocks are 1 otherwise the block encrypts 0
- *
- */
-template <typename Torus>
-__host__ void
-are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out,
-                               Torus *lwe_array_in,
-                               int_comparison_buffer<Torus> *mem_ptr, void *bsk,
-                               Torus *ksk, uint32_t num_radix_blocks) {
-
-  cudaSetDevice(stream->gpu_index);
-  auto params = mem_ptr->params;
-  auto big_lwe_dimension = params.big_lwe_dimension;
-  auto glwe_dimension = params.glwe_dimension;
-  auto polynomial_size = params.polynomial_size;
-  auto message_modulus = params.message_modulus;
-  auto carry_modulus = params.carry_modulus;
-
-  auto are_all_block_true_buffer =
-      mem_ptr->eq_buffer->are_all_block_true_buffer;
-  auto tmp_out = are_all_block_true_buffer->tmp_out;
-
-  uint32_t total_modulus = message_modulus * carry_modulus;
-  uint32_t max_value = total_modulus - 1;
-
-  cuda_memcpy_async_gpu_to_gpu(
-      tmp_out, lwe_array_in,
-      num_radix_blocks * (big_lwe_dimension + 1) * sizeof(Torus), stream);
-
-  uint32_t remaining_blocks = num_radix_blocks;
-
-  while (remaining_blocks > 0) {
-    // Split in max_value chunks
-    uint32_t chunk_length = std::min(max_value, remaining_blocks);
-    int num_chunks = remaining_blocks / chunk_length;
-
-    // Since all blocks encrypt either 0 or 1, we can sum max_value of them
-    // as in the worst case we will be adding `max_value` ones
-    auto input_blocks = tmp_out;
-    auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
-    for (int i = 0; i < num_chunks; i++) {
-      accumulate_all_blocks(stream, accumulator, input_blocks,
-                            big_lwe_dimension, chunk_length);
-
-      accumulator += (big_lwe_dimension + 1);
-      remaining_blocks -= (chunk_length - 1);
-      input_blocks += (big_lwe_dimension + 1) * chunk_length;
-    }
-    accumulator = are_all_block_true_buffer->tmp_block_accumulated;
-    auto is_equal_to_num_blocks_map =
-        &are_all_block_true_buffer->is_equal_to_lut_map;
-
-    // Selects a LUT
-    int_radix_lut<Torus> *lut;
-    if (are_all_block_true_buffer->op == COMPARISON_TYPE::NE) {
-      // is_non_zero_lut_buffer LUT
-      lut = mem_ptr->eq_buffer->is_non_zero_lut;
-    } else {
-      if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
-          (*is_equal_to_num_blocks_map).end()) {
-        // The LUT is already computed
-        lut = (*is_equal_to_num_blocks_map)[chunk_length];
-      } else {
-        // LUT needs to be computed
-        auto new_lut = new int_radix_lut<Torus>(stream, params, max_value,
-                                                num_radix_blocks, true);
-
-        auto is_equal_to_num_blocks_lut_f = [max_value,
-                                             chunk_length](Torus x) -> Torus {
-          return (x & max_value) == chunk_length;
-        };
-        generate_device_accumulator<Torus>(
-            stream, new_lut->lut, glwe_dimension, polynomial_size,
-            message_modulus, carry_modulus, is_equal_to_num_blocks_lut_f);
-
-        (*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
-        lut = new_lut;
-      }
-    }
-
-    // Applies the LUT
-    if (remaining_blocks == 1) {
-      // In the last iteration we copy the output to the final address
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          stream, lwe_array_out, accumulator, bsk, ksk, 1, lut);
-      return;
-    } else {
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          stream, tmp_out, accumulator, bsk, ksk, num_chunks, lut);
-    }
-  }
-}
-
-/* This takes an array of lwe ciphertexts, where each is an encryption of
- * either 0 or 1.
- *
- * It writes in lwe_array_out a single lwe ciphertext encrypting 1 if at least
- * one input ciphertext encrypts 1 otherwise encrypts 0
- */
-template <typename Torus>
-__host__ void is_at_least_one_comparisons_block_true(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in,
-    int_comparison_buffer<Torus> *mem_ptr, void *bsk, Torus *ksk,
-    uint32_t num_radix_blocks) {
-  auto params = mem_ptr->params;
-  auto big_lwe_dimension = params.big_lwe_dimension;
-  auto message_modulus = params.message_modulus;
-  auto carry_modulus = params.carry_modulus;
-
-  auto buffer = mem_ptr->eq_buffer->are_all_block_true_buffer;
-
-  uint32_t total_modulus = message_modulus * carry_modulus;
-  uint32_t max_value = total_modulus - 1;
-
-  cuda_memcpy_async_gpu_to_gpu(
-      mem_ptr->tmp_lwe_array_out, lwe_array_in,
-      num_radix_blocks * (big_lwe_dimension + 1) * sizeof(Torus), stream);
-
-  uint32_t remaining_blocks = num_radix_blocks;
-  while (remaining_blocks > 0) {
-    // Split in max_value chunks
-    uint32_t chunk_length = std::min(max_value, remaining_blocks);
-    int num_chunks = remaining_blocks / chunk_length;
-
-    // Since all blocks encrypt either 0 or 1, we can sum max_value of them
-    // as in the worst case we will be adding `max_value` ones
-    auto input_blocks = mem_ptr->tmp_lwe_array_out;
-    auto accumulator = buffer->tmp_block_accumulated;
-    for (int i = 0; i < num_chunks; i++) {
-      accumulate_all_blocks(stream, accumulator, input_blocks,
-                            big_lwe_dimension, chunk_length);
-
-      accumulator += (big_lwe_dimension + 1);
-      remaining_blocks -= (chunk_length - 1);
-      input_blocks += (big_lwe_dimension + 1) * chunk_length;
-    }
-    accumulator = buffer->tmp_block_accumulated;
-
-    // Selects a LUT
-    int_radix_lut<Torus> *lut = mem_ptr->eq_buffer->is_non_zero_lut;
-
-    // Applies the LUT
-    if (remaining_blocks == 1) {
-      // In the last iteration we copy the output to the final address
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          stream, lwe_array_out, accumulator, bsk, ksk, 1, lut);
-      return;
-    } else {
-      integer_radix_apply_univariate_lookup_table_kb<Torus>(
-          stream, mem_ptr->tmp_lwe_array_out, accumulator, bsk, ksk, num_chunks,
-          lut);
-    }
-  }
-}
-
-// This takes an input slice of blocks.
-//
-// Each block can encrypt any value as long as its < message_modulus.
-//
-// It will compare blocks with 0, for either equality or difference.
-//
-// This returns a Vec of block, where each block encrypts 1 or 0
-// depending of if all blocks matched with the comparison type with 0.
-//
-// E.g. For ZeroComparisonType::Equality, if all input blocks are zero
-// than all returned block will encrypt 1
-//
-// The returned Vec will have less block than the number of input blocks.
-// The returned blocks potentially needs to be 'reduced' to one block
-// with eg are_all_comparisons_block_true.
-//
-// This function exists because sometimes it is faster to concatenate
-// multiple vec of 'boolean' shortint block before reducing them with
-// are_all_comparisons_block_true
-template <typename Torus>
-__host__ void host_compare_with_zero_equality(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in,
-    int_comparison_buffer<Torus> *mem_ptr, void *bsk, Torus *ksk,
-    int32_t num_radix_blocks, int_radix_lut<Torus> *zero_comparison) {
-
-  cudaSetDevice(stream->gpu_index);
-  auto params = mem_ptr->params;
-  auto big_lwe_dimension = params.big_lwe_dimension;
-  auto message_modulus = params.message_modulus;
-  auto carry_modulus = params.carry_modulus;
-
-  // The idea is that we will sum chunks of blocks until carries are full
-  // then we compare the sum with 0.
-  //
-  // If all blocks were 0, the sum will be zero
-  // If at least one bock was not zero, the sum won't be zero
-  uint32_t total_modulus = message_modulus * carry_modulus;
-  uint32_t message_max = message_modulus - 1;
-
-  uint32_t num_elements_to_fill_carry = (total_modulus - 1) / message_max;
-
-  size_t big_lwe_size = big_lwe_dimension + 1;
-  size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
-
-  int num_sum_blocks = 0;
-  // Accumulator
-  auto sum = lwe_array_out;
-
-  if (num_radix_blocks == 1) {
-    // Just copy
-    cuda_memcpy_async_gpu_to_gpu(sum, lwe_array_in, big_lwe_size_bytes, stream);
-    num_sum_blocks = 1;
-  } else {
-    uint32_t remainder_blocks = num_radix_blocks;
-    auto sum_i = sum;
-    auto chunk = lwe_array_in;
-    while (remainder_blocks > 1) {
-      uint32_t chunk_size =
-          std::min(remainder_blocks, num_elements_to_fill_carry);
-
-      accumulate_all_blocks(stream, sum_i, chunk, big_lwe_dimension,
-                            chunk_size);
-
-      num_sum_blocks++;
-      remainder_blocks -= (chunk_size - 1);
-
-      // Update operands
-      chunk += (chunk_size - 1) * big_lwe_size;
-      sum_i += big_lwe_size;
-    }
-  }
-
-  integer_radix_apply_univariate_lookup_table_kb<Torus>(
-      stream, sum, sum, bsk, ksk, num_sum_blocks, zero_comparison);
-  are_all_comparisons_block_true(stream, lwe_array_out, sum, mem_ptr, bsk, ksk,
-                                 num_sum_blocks);
-}
-
-template <typename Torus>
-__host__ void host_integer_radix_equality_check_kb(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_1,
-    Torus *lwe_array_2, int_comparison_buffer<Torus> *mem_ptr, void *bsk,
-    Torus *ksk, uint32_t num_radix_blocks) {
-
-  cudaSetDevice(stream->gpu_index);
-  auto eq_buffer = mem_ptr->eq_buffer;
-
-  // Applies the LUT for the comparison operation
-  auto comparisons = mem_ptr->tmp_block_comparisons;
-  integer_radix_apply_bivariate_lookup_table_kb(
-      stream, comparisons, lwe_array_1, lwe_array_2, bsk, ksk, num_radix_blocks,
-      eq_buffer->operator_lut);
-
-  // This takes a Vec of blocks, where each block is either 0 or 1.
-  //
-  // It returns a block encrypting 1 if all input blocks are 1
-  // otherwise the block encrypts 0
-  are_all_comparisons_block_true(stream, lwe_array_out, comparisons, mem_ptr,
-                                 bsk, ksk, num_radix_blocks);
-}
-
-template <typename Torus>
-__host__ void
-compare_radix_blocks_kb(cuda_stream_t *stream, Torus *lwe_array_out,
-                        Torus *lwe_array_left, Torus *lwe_array_right,
-                        int_comparison_buffer<Torus> *mem_ptr, void *bsk,
-                        Torus *ksk, uint32_t num_radix_blocks) {
-
-  auto params = mem_ptr->params;
-  auto big_lwe_dimension = params.big_lwe_dimension;
-  auto message_modulus = params.message_modulus;
-  auto carry_modulus = params.carry_modulus;
-
-  // When rhs > lhs, the subtraction will overflow, and the bit of padding will
-  // be set to 1
-  // meaning that the output of the pbs will be the negative (modulo message
-  // space)
-  //
-  // Example:
-  // lhs: 1, rhs: 3, message modulus: 4, carry modulus 4
-  // lhs - rhs = -2 % (4 * 4) = 14 = 1|1110 (padding_bit|b4b3b2b1)
-  // Since there was an overflow the bit of padding is 1 and not 0.
-  // When applying the LUT for an input value of 14 we would expect 1,
-  // but since the bit of padding is 1, we will get -1 modulus our message
-  // space, so (-1) % (4 * 4) = 15 = 1|1111 We then add one and get 0 = 0|0000
-
-  // Subtract
-  // Here we need the true lwe sub, not the one that comes from shortint.
-  host_subtraction(stream, lwe_array_out, lwe_array_left, lwe_array_right,
-                   big_lwe_dimension, num_radix_blocks);
-
-  // Apply LUT to compare to 0
-  auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
-  integer_radix_apply_univariate_lookup_table_kb(
-      stream, lwe_array_out, lwe_array_out, bsk, ksk, num_radix_blocks,
-      is_non_zero_lut);
-
-  // Add one
-  // Here Lhs can have the following values: (-1) % (message modulus * carry
-  // modulus), 0, 1 So the output values after the addition will be: 0, 1, 2
-  host_integer_radix_add_scalar_one_inplace(stream, lwe_array_out,
-                                            big_lwe_dimension, num_radix_blocks,
-                                            message_modulus, carry_modulus);
-}
-
-// Reduces a vec containing shortint blocks that encrypts a sign
-// (inferior, equal, superior) to one single shortint block containing the
-// final sign
-template <typename Torus>
-__host__ void
-tree_sign_reduction(cuda_stream_t *stream, Torus *lwe_array_out,
-                    Torus *lwe_block_comparisons,
-                    int_tree_sign_reduction_buffer<Torus> *tree_buffer,
-                    std::function<Torus(Torus)> sign_handler_f, void *bsk,
-                    Torus *ksk, uint32_t num_radix_blocks) {
-
-  cudaSetDevice(stream->gpu_index);
-  auto params = tree_buffer->params;
-  auto big_lwe_dimension = params.big_lwe_dimension;
-  auto glwe_dimension = params.glwe_dimension;
-  auto polynomial_size = params.polynomial_size;
-  auto message_modulus = params.message_modulus;
-  auto carry_modulus = params.carry_modulus;
-
-  // Tree reduction
-  // Reduces a vec containing shortint blocks that encrypts a sign
-  // (inferior, equal, superior) to one single shortint block containing the
-  // final sign
-  size_t big_lwe_size = big_lwe_dimension + 1;
-  size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
-
-  auto x = tree_buffer->tmp_x;
-  auto y = tree_buffer->tmp_y;
-  if (x != lwe_block_comparisons)
-    cuda_memcpy_async_gpu_to_gpu(x, lwe_block_comparisons,
-                                 big_lwe_size_bytes * num_radix_blocks, stream);
-
-  uint32_t partial_block_count = num_radix_blocks;
-
-  auto inner_tree_leaf = tree_buffer->tree_inner_leaf_lut;
-  while (partial_block_count > 2) {
-    pack_blocks(stream, y, x, big_lwe_dimension, partial_block_count, 4);
-
-    integer_radix_apply_univariate_lookup_table_kb<Torus>(
-        stream, x, y, bsk, ksk, partial_block_count >> 1, inner_tree_leaf);
-
-    if ((partial_block_count % 2) != 0) {
-      partial_block_count >>= 1;
-      partial_block_count++;
-
-      auto last_y_block = y + (partial_block_count - 1) * big_lwe_size;
-      auto last_x_block = x + (partial_block_count - 1) * big_lwe_size;
-
-      cuda_memcpy_async_gpu_to_gpu(last_x_block, last_y_block,
-                                   big_lwe_size_bytes, stream);
-    } else {
-      partial_block_count >>= 1;
-    }
-  }
-
-  auto last_lut = tree_buffer->tree_last_leaf_lut;
-  auto block_selector_f = tree_buffer->block_selector_f;
-  std::function<Torus(Torus)> f;
-
-  if (partial_block_count == 2) {
-    pack_blocks(stream, y, x, big_lwe_dimension, partial_block_count, 4);
-
-    f = [block_selector_f, sign_handler_f](Torus x) -> Torus {
-      int msb = (x >> 2) & 3;
-      int lsb = x & 3;
-
-      int final_sign = block_selector_f(msb, lsb);
-      return sign_handler_f(final_sign);
-    };
-  } else {
-    // partial_block_count == 1
-    y = x;
-    f = sign_handler_f;
-  }
-  generate_device_accumulator<Torus>(stream, last_lut->lut, glwe_dimension,
-                                     polynomial_size, message_modulus,
-                                     carry_modulus, f);
-
-  // Last leaf
-  integer_radix_apply_univariate_lookup_table_kb(stream, lwe_array_out, y, bsk,
-                                                 ksk, 1, last_lut);
-}
-
-template <typename Torus>
-__host__ void host_integer_radix_difference_check_kb(
-    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_left,
-    Torus *lwe_array_right, int_comparison_buffer<Torus> *mem_ptr,
-    std::function<Torus(Torus)> reduction_lut_f, void *bsk, Torus *ksk,
-    uint32_t num_radix_blocks) {
-
-  cudaSetDevice(stream->gpu_index);
-  auto diff_buffer = mem_ptr->diff_buffer;
-
-  auto params = mem_ptr->params;
-  auto big_lwe_dimension = params.big_lwe_dimension;
-  auto big_lwe_size = big_lwe_dimension + 1;
-  auto message_modulus = params.message_modulus;
-  auto carry_modulus = params.carry_modulus;
-
-  uint32_t packed_num_radix_blocks = num_radix_blocks;
-  auto lhs = lwe_array_left;
-  auto rhs = lwe_array_right;
-  if (carry_modulus >= message_modulus) {
-    // Packing is possible
-    // Pack inputs
-    Torus *packed_left = diff_buffer->tmp_packed_left;
-    Torus *packed_right = diff_buffer->tmp_packed_right;
-    // In case the ciphertext is signed, the sign block and the one before it
-    // are handled separately
-    if (mem_ptr->is_signed) {
-      packed_num_radix_blocks -= 2;
-    }
-    pack_blocks(stream, packed_left, lwe_array_left, big_lwe_dimension,
-                packed_num_radix_blocks, message_modulus);
-    pack_blocks(stream, packed_right, lwe_array_right, big_lwe_dimension,
-                packed_num_radix_blocks, message_modulus);
-    // From this point we have half number of blocks
-    packed_num_radix_blocks /= 2;
-
-    // Clean noise
-    auto identity_lut = mem_ptr->identity_lut;
-    integer_radix_apply_univariate_lookup_table_kb(
-        stream, packed_left, packed_left, bsk, ksk, packed_num_radix_blocks,
-        identity_lut);
-    integer_radix_apply_univariate_lookup_table_kb(
-        stream, packed_right, packed_right, bsk, ksk, packed_num_radix_blocks,
-        identity_lut);
-
-    lhs = packed_left;
-    rhs = packed_right;
-  }
-
-  // comparisons will be assigned
-  // - 0 if lhs < rhs
-  // - 1 if lhs == rhs
-  // - 2 if lhs > rhs
-  auto comparisons = mem_ptr->tmp_block_comparisons;
-  auto num_comparisons = 0;
-  if (!mem_ptr->is_signed) {
-    // Compare packed blocks, or simply the total number of radix blocks in the
-    // inputs
-    compare_radix_blocks_kb(stream, comparisons, lhs, rhs, mem_ptr, bsk, ksk,
-                            packed_num_radix_blocks);
-    num_comparisons = packed_num_radix_blocks;
-  } else {
-    // Packing is possible
-    if (carry_modulus >= message_modulus) {
-      // Compare (num_radix_blocks - 2) / 2 packed blocks
-      compare_radix_blocks_kb(stream, comparisons, lhs, rhs, mem_ptr, bsk, ksk,
-                              packed_num_radix_blocks);
-
-      // Compare the last block before the sign block separately
-      auto identity_lut = mem_ptr->identity_lut;
-      Torus *last_left_block_before_sign_block =
-          diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
-      Torus *last_right_block_before_sign_block =
-          diff_buffer->tmp_packed_right +
-          packed_num_radix_blocks * big_lwe_size;
-      integer_radix_apply_univariate_lookup_table_kb(
-          stream, last_left_block_before_sign_block,
-          lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsk, ksk, 1,
-          identity_lut);
-      integer_radix_apply_univariate_lookup_table_kb(
-          stream, last_right_block_before_sign_block,
-          lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsk, ksk, 1,
-          identity_lut);
-      compare_radix_blocks_kb(
-          stream, comparisons + packed_num_radix_blocks * big_lwe_size,
-          last_left_block_before_sign_block, last_right_block_before_sign_block,
-          mem_ptr, bsk, ksk, 1);
-      // Compare the sign block separately
-      integer_radix_apply_bivariate_lookup_table_kb(
-          stream, comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
-          lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
-          lwe_array_right + (num_radix_blocks - 1) * big_lwe_size, bsk, ksk, 1,
-          mem_ptr->signed_lut);
-      num_comparisons = packed_num_radix_blocks + 2;
-
-    } else {
-      compare_radix_blocks_kb(stream, comparisons, lwe_array_left,
-                              lwe_array_right, mem_ptr, bsk, ksk,
-                              num_radix_blocks - 1);
-      // Compare the sign block separately
-      integer_radix_apply_bivariate_lookup_table_kb(
-          stream, comparisons + (num_radix_blocks - 1) * big_lwe_size,
-          lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
-          lwe_array_right + (num_radix_blocks - 1) * big_lwe_size, bsk, ksk, 1,
-          mem_ptr->signed_lut);
-      num_comparisons = num_radix_blocks;
-    }
-  }
-
-  // Reduces a vec containing radix blocks that encrypts a sign
-  // (inferior, equal, superior) to one single radix block containing the
-  // final sign
-  tree_sign_reduction(stream, lwe_array_out, comparisons,
-                      mem_ptr->diff_buffer->tree_buffer, reduction_lut_f, bsk,
-                      ksk, num_comparisons);
-}
-
-template <typename Torus>
-__host__ void scratch_cuda_integer_radix_comparison_check_kb(
-    cuda_stream_t *stream, int_comparison_buffer<Torus> **mem_ptr,
-    uint32_t num_radix_blocks, int_radix_params params, COMPARISON_TYPE op,
-    bool is_signed, bool allocate_gpu_memory) {
-
-  cudaSetDevice(stream->gpu_index);
-  *mem_ptr = new int_comparison_buffer<Torus>(
-      stream, op, params, num_radix_blocks, is_signed, allocate_gpu_memory);
-}
-
-template <typename Torus>
-__host__ void
-host_integer_radix_maxmin_kb(cuda_stream_t *stream, Torus *lwe_array_out,
-                             Torus *lwe_array_left, Torus *lwe_array_right,
-                             int_comparison_buffer<Torus> *mem_ptr, void *bsk,
-                             Torus *ksk, uint32_t total_num_radix_blocks) {
-
-  cudaSetDevice(stream->gpu_index);
-  // Compute the sign
-  host_integer_radix_difference_check_kb(
-      stream, mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
-      mem_ptr, mem_ptr->identity_lut_f, bsk, ksk, total_num_radix_blocks);
-
-  // Selector
-  host_integer_radix_cmux_kb(
-      stream, lwe_array_out, mem_ptr->tmp_lwe_array_out, lwe_array_left,
-      lwe_array_right, mem_ptr->cmux_buffer, bsk, ksk, total_num_radix_blocks);
-}
-
-#endif
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
J-B Orfila	d220008757	(artifact_tches): Cleaning	2025-05-14 11:44:32 +02:00
Loris	b1b55b6426	Typos	2025-02-03 18:20:02 +01:00
Loris	77bea74ac9	(float) Tches Artifact 2025 Co-authored-by: Loris Bergerat <loris.bergerat@zama.ai>	2025-01-31 20:04:58 +01:00