f

2026-01-11 07:38:08 -05:00 · 2024-02-26 14:32:44 +01:00 · 2024-02-26 14:06:29 +01:00 · 2024-02-26 11:19:10 +01:00 · 2024-02-23 19:00:26 +01:00 · 2024-02-23 18:58:05 +01:00
785 changed files with 128021 additions and 31404 deletions
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -5,13 +5,3 @@ failure-output = "final"
 fail-fast = false
 retries = 0
 slow-timeout = "5m"
-
-
-[[profile.ci.overrides]]
-filter = 'test(/^.*param_message_1_carry_[567]_ks_pbs$/) or test(/^.*param_message_4_carry_4_ks_pbs$/)'
-retries = 3
-
-[[profile.ci.overrides]]
-filter = 'test(/^.*param_message_[23]_carry_[23]_ks_pbs$/)'
-retries = 1
-
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,6 +1,6 @@
 ---
 name: Bug report
-about: Report a problem with concrete
+about: Report a problem with TFHE-rs
 title: ''
 labels: triage_required
 assignees: ''
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,6 +1,6 @@
 ---
 name: Feature request
-about: Suggest an idea for concrete
+about: Suggest an idea for TFHE-rs
 title: ''
 labels: feature_request
 assignees: ''
--- a/.github/workflows/approve_label.yml
+++ b/.github/workflows/approve_label.yml
@@ -0,0 +1,34 @@
+# Manage approved label in pull request
+name: PR approved label manager
+
+on:
+  pull_request:
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  trigger-tests:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+    steps:
+      - name: Get current labels
+        uses: snnaplab/get-labels-action@f426df40304808ace3b5282d4f036515f7609576
+
+      # Remove label if a push is performed after an approval
+      - name: Remove approved label
+        if: ${{ github.event_name == 'pull_request' && contains(fromJSON(env.LABELS), 'approved') }}
+        uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
+        with:
+          # We use a PAT to have the same user (zama-bot) for label deletion as for creation.
+          github_token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+          labels: approved
+
+      # Add label only if the review is approved and if the label doesn't already exist
+      - name: Add approved label
+        uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf
+        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
+        with:
+          # We need to use a PAT to be able to trigger `labeled` event for the other workflow.
+          github_token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+          labels: approved
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -5,66 +5,59 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-    # All the inputs are provided by Slab
-    inputs:
-      instance_id:
-        description: "AWS instance ID"
-        type: string
-      instance_image_id:
-        description: "AWS instance AMI ID"
-        type: string
-      instance_type:
-        description: "AWS instance product type"
-        type: string
-      runner_name:
-        description: "Action runner name"
-        type: string
-      request_id:
-        description: 'Slab request ID'
-        type: string
-      fork_repo:
-        description: 'Name of forked repo as user/repo'
-        type: string
-      fork_git_sha:
-        description: 'Git SHA to checkout from fork'
-        type: string
+  pull_request:

 jobs:
-  fast-tests:
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
-      cancel-in-progress: true
-    runs-on: ${{ inputs.runner_name }}
+  setup-ec2:
+    name: Setup EC2 instance (fast-tests)
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
+      aws-region: ${{ steps.start-instance.outputs.aws-region }}
    steps:
-      # Step used for log purpose.
-      - name: Instance configuration used
-        run: |
-          echo "ID: ${{ inputs.instance_id }}"
-          echo "AMI: ${{ inputs.instance_image_id }}"
-          echo "Type: ${{ inputs.instance_type }}"
-          echo "Request ID: ${{ inputs.request_id }}"
-          echo "Fork repo: ${{ inputs.fork_repo }}"
-          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
-
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
        with:
-          repository: ${{ inputs.fork_repo }}
-          ref: ${{ inputs.fork_git_sha }}
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: cpu-big
+
+  fast-tests:
+    name: Fast CPU tests
+    needs: setup-ec2
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true
+
+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng

      - name: Run core tests
        run: |
@@ -106,14 +99,39 @@ jobs:
        run: |
          make test_high_level_api

+      - name: Run safe deserialization tests
+        run: |
+          make test_safe_deserialization
+
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+  teardown-ec2:
+    name: Teardown EC2 instance (fast-tests)
+    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
+    needs: [ setup-ec2, fast-tests ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          region: ${{ needs.setup-ec2.outputs.aws-region }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "EC2 teardown (fast-tests) failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_gpu_4090_tests.yml
+++ b/.github/workflows/aws_tfhe_gpu_4090_tests.yml
@@ -0,0 +1,70 @@
+# Compile and test tfhe-cuda-backend on an RTX 4090 machine
+name: TFHE Cuda Backend - 4090 full tests
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+  pull_request:
+    types: [labeled]
+
+jobs:
+  cuda-tests-linux:
+    name: CUDA tests (RTX 4090)
+    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, '4090_test') }}
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ["self-hosted", "4090-desktop"]
+
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Run fmt checks
+        run: |
+          make check_fmt_gpu
+
+      - name: Run clippy checks
+        run: |
+          make clippy_gpu
+
+      - name: Run all tests
+        run: |
+          make test_gpu
+
+      - name: Run user docs tests
+        run: |
+          make test_user_doc_gpu
+
+      - name: Test C API
+        run: |
+          make test_c_api_gpu
+
+      - uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
+        if: ${{ github.event_name == 'pull_request' }}
+        with:
+          labels: 4090_test
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_gpu_tests.yml
+++ b/.github/workflows/aws_tfhe_gpu_tests.yml
@@ -0,0 +1,138 @@
+# Compile and test tfhe-cuda-backend on an AWS instance
+name: TFHE Cuda Backend - Full tests
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+  pull_request:
+
+jobs:
+  setup-ec2:
+    name: Setup EC2 instance (cuda-tests)
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
+      aws-region: ${{ steps.start-instance.outputs.aws-region }}
+    steps:
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: gpu-test
+
+  cuda-tests-linux:
+    name: CUDA tests
+    needs: setup-ec2
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run fmt checks
+        run: |
+          make check_fmt_gpu
+
+      - name: Run clippy checks
+        run: |
+          make clippy_gpu
+
+      - name: Run all tests
+        run: |
+          make test_gpu
+
+      - name: Run user docs tests
+        run: |
+          make test_user_doc_gpu
+
+      - name: Test C API
+        run: |
+          make test_c_api_gpu
+
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "CUDA AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  teardown-ec2:
+    name: Teardown EC2 instance (cuda-tests)
+    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
+    needs: [ setup-ec2, cuda-tests-linux ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          region: ${{ needs.setup-ec2.outputs.aws-region }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "EC2 teardown (cuda-tests) failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -1,86 +1,106 @@
-name: AWS Integer Tests on CPU
+name: AWS Unsigned Integer Tests on CPU

 env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-    # All the inputs are provided by Slab
-    inputs:
-      instance_id:
-        description: "AWS instance ID"
-        type: string
-      instance_image_id:
-        description: "AWS instance AMI ID"
-        type: string
-      instance_type:
-        description: "AWS instance product type"
-        type: string
-      runner_name:
-        description: "Action runner name"
-        type: string
-      request_id:
-        description: 'Slab request ID'
-        type: string
-      fork_repo:
-        description: 'Name of forked repo as user/repo'
-        type: string
-      fork_git_sha:
-        description: 'Git SHA to checkout from fork'
-        type: string
+  pull_request:
+    types: [ labeled ]

 jobs:
-  integer-tests:
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
-      cancel-in-progress: true
-    runs-on: ${{ inputs.runner_name }}
+  setup-ec2:
+    name: Setup EC2 instance (unsigned-integer-tests)
+    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
+      aws-region: ${{ steps.start-instance.outputs.aws-region }}
    steps:
-      # Step used for log purpose.
-      - name: Instance configuration used
-        run: |
-          echo "ID: ${{ inputs.instance_id }}"
-          echo "AMI: ${{ inputs.instance_image_id }}"
-          echo "Type: ${{ inputs.instance_type }}"
-          echo "Request ID: ${{ inputs.request_id }}"
-          echo "Fork repo: ${{ inputs.fork_repo }}"
-          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
-
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
        with:
-          repository: ${{ inputs.fork_repo }}
-          ref: ${{ inputs.fork_git_sha }}
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: cpu-big
+
+  unsigned-integer-tests:
+    name: Unsigned integer tests
+    needs: setup-ec2
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true
+
+      - name: Gen Keys if required
+        run: |
+          make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
+
+      - name: Run unsigned integer multi-bit tests
+        run: |
+          AVX512_SUPPORT=ON make test_unsigned_integer_multi_bit_ci

      - name: Gen Keys if required
        run: |
          make gen_key_cache

-      - name: Run integer tests
+      - name: Run unsigned integer tests
        run: |
-          BIG_TESTS_INSTANCE=TRUE make test_integer_ci
+          AVX512_SUPPORT=ON BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_ci

      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+          SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  teardown-ec2:
+    name: Teardown EC2 instance (unsigned-integer-tests)
+    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
+    needs: [ setup-ec2, unsigned-integer-tests ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          region: ${{ needs.setup-ec2.outputs.aws-region }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "EC2 teardown (unsigned-integer-tests) failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -0,0 +1,110 @@
+name: AWS Signed Integer Tests on CPU
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+  pull_request:
+    types: [ labeled ]
+
+jobs:
+  setup-ec2:
+    name: Setup EC2 instance (signed-integer-tests)
+    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
+      aws-region: ${{ steps.start-instance.outputs.aws-region }}
+    steps:
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: cpu-big
+
+  signed-integer-tests:
+    name: Signed integer tests
+    needs: setup-ec2
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Gen Keys if required
+        run: |
+          make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
+
+      - name: Run shortint multi-bit tests
+        run: |
+          make test_shortint_multi_bit_ci
+
+      - name: Run signed integer multi-bit tests
+        run: |
+          AVX512_SUPPORT=ON make test_signed_integer_multi_bit_ci
+
+      - name: Gen Keys if required
+        run: |
+          make gen_key_cache
+
+      - name: Run signed integer tests
+        run: |
+          AVX512_SUPPORT=ON BIG_TESTS_INSTANCE=TRUE make test_signed_integer_ci
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  teardown-ec2:
+    name: Teardown EC2 instance (signed-integer-tests)
+    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
+    needs: [ setup-ec2, signed-integer-tests ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          region: ${{ needs.setup-ec2.outputs.aws-region }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "EC2 teardown (signed-integer-tests) failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -4,66 +4,61 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-    # All the inputs are provided by Slab
-    inputs:
-      instance_id:
-        description: "AWS instance ID"
-        type: string
-      instance_image_id:
-        description: "AWS instance AMI ID"
-        type: string
-      instance_type:
-        description: "AWS instance product type"
-        type: string
-      runner_name:
-        description: "Action runner name"
-        type: string
-      request_id:
-        description: 'Slab request ID'
-        type: string
-      fork_repo:
-        description: 'Name of forked repo as user/repo'
-        type: string
-      fork_git_sha:
-        description: 'Git SHA to checkout from fork'
-        type: string
+  pull_request:
+    types: [ labeled ]

 jobs:
-  shortint-tests:
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
-      cancel-in-progress: true
-    runs-on: ${{ inputs.runner_name }}
+  setup-ec2:
+    name: Setup EC2 instance (cpu-tests)
+    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
+      aws-region: ${{ steps.start-instance.outputs.aws-region }}
    steps:
-      # Step used for log purpose.
-      - name: Instance configuration used
-        run: |
-          echo "ID: ${{ inputs.instance_id }}"
-          echo "AMI: ${{ inputs.instance_image_id }}"
-          echo "Type: ${{ inputs.instance_type }}"
-          echo "Request ID: ${{ inputs.request_id }}"
-          echo "Fork repo: ${{ inputs.fork_repo }}"
-          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
-
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
        with:
-          repository: ${{ inputs.fork_repo }}
-          ref: ${{ inputs.fork_git_sha }}
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: cpu-big
+
+  cpu-tests:
+    name: CPU tests
+    needs: setup-ec2
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true
+
+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng

      - name: Run core tests
        run: |
@@ -96,15 +91,42 @@ jobs:
      - name: Run example tests
        run: |
          make test_examples
+          make dark_market
+
+      - name: Run apps tests
+        run: |
+          make test_trivium
+          make test_kreyvium

      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Shortint tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+          SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  teardown-ec2:
+    name: Teardown EC2 instance (cpu-tests)
+    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
+    needs: [ setup-ec2, cpu-tests ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          region: ${{ needs.setup-ec2.outputs.aws-region }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "EC2 teardown (cpu-tests) failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -4,66 +4,57 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
-    # All the inputs are provided by Slab
-    inputs:
-      instance_id:
-        description: "AWS instance ID"
-        type: string
-      instance_image_id:
-        description: "AWS instance AMI ID"
-        type: string
-      instance_type:
-        description: "AWS instance product type"
-        type: string
-      runner_name:
-        description: "Action runner name"
-        type: string
-      request_id:
-        description: 'Slab request ID'
-        type: string
-      fork_repo:
-        description: 'Name of forked repo as user/repo'
-        type: string
-      fork_git_sha:
-        description: 'Git SHA to checkout from fork'
-        type: string
+  pull_request:
+    types: [ labeled ]

 jobs:
-  wasm-tests:
-    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
-      cancel-in-progress: true
-    runs-on: ${{ inputs.runner_name }}
+  setup-ec2:
+    name: Setup EC2 instance (wasm-tests)
+    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
+      aws-region: ${{ steps.start-instance.outputs.aws-region }}
    steps:
-      # Step used for log purpose.
-      - name: Instance configuration used
-        run: |
-          echo "ID: ${{ inputs.instance_id }}"
-          echo "AMI: ${{ inputs.instance_image_id }}"
-          echo "Type: ${{ inputs.instance_type }}"
-          echo "Request ID: ${{ inputs.request_id }}"
-          echo "Fork repo: ${{ inputs.fork_repo }}"
-          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
-
-      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
        with:
-          repository: ${{ inputs.fork_repo }}
-          ref: ${{ inputs.fork_git_sha }}
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: cpu-small
+
+  wasm-tests:
+    name: WASM tests
+    needs: setup-ec2
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true

      - name: Run js on wasm API tests
        run: |
@@ -77,11 +68,32 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
-          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
-          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
-          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+  teardown-ec2:
+    name: Teardown EC2 instance (wasm-tests)
+    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
+    needs: [ setup-ec2, wasm-tests ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          region: ${{ needs.setup-ec2.outputs.aws-region }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "EC2 teardown (wasm-tests) failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/boolean_benchmark.yml
+++ b/.github/workflows/boolean_benchmark.yml
@@ -19,11 +19,20 @@ on:
      request_id:
        description: "Slab request ID"
        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml for an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"

 env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-boolean-benchmarks:
@@ -43,7 +52,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -53,14 +62,13 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make AVX512_SUPPORT=ON bench_boolean
+          make bench_boolean

      - name: Parse results
        run: |
@@ -88,13 +96,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -117,7 +125,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -6,6 +6,7 @@ on:
 env:
  CARGO_TERM_COLOR: always
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
@@ -17,16 +18,30 @@ jobs:

    strategy:
      matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest-large, windows-latest]
      fail-fast: false

    steps:
-      - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Install and run newline linter checks
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          wget https://github.com/fernandrone/linelint/releases/download/0.0.6/linelint-linux-amd64
+          echo "16b70fb7b471d6f95cbdc0b4e5dc2b0ac9e84ba9ecdc488f7bdf13df823aca4b linelint-linux-amd64" > checksum
+          sha256sum -c checksum || exit 1
+          chmod +x linelint-linux-amd64
+          mv linelint-linux-amd64 /usr/local/bin/linelint
+          make check_newline

      - name: Run pcc checks
        run: |
          make pcc

+      - name: Build concrete-csprng
+        run: |
+          make build_concrete_csprng
+
      - name: Build Release core
        run: |
          make build_core AVX512_SUPPORT=ON
--- a/.github/workflows/check_commit.yml
+++ b/.github/workflows/check_commit.yml
@@ -10,7 +10,7 @@ jobs:
      - name: Check first line
        uses: gsactions/commit-message-checker@16fa2d5de096ae0d35626443bcd24f1e756cafee
        with:
-          pattern: '^((feat|fix|chore|refactor|style|test|docs|doc)\(\w+\)\:) .+$'
+          pattern: '^((feat|fix|chore|refactor|style|test|docs|doc)(\(\w+\))?\:) .+$'
          flags: "gs"
          error: 'Your first line has to contain a commit type and scope like "feat(my_feature): msg".'
          excludeDescription: "true" # optional: this excludes the description body of a pull request
--- a/.github/workflows/aws_tfhe_multi_bit_tests.yml
+++ b/.github/workflows/aws_tfhe_multi_bit_tests.yml
@@ -1,9 +1,10 @@
-name: AWS Multi Bit Tests on CPU
+name: Code Coverage

 env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -33,11 +34,12 @@ on:
        type: string

 jobs:
-  multi-bit-tests:
+  code-coverage:
    concurrency:
      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
    runs-on: ${{ inputs.runner_name }}
+    timeout-minutes: 1080
    steps:
      # Step used for log purpose.
      - name: Instance configuration used
@@ -50,7 +52,7 @@ jobs:
          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: ${{ inputs.fork_repo }}
          ref: ${{ inputs.fork_git_sha }}
@@ -60,31 +62,58 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true

-      - name: Gen Keys if required
-        run: |
-          make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
+      - name: Check for file changes
+        id: changed-files
+        uses: tj-actions/changed-files@ec75ae5ab7296b81fd4cddb77294d6718932ebab
+        with:
+          files_yaml: |
+            tfhe:
+              - tfhe/src/**
+            concrete_csprng:
+              - concrete-csprng/src/**

-      - name: Run shortint multi-bit tests
+      - name: Generate Keys
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        run: |
-          make test_shortint_multi_bit_ci
+          make GEN_KEY_CACHE_COVERAGE_ONLY=TRUE gen_key_cache
+          make gen_key_cache_core_crypto

-      - name: Run integer multi-bit tests
+      - name: Run coverage for core_crypto
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
        run: |
-          make test_integer_multi_bit_ci
+          make test_core_crypto_cov AVX512_SUPPORT=ON
+
+      - name: Run coverage for boolean
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make test_boolean_cov
+
+      - name: Run coverage for shortint
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make test_shortint_cov
+
+      - name: Upload tfhe coverage to Codecov
+        uses: codecov/codecov-action@e0b68c6749509c5f83f984dd99a76a1c1a231044
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          directory: ./coverage/
+          fail_ci_if_error: true
+          files: shortint/cobertura.xml,boolean/cobertura.xml,core_crypto/cobertura.xml,core_crypto_avx512/cobertura.xml

      - name: Slack Notification
-        if: ${{ always() }}
+        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Shortint tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "Code coverage finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/core_crypto_benchmark.yml
+++ b/.github/workflows/core_crypto_benchmark.yml
@@ -1,5 +1,5 @@
-# Run PBS benchmarks on an AWS instance and return parsed results to Slab CI bot.
-name: PBS benchmarks
+# Run core crypto benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Core crypto benchmarks

 on:
  workflow_dispatch:
@@ -19,15 +19,24 @@ on:
      request_id:
        description: "Slab request ID"
        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml for an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"

 env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
-  run-pbs-benchmarks:
-    name: Execute PBS benchmarks in EC2
+  run-core-crypto-benchmarks:
+    name: Execute core crypto benchmarks in EC2
    runs-on: ${{ github.event.inputs.runner_name }}
    if: ${{ !cancelled() }}
    steps:
@@ -43,7 +52,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -53,14 +62,14 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make AVX512_SUPPORT=ON bench_pbs
+          make bench_pbs
+          make bench_ks

      - name: Parse results
        run: |
@@ -78,13 +87,13 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
-          name: ${{ github.sha }}_pbs
+          name: ${{ github.sha }}_core_crypto
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -107,7 +116,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/core_crypto_gpu_benchmark.yml
+++ b/.github/workflows/core_crypto_gpu_benchmark.yml
@@ -0,0 +1,153 @@
+# Run core crypto benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: Core crypto GPU benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml for an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  run-core-crypto-benchmarks:
+    name: Execute GPU core crypto benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make bench_pbs_gpu
+          make bench_ks_gpu
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --name-suffix avx512 \
+          --walk-subdirs \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_core_crypto
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on downloaded artifact"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "PBS GPU benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/csprng_randomness_tests.yml
+++ b/.github/workflows/csprng_randomness_tests.yml
@@ -0,0 +1,95 @@
+name: CSPRNG randomness testing Workflow
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+  pull_request:
+    types: [ labeled ]
+
+
+jobs:
+  setup-ec2:
+    name: Setup EC2 instance (csprng-randomness-tests)
+    if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
+    runs-on: ubuntu-latest
+    outputs:
+      runner-name: ${{ steps.start-instance.outputs.label }}
+      instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
+      aws-region: ${{ steps.start-instance.outputs.aws-region }}
+    steps:
+      - name: Start instance
+        id: start-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          profile: cpu-small
+
+  csprng-randomness-tests:
+    name: CSPRNG randomness tests
+    needs: setup-ec2
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Dieharder randomness test suite
+        run: |
+          make dieharder_csprng
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "concrete-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+
+  teardown-ec2:
+    name: Teardown EC2 instance (csprng-randomness-tests)
+    if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
+    needs: [ setup-ec2, csprng-randomness-tests ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop instance
+        id: stop-instance
+        uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          region: ${{ needs.setup-ec2.outputs.aws-region }}
+          label: ${{ needs.setup-ec2.outputs.runner-name }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "EC2 teardown (csprng-randomness-tests) failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/integer_benchmark.yml
+++ b/.github/workflows/integer_benchmark.yml
@@ -25,6 +25,7 @@ env:
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-integer-benchmarks:
@@ -44,7 +45,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -54,14 +55,13 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make AVX512_SUPPORT=ON bench_integer
+          make FAST_BENCH=TRUE bench_integer

      - name: Parse benchmarks to csv
        run: |
@@ -69,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -90,13 +90,13 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -119,7 +119,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/integer_full_benchmark.yml
+++ b/.github/workflows/integer_full_benchmark.yml
@@ -0,0 +1,155 @@
+# Run all integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Integer full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  prepare-matrix:
+    name: Prepare operations matrix
+    runs-on: ubuntu-latest
+    outputs:
+      op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
+    steps:
+      - name: Weekly benchmarks
+        if: ${{ github.event.inputs.user_inputs == 'weekly_benchmarks' }}
+        run: |
+          echo "OP_FLAVOR=[\"default\"]" >> ${GITHUB_ENV}
+
+      - name: Quarterly benchmarks
+        if: ${{ github.event.inputs.user_inputs == 'quarterly_benchmarks' }}
+        run: |
+          echo "OP_FLAVOR=[\"default\", \"smart\", \"unchecked\", \"misc\"]" >> ${GITHUB_ENV}
+
+      - name: Set operation flavor output
+        id: set_op_flavor
+        run: |
+          echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> ${GITHUB_OUTPUT}
+
+  integer-benchmarks:
+    name: Execute integer benchmarks for all operations flavor
+    needs: prepare-matrix
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    continue-on-error: true
+    timeout-minutes: 1440  # 24 hours
+    strategy:
+      max-parallel: 1
+      matrix:
+        command: [ integer, integer_multi_bit]
+        op_flavor: ${{ fromJson(needs.prepare-matrix.outputs.op_flavor) }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: integer-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_gpu_4090_full_benchmark.yml
+++ b/.github/workflows/integer_gpu_4090_full_benchmark.yml
@@ -0,0 +1,110 @@
+# Run all integer benchmarks on an RTX 4090 machine and return parsed results to Slab CI bot.
+name: TFHE Cuda Backend - 4090 Integer full benchmarks
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+  pull_request:
+    types: [labeled]
+  schedule:
+    # Weekly benchmarks will be triggered each Friday at 9p.m.
+    - cron: '0 21 * * 5'
+
+jobs:
+  cuda-integer-benchmarks:
+    name: Cuda integer benchmarks for all operations flavor  (RTX 4090)
+    if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}
+      cancel-in-progress: true
+    runs-on: ["self-hosted", "4090-desktop"]
+    timeout-minutes: 1440  # 24 hours
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+      matrix:
+        command: [ integer, integer_multi_bit]
+        op_flavor: [ default, unchecked ]
+
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware "rtx4090" \
+          --backend gpu \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
+        if: ${{ github.event_name == 'pull_request' }}
+        with:
+          labels: 4090_bench
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Integer RTX 4090 full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
--- a/.github/workflows/integer_gpu_benchmark.yml
+++ b/.github/workflows/integer_gpu_benchmark.yml
@@ -0,0 +1,157 @@
+# Run integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: Integer GPU benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute integer benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer GPU benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_gpu_full_benchmark.yml
+++ b/.github/workflows/integer_gpu_full_benchmark.yml
@@ -0,0 +1,162 @@
+# Run all integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: Integer GPU full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      # This input is not used in this workflow but still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml as example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  integer-benchmarks:
+    name: Execute integer benchmarks for all operations flavor
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    continue-on-error: true
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+      matrix:
+        command: [ integer, integer_multi_bit]
+        op_flavor: [ default, unchecked ]
+        # explicit include-based build matrix, of known valid options
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: integer-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer GPU full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_multi_bit_benchmark.yml
+++ b/.github/workflows/integer_multi_bit_benchmark.yml
@@ -25,6 +25,7 @@ env:
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-integer-benchmarks:
@@ -44,7 +45,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -54,14 +55,13 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

      - name: Run multi-bit benchmarks with AVX512
        run: |
-          make AVX512_SUPPORT=ON bench_integer_multi_bit
+          make FAST_BENCH=TRUE bench_integer_multi_bit

      - name: Parse benchmarks to csv
        run: |
@@ -69,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -90,13 +90,13 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -119,7 +119,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/integer_multi_bit_gpu_benchmark.yml
+++ b/.github/workflows/integer_multi_bit_gpu_benchmark.yml
@@ -0,0 +1,158 @@
+# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: Integer GPU Multi-bit benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute integer multi-bit benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "11.8"
+            cuda_arch: "70"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run multi-bit benchmarks with AVX512
+        run: |
+          make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_multi_bit_gpu
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer GPU multi-bit benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -14,8 +14,8 @@ on:
 env:
  CARGO_TERM_COLOR: always
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-  CARGO_PROFILE: release_lto_off
  FAST_TESTS: "TRUE"

 concurrency:
@@ -26,20 +26,25 @@ jobs:
  cargo-builds:
    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
    runs-on: ["self-hosted", "m1mac"]
+    # 12 hours, default is 6 hours, hopefully this is more than enough
+    timeout-minutes: 720

    steps:
-      - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true

      - name: Run pcc checks
        run: |
          make pcc

+      - name: Build concrete-csprng
+        run: |
+          make build_concrete_csprng
+
      - name: Build Release core
        run: |
          make build_core
@@ -64,6 +69,10 @@ jobs:
        run: |
          make build_c_api

+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng
+
      - name: Run core tests
        run: |
          make test_core_crypto
@@ -103,10 +112,9 @@ jobs:
        run: |
          make test_shortint_multi_bit_ci

-      # # These multi bit integer tests are too slow on M1 with low core count and low RAM
-      # - name: Run integer multi bit tests
-      #   run: |
-      #     make test_integer_multi_bit_ci
+      - name: Run integer multi bit tests
+        run: |
+          make test_integer_multi_bit_ci

  remove_label:
    name: Remove m1_test label
@@ -124,7 +132,7 @@ jobs:
      - name: Slack Notification
        if: ${{ needs.cargo-builds.result != 'skipped' }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ needs.cargo-builds.result }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -30,7 +30,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -49,7 +49,7 @@ jobs:

      - name: Publish web package
        if: ${{ inputs.push_web_package }}
-        uses: JS-DevTools/npm-publish@5a85faf05d2ade2d5b6682bfe5359915d5159c6c
+        uses: JS-DevTools/npm-publish@4b07b26a2f6e0a51846e1870223e545bae91c552
        with:
          token: ${{ secrets.NPM_TOKEN }}
          package: tfhe/pkg/package.json
@@ -65,7 +65,7 @@ jobs:

      - name: Publish Node package
        if: ${{ inputs.push_node_package }}
-        uses: JS-DevTools/npm-publish@5a85faf05d2ade2d5b6682bfe5359915d5159c6c
+        uses: JS-DevTools/npm-publish@4b07b26a2f6e0a51846e1870223e545bae91c552
        with:
          token: ${{ secrets.NPM_TOKEN }}
          package: tfhe/pkg/package.json
@@ -74,11 +74,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "tfhe release failed: (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -0,0 +1,42 @@
+# Publish a new release of concrete-csprng on crates.io.
+name: Publish concrete-csprng release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  publish_release:
+    name: Publish concrete-csprng Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Publish crate.io package
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p concrete-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "concrete-csprng release failed: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -0,0 +1,51 @@
+# Perform a security check on all the cryptographic parameter sets
+name: Parameters curves security check
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+
+on:
+  push:
+    branches:
+      - "main"
+  workflow_dispatch:
+
+jobs:
+  params-curves-security-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Checkout lattice-estimator
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: malb/lattice-estimator
+          path: lattice_estimator
+
+      - name: Install Sage
+        run: |
+          sudo apt update
+          sudo apt install -y sagemath
+
+      - name: Collect parameters
+        run: |
+          CARGO_PROFILE=devo make write_params_to_file
+
+      - name: Perform security check
+        run: |
+          PYTHONPATH=lattice_estimator sage ci/lattice_estimator.sage
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Security check for parameters finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/placeholder_workflow.yml
+++ b/.github/workflows/placeholder_workflow.yml
@@ -0,0 +1,14 @@
+# Placeholder workflow file allowing running it without having to merge to main first
+name: Placeholder Workflow
+
+on:
+  workflow_dispatch:
+
+jobs:
+  placeholder:
+    name: Placeholder
+    runs-on: ubuntu-latest
+
+    steps:
+      - run: |
+          echo "Hello this is a Placeholder Workflow"
--- a/.github/workflows/shortint_benchmark.yml
+++ b/.github/workflows/shortint_benchmark.yml
@@ -24,6 +24,7 @@ env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-shortint-benchmarks:
@@ -43,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -53,14 +54,13 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

      - name: Run benchmarks with AVX512
        run: |
-          make AVX512_SUPPORT=ON bench_shortint
+          make bench_shortint

      - name: Parse results
        run: |
@@ -88,13 +88,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: ${{ github.sha }}_shortint
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -117,7 +117,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/shortint_full_benchmark.yml
+++ b/.github/workflows/shortint_full_benchmark.yml
@@ -0,0 +1,149 @@
+# Run all shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Shortint full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml for an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  shortint-benchmarks:
+    name: Execute shortint benchmarks for all operations flavor
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      max-parallel: 1
+      matrix:
+        op_flavor: [ default, smart, unchecked ]
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_shortint
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      # This small benchmark needs to be executed only once.
+      - name: Measure key sizes
+        if: matrix.op_flavor == 'default'
+        run: |
+          make measure_shortint_key_sizes
+
+      - name: Parse key sizes results
+        if: matrix.op_flavor == 'default'
+        run: |
+          python3 ./ci/benchmark_parser.py tfhe/shortint_key_sizes.csv ${{ env.RESULTS_FILENAME }} \
+          --key-sizes \
+          --append-results
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: shortint-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Shortint full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_benchmark.yml
+++ b/.github/workflows/signed_integer_benchmark.yml
@@ -0,0 +1,129 @@
+# Run signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Signed Integer benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute signed integer benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make FAST_BENCH=TRUE bench_signed_integer
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Signed integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_full_benchmark.yml
+++ b/.github/workflows/signed_integer_full_benchmark.yml
@@ -0,0 +1,133 @@
+# Run all signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Signed Integer full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  integer-benchmarks:
+    name: Execute signed integer benchmarks for all operations flavor
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    continue-on-error: true
+    timeout-minutes: 1440  # 24 hours
+    strategy:
+      max-parallel: 1
+      matrix:
+        command: [ integer, integer_multi_bit ]
+        op_flavor: [ default, unchecked ]
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_signed_${{ matrix.command }}
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: integer-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Signed integer full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_multi_bit_benchmark.yml
+++ b/.github/workflows/signed_integer_multi_bit_benchmark.yml
@@ -0,0 +1,129 @@
+# Run signed integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot.
+name: Signed Integer Multi-bit benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute signed integer multi-bit benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Run multi-bit benchmarks with AVX512
+        run: |
+          make FAST_BENCH=TRUE bench_signed_integer_multi_bit
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Signed integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/start_benchmarks.yml
+++ b/.github/workflows/start_benchmarks.yml
@@ -20,12 +20,24 @@ on:
        description: "Run integer benches"
        type: boolean
        default: true
+      signed_integer_bench:
+        description: "Run signed integer benches"
+        type: boolean
+        default: true
      integer_multi_bit_bench:
        description: "Run integer multi bit benches"
        type: boolean
        default: true
-      pbs_bench:
-        description: "Run PBS benches"
+      signed_integer_multi_bit_bench:
+        description: "Run signed integer multi bit benches"
+        type: boolean
+        default: true
+      core_crypto_bench:
+        description: "Run core crypto benches"
+        type: boolean
+        default: true
+      core_crypto_gpu_bench:
+        description: "Run core crypto benches on GPU"
        type: boolean
        default: true
      wasm_client_bench:
@@ -38,17 +50,21 @@ jobs:
    if: ${{ (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
    strategy:
      matrix:
-        command: [boolean_bench, shortint_bench, integer_bench, integer_multi_bit_bench, pbs_bench, wasm_client_bench]
+        command: [ boolean_bench, shortint_bench,
+                   integer_bench, integer_multi_bit_bench,
+                   signed_integer_bench, signed_integer_multi_bit_bench,
+                   integer_gpu_bench, integer_multi_bit_gpu_bench,
+                   core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@de0eba32790fb9bf87471b32855a30fc8f9d5fc6
+        uses: tj-actions/changed-files@ec75ae5ab7296b81fd4cddb77294d6718932ebab
        with:
          files_yaml: |
            common_benches:
@@ -69,23 +85,33 @@ jobs:
            integer_bench:
              - tfhe/src/shortint/**
              - tfhe/src/integer/**
-              - tfhe/benches/integer/**
+              - tfhe/benches/integer/bench.rs
              - .github/workflows/integer_benchmark.yml
            integer_multi_bit_bench:
              - tfhe/src/shortint/**
              - tfhe/src/integer/**
-              - tfhe/benches/integer/**
-              - .github/workflows/integer_benchmark.yml
-            pbs_bench:
+              - tfhe/benches/integer/bench.rs
+              - .github/workflows/integer_multi_bit_benchmark.yml
+            signed_integer_bench:
+              - tfhe/src/shortint/**
+              - tfhe/src/integer/**
+              - tfhe/benches/integer/signed_bench.rs
+              - .github/workflows/signed_integer_benchmark.yml
+            signed_integer_multi_bit_bench:
+              - tfhe/src/shortint/**
+              - tfhe/src/integer/**
+              - tfhe/benches/integer/signed_bench.rs
+              - .github/workflows/signed_integer_multi_bit_benchmark.yml
+            core_crypto_bench:
              - tfhe/src/core_crypto/**
              - tfhe/benches/core_crypto/**
-              - .github/workflows/pbs_benchmark.yml
+              - .github/workflows/core_crypto_benchmark.yml
            wasm_client_bench:
              - tfhe/web_wasm_parallel_tests/**
              - .github/workflows/wasm_client_benchmark.yml

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/start_full_benchmarks.yml
+++ b/.github/workflows/start_full_benchmarks.yml
@@ -0,0 +1,66 @@
+# Start all benchmark jobs, including full shortint and integer, on Slab CI bot.
+name: Start full suite benchmarks
+
+on:
+  schedule:
+    # Weekly benchmarks will be triggered each Saturday at 1a.m.
+    - cron: '0 1 * * 6'
+    # Quarterly benchmarks will be triggered right before end of quarter, the 25th of the current month at 4a.m.
+    # These benchmarks are far longer to execute, hence the reason to run them only four times a year.
+    - cron: '0 4 25 MAR,JUN,SEP,DEC *'
+  workflow_dispatch:
+    inputs:
+      benchmark_type:
+        description: 'Benchmark type'
+        required: true
+        default: 'weekly'
+        type: choice
+        options:
+          - weekly
+          - quarterly
+
+jobs:
+  start-benchmarks:
+    if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
+    strategy:
+      matrix:
+        command: [ boolean_bench, shortint_full_bench,
+                   integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench,
+                   core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Set benchmarks type as weekly
+        if: (github.event_name == 'workflow_dispatch' && inputs.benchmark_type == 'weekly') || github.event.schedule == '0 1 * * 6'
+        run: |
+          echo "BENCH_TYPE=weekly_benchmarks" >> "${GITHUB_ENV}"
+
+      - name: Set benchmarks type as quarterly
+        if: (github.event_name == 'workflow_dispatch' && inputs.benchmark_type == 'quarterly') || github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
+        run: |
+          echo "BENCH_TYPE=quarterly_benchmarks" >> "${GITHUB_ENV}"
+
+      - name: Start AWS job in Slab
+        shell: bash
+        run: |
+          echo -n '{"command": "${{ matrix.command }}", "git_ref": "${{ github.ref }}", "sha": "${{ github.sha }}", "user_inputs": "${{ env.BENCH_TYPE }}"}' > command.json
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh command.json '${{ secrets.JOB_SECRET }}')"
+          curl -v -k \
+          --fail-with-body \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: start_aws" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @command.json \
+          ${{ secrets.SLAB_URL }}
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -13,11 +13,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0
      - name: Save repo
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: repo-archive
          path: '.'
--- a/.github/workflows/trigger_aws_tests_on_pr.yml
+++ b/.github/workflows/trigger_aws_tests_on_pr.yml
@@ -1,34 +0,0 @@
-# Trigger an AWS build each time commits are pushed to a pull request.
-name: PR AWS build trigger
-
-on:
-  pull_request:
-  pull_request_review:
-    types: [submitted]
-
-jobs:
-  trigger-tests:
-    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: write
-    steps:
-      - name: Launch fast tests
-        if: ${{ github.event_name == 'pull_request' }}
-        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
-        with:
-          allow-repeats: true
-          message: |
-            @slab-ci cpu_fast_test
-
-      - name: Launch full tests suite
-        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' }}
-        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
-        with:
-          allow-repeats: true
-          message: |
-            Pull Request has been approved :tada:
-            Launching full test suite...
-            @slab-ci cpu_test
-            @slab-ci cpu_integer_test
-            @slab-ci cpu_multi_bit_test
-            @slab-ci cpu_wasm_test
--- a/.github/workflows/wasm_client_benchmark.yml
+++ b/.github/workflows/wasm_client_benchmark.yml
@@ -19,11 +19,20 @@ on:
      request_id:
        description: "Slab request ID"
        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow
+      # could use it. If a triggering command includes a user_inputs field, then the triggered
+      # workflow must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml as an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"

 env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-wasm-client-benchmarks:
@@ -43,7 +52,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -53,10 +62,9 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

      - name: Run benchmarks
        run: |
@@ -89,13 +97,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3
        with:
          name: ${{ github.sha }}_wasm
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -118,7 +126,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.gitignore
+++ b/.gitignore
@@ -3,9 +3,9 @@ target/
 .vscode/

 # Path we use for internal-keycache during tests
-./keys/
+/keys/
 # In case of symlinked keys
-./keys
+/keys

 **/Cargo.lock
 **/*.bin
@@ -13,3 +13,9 @@ target/
 # Some of our bench outputs
 /tfhe/benchmarks_parameters
 **/*.csv
+
+# dieharder run log
+dieharder_run.log
+
+# Coverage reports
+/coverage/
--- a/.linelint.yml
+++ b/.linelint.yml
@@ -0,0 +1,14 @@
+ignore:
+  - .git
+  - target
+  - tfhe/benchmarks_parameters
+  - tfhe/web_wasm_parallel_tests/node_modules
+  - tfhe/web_wasm_parallel_tests/dist
+  - keys
+  - coverage
+
+rules:
+  # checks if file ends in a newline character
+  end-of-file:
+    enable: true
+    single-new-line: true
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,13 @@
 [workspace]
 resolver = "2"
-members = ["tfhe", "tasks", "apps/trivium"]
+members = [
+    "tfhe",
+    "tasks",
+    "apps/trivium",
+    "concrete-csprng",
+    "backends/tfhe-cuda-backend",
+    "mpi_test",
+]

 [profile.bench]
 lto = "fat"
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2023 ZAMA.
+Copyright © 2024 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/486
+++ b/486
@@ -3,18 +3,31 @@ OS:=$(shell uname)
 RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
 CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
 TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
-RS_BUILD_TOOLCHAIN:=$(shell \
-	( (echo $(TARGET_ARCH_FEATURE) | grep -q x86) && echo stable) || echo $(RS_CHECK_TOOLCHAIN))
+RS_BUILD_TOOLCHAIN:=stable
 CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
 CARGO_PROFILE?=release
-MIN_RUST_VERSION:=$(shell grep rust-version tfhe/Cargo.toml | cut -d '=' -f 2 | xargs)
+MIN_RUST_VERSION:=$(shell grep '^rust-version[[:space:]]*=' tfhe/Cargo.toml | cut -d '=' -f 2 | xargs)
 AVX512_SUPPORT?=OFF
 WASM_RUSTFLAGS:=
 BIG_TESTS_INSTANCE?=FALSE
 GEN_KEY_CACHE_MULTI_BIT_ONLY?=FALSE
+GEN_KEY_CACHE_COVERAGE_ONLY?=FALSE
 PARSE_INTEGER_BENCH_CSV_FILE?=tfhe_rs_integer_benches.csv
 FAST_TESTS?=FALSE
+FAST_BENCH?=FALSE
 BENCH_OP_FLAVOR?=DEFAULT
+NODE_VERSION=20
+FORWARD_COMPAT?=OFF
+# sed: -n, do not print input stream, -e means a script/expression
+# 1,/^version/ indicates from the first line, to the first line matching version at the start of the line
+# p indicates to print, so we keep only the start of the Cargo.toml until we hit the first version
+# entry which should be the version of tfhe
+TFHE_CURRENT_VERSION:=\
+$(shell sed -n -e '1,/^version/p' tfhe/Cargo.toml | \
+grep '^version[[:space:]]*=' | cut -d '=' -f 2 | xargs)
+# Cargo has a hard time distinguishing between our package from the workspace and a package that
+# could be a dependency, so we build an unambiguous spec here
+TFHE_SPEC:=tfhe@$(TFHE_CURRENT_VERSION)
 # This is done to avoid forgetting it, we still precise the RUSTFLAGS in the commands to be able to
 # copy paste the command in the terminal and change them if required without forgetting the flags
 export RUSTFLAGS?=-C target-cpu=native
@@ -31,10 +44,42 @@ else
 		MULTI_BIT_ONLY=
 endif

+ifeq ($(GEN_KEY_CACHE_COVERAGE_ONLY),TRUE)
+		COVERAGE_ONLY=--coverage-only
+else
+		COVERAGE_ONLY=
+endif
+
+ifeq ($(FORWARD_COMPAT),ON)
+		FORWARD_COMPAT_FEATURE=forward_compatibility
+else
+		FORWARD_COMPAT_FEATURE=
+endif
+
 # Variables used only for regex_engine example
 REGEX_STRING?=''
 REGEX_PATTERN?=''

+# tfhe-cuda-backend
+TFHECUDA_SRC="backends/tfhe-cuda-backend/cuda"
+TFHECUDA_BUILD=$(TFHECUDA_SRC)/build
+
+# Exclude these files from coverage reports
+define COVERAGE_EXCLUDED_FILES
+--exclude-files apps/trivium/src/trivium/* \
+--exclude-files apps/trivium/src/kreyvium/* \
+--exclude-files apps/trivium/src/static_deque/* \
+--exclude-files apps/trivium/src/trans_ciphering/* \
+--exclude-files tasks/src/* \
+--exclude-files tfhe/benches/boolean/* \
+--exclude-files tfhe/benches/core_crypto/* \
+--exclude-files tfhe/benches/shortint/* \
+--exclude-files tfhe/benches/integer/* \
+--exclude-files tfhe/benches/* \
+--exclude-files tfhe/examples/regex_engine/* \
+--exclude-files tfhe/examples/utilities/*
+endef
+
 .PHONY: rs_check_toolchain # Echo the rust toolchain used for checks
 rs_check_toolchain:
 	@echo $(RS_CHECK_TOOLCHAIN)
@@ -76,136 +121,212 @@ install_wasm_pack: install_rs_build_toolchain
 install_node:
 	curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | $(SHELL)
 	source ~/.bashrc
-	$(SHELL) -i -c 'nvm install node' || \
+	$(SHELL) -i -c 'nvm install $(NODE_VERSION)' || \
 	( echo "Unable to install node, unknown error." && exit 1 )

+.PHONY: install_dieharder # Install dieharder for apt distributions or macOS
+install_dieharder:
+	@dieharder -h > /dev/null 2>&1 || \
+	if [[ "$(OS)" == "Linux" ]]; then \
+		sudo apt update && sudo apt install -y dieharder; \
+	elif [[ "$(OS)" == "Darwin" ]]; then\
+		brew install dieharder; \
+	fi || ( echo "Unable to install dieharder, unknown error." && exit 1 )
+
+.PHONY: install_tarpaulin # Install tarpaulin to perform code coverage
+install_tarpaulin: install_rs_build_toolchain
+	@cargo tarpaulin --version > /dev/null 2>&1 || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
+	( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )
+
+.PHONY: check_linelint_installed # Check if linelint newline linter is installed
+check_linelint_installed:
+	@printf "\n" | linelint - > /dev/null 2>&1 || \
+	( echo "Unable to locate linelint. Try installing it: https://github.com/fernandrone/linelint/releases" && exit 1 )
+
 .PHONY: fmt # Format rust code
 fmt: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt

-.PHONT: check_fmt # Check rust code format
+.PHONY: fmt_gpu # Format rust and cuda code
+fmt_gpu: install_rs_check_toolchain
+	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
+	cd "$(TFHECUDA_SRC)" && ./format_tfhe_cuda_backend.sh
+
+.PHONY: check_fmt # Check rust code format
 check_fmt: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check

+.PHONY: check_fmt_gpu # Check rust and cuda code format
+check_fmt_gpu: install_rs_check_toolchain
+	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check
+	cd "$(TFHECUDA_SRC)" && ./format_tfhe_cuda_backend.sh -c
+
+.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
+clippy_gpu: install_rs_check_toolchain clippy_cuda_backend
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
+		--all-targets \
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+
+.PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
+fix_newline: check_linelint_installed
+	linelint -a .
+
+.PHONY: check_newline # Check for newline at end of file to be UNIX compliant
+check_newline: check_linelint_installed
+	linelint .
+
 .PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
 clippy_core: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE) \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),experimental \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_boolean # Run clippy lints enabling the boolean features
 clippy_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),boolean \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_shortint # Run clippy lints enabling the shortint features
 clippy_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),shortint \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_integer # Run clippy lints enabling the integer features
 clippy_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),integer \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
 clippy: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
 clippy_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
 clippy_js_wasm_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
 clippy_tasks:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		-p tasks -- --no-deps -D warnings

+.PHONY: clippy_trivium # Run clippy lints on Trivium app
+clippy_trivium: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		-p tfhe-trivium -- --no-deps -D warnings
+
 .PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
 clippy_all_targets:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+
+.PHONY: clippy_concrete_csprng # Run clippy lints on concrete-csprng
+clippy_concrete_csprng:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
+		--features=$(TARGET_ARCH_FEATURE) \
+		-p concrete-csprng -- --no-deps -D warnings

 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets clippy_c_api \
-clippy_js_wasm_api clippy_tasks clippy_core
+clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_trivium

 .PHONY: clippy_fast # Run main clippy targets
-clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core
+clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core \
+clippy_concrete_csprng

-.PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
-gen_key_cache: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
-		--example generates_test_keys \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe -- \
-		$(MULTI_BIT_ONLY)
+.PHONY: clippy_cuda_backend # Run clippy lints on the tfhe-cuda-backend
+clippy_cuda_backend: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
+		-p tfhe-cuda-backend -- --no-deps -D warnings

 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE) -p tfhe
+		--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),$(AVX512_FEATURE) -p tfhe; \
+			--features=$(TARGET_ARCH_FEATURE),$(AVX512_FEATURE) -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_core_experimental # Build core_crypto with experimental features
 build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental -p tfhe
+		--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p tfhe; \
+			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_boolean # Build with boolean enabled
 build_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets

 .PHONY: build_shortint # Build with shortint enabled
 build_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets

 .PHONY: build_integer # Build with integer enabled
 build_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets

 .PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
 build_tfhe_full: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
+
+.PHONY: symlink_c_libs_without_fingerprint # Link the .a and .so files without the changing hash part in target
+symlink_c_libs_without_fingerprint:
+	@./scripts/symlink_c_libs_without_fingerprint.sh \
+		--cargo-profile "$(CARGO_PROFILE)" \
+		--lib-name tfhe-c-api-dynamic-buffer

 .PHONY: build_c_api # Build the C API for boolean, shortint and integer
 build_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
-		-p tfhe
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,$(FORWARD_COMPAT_FEATURE) \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint
+
+.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer with the gpu feature enabled
+build_c_api_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,gpu \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint

 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
 build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,experimental-force_fft_algo_dif4 \
-		-p tfhe
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,experimental-force_fft_algo_dif4,$(FORWARD_COMPAT_FEATURE) \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint

 .PHONY: build_web_js_api # Build the js API targeting the web browser
 build_web_js_api: install_rs_build_toolchain install_wasm_pack
@@ -230,25 +351,70 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
 		wasm-pack build --release --target=nodejs \
 		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api

+.PHONY: build_concrete_csprng # Build concrete_csprng
+build_concrete_csprng: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng --all-targets
+
 .PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
 test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental -p tfhe -- core_crypto::
+		--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p tfhe -- core_crypto::; \
+			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- core_crypto::; \
 	fi

+.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
+test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
+		--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- core_crypto::
+	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
+		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+			--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
+			--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
+			--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,__coverage,$(AVX512_FEATURE) \
+			-p $(TFHE_SPEC) -- core_crypto::; \
+	fi
+
+.PHONY: test_gpu # Run the tests of the core_crypto and integer modules on the gpu backend
+test_gpu: test_core_crypto_gpu test_integer_gpu
+
+.PHONY: test_core_crypto_gpu # Run the tests and doctests of the core_crypto module on the gpu backend
+test_core_crypto_gpu: install_rs_build_toolchain install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+
+.PHONY: test_integer_gpu # Run the tests and doctests of the integer module on the gpu backend
+test_integer_gpu: install_rs_build_toolchain install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
+
 .PHONY: test_boolean # Run the tests of the boolean module
 test_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean -p tfhe -- boolean::
+		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::
+
+.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
+test_boolean_cov: install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
+		$(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- boolean::

 .PHONY: test_c_api_rs # Run the rust tests for the C API
 test_c_api_rs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
-		-p tfhe \
+		-p $(TFHE_SPEC) \
 		c_api

 .PHONY: test_c_api_c # Run the C tests for the C API
@@ -258,56 +424,119 @@ test_c_api_c: build_c_api
 .PHONY: test_c_api # Run all the tests for the C API
 test_c_api: test_c_api_rs test_c_api_c

+.PHONY: test_c_api_gpu # Run the C tests for the C API on the GPU backend
+test_c_api_gpu: build_c_api_gpu
+	./scripts/c_api_tests.sh --gpu
+
 .PHONY: test_shortint_ci # Run the tests for shortint ci
 test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)"
+		--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "$(TFHE_SPEC)"

 .PHONY: test_shortint_multi_bit_ci # Run the tests for shortint ci running only multibit tests
 test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
 		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --multi-bit
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "$(TFHE_SPEC)"

 .PHONY: test_shortint # Run all the tests for shortint
 test_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe -- shortint::
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
+
+.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
+test_shortint_cov: install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
+		$(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- shortint::

 .PHONY: test_integer_ci # Run the tests for integer ci
-test_integer_ci: install_rs_build_toolchain install_cargo_nextest
+test_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
-		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)"
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_unsigned_integer_ci # Run the tests for unsigned integer ci
+test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_signed_integer_ci # Run the tests for signed integer ci
+test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--signed-only --tfhe-package "$(TFHE_SPEC)"

 .PHONY: test_integer_multi_bit_ci # Run the tests for integer ci running only multibit tests
-test_integer_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
+test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
 	FAST_TESTS="$(FAST_TESTS)" \
-		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
-		--cargo-profile "$(CARGO_PROFILE)" --multi-bit
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
+		--tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for unsigned integer ci running only multibit tests
+test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
+		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_signed_integer_multi_bit_ci # Run the tests for signed integer ci running only multibit tests
+test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
+		--signed-only --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_safe_deserialization # Run the tests for safe deserialization
+test_safe_deserialization: install_rs_build_toolchain install_cargo_nextest
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_deserialization::

 .PHONY: test_integer # Run all the tests for integer
 test_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p tfhe -- integer::
+		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::

 .PHONY: test_high_level_api # Run all the tests for high_level_api
 test_high_level_api: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p tfhe \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
 		-- high_level_api::

 .PHONY: test_user_doc # Run tests from the .md documentation
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p tfhe \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
 		-- test_user_docs::

+.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
+test_user_doc_gpu: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu -p $(TFHE_SPEC) \
+		-- test_user_docs::
+
+.PHONY: test_fhe_strings # Run tests for fhe_strings example
+test_fhe_strings: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--example fhe_strings \
+		--features=$(TARGET_ARCH_FEATURE),integer
+
 .PHONY: test_regex_engine # Run tests for regex_engine example
 test_regex_engine: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
@@ -326,24 +555,36 @@ test_examples: test_sha256_bool test_regex_engine
 .PHONY: test_trivium # Run tests for trivium
 test_trivium: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		trivium --features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
-		-- --test-threads=1
+		-p tfhe-trivium -- --test-threads=1 trivium::

 .PHONY: test_kreyvium # Run tests for kreyvium
 test_kreyvium: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		kreyvium --features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
-		-- --test-threads=1
+		-p tfhe-trivium -- --test-threads=1 kreyvium::
+
+.PHONY: test_concrete_csprng # Run concrete-csprng tests
+test_concrete_csprng:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng

 .PHONY: doc # Build rust doc
 doc: install_rs_check_toolchain
-	RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
+	RUSTDOCFLAGS="--html-in-header katex-header.html" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer --no-deps
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer --no-deps -p $(TFHE_SPEC)

 .PHONY: docs # Build rust doc alias for doc
 docs: doc

+.PHONY: lint_doc # Build rust doc with linting enabled
+lint_doc: install_rs_check_toolchain
+	RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
+	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --no-deps
+
+.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
+lint_docs: lint_doc
+
 .PHONY: format_doc_latex # Format the documentation latex equations to avoid broken rendering.
 format_doc_latex:
 	cargo xtask format_latex_doc
@@ -357,17 +598,17 @@ format_doc_latex:
 check_compile_tests:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
 		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
-		-p tfhe
+		-p $(TFHE_SPEC)

 	@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
-		"$(MAKE)" build_c_api; \
+		"$(MAKE)" build_c_api && \
 		./scripts/c_api_tests.sh --build-only; \
 	fi

 .PHONY: build_nodejs_test_docker # Build a docker image with tools to run nodejs tests for wasm API
 build_nodejs_test_docker:
 	DOCKER_BUILDKIT=1 docker build --build-arg RUST_TOOLCHAIN="$(RS_BUILD_TOOLCHAIN)" \
-		-f docker/Dockerfile.wasm_tests -t tfhe-wasm-tests .
+		-f docker/Dockerfile.wasm_tests --build-arg NODE_VERSION=$(NODE_VERSION) -t tfhe-wasm-tests .

 .PHONY: test_nodejs_wasm_api_in_docker # Run tests for the nodejs on wasm API in a docker container
 test_nodejs_wasm_api_in_docker: build_nodejs_test_docker
@@ -391,7 +632,8 @@ test_web_js_api_parallel: build_web_js_api_parallel
 .PHONY: ci_test_web_js_api_parallel # Run tests for the web wasm api
 ci_test_web_js_api_parallel: build_web_js_api_parallel
 	source ~/.nvm/nvm.sh && \
-	nvm use node && \
+	nvm install $(NODE_VERSION) && \
+	nvm use $(NODE_VERSION) && \
 	$(MAKE) -C tfhe/web_wasm_parallel_tests test-ci

 .PHONY: no_tfhe_typo # Check we did not invert the h and f in tfhe
@@ -402,42 +644,117 @@ no_tfhe_typo:
 no_dbg_log:
 	@./scripts/no_dbg_calls.sh

+.PHONY: dieharder_csprng # Run the dieharder test suite on our CSPRNG implementation
+dieharder_csprng: install_dieharder build_concrete_csprng
+	./scripts/dieharder_test.sh
+
 #
 # Benchmarks
 #

-.PHONY: bench_integer # Run benchmarks for integer
+.PHONY: bench_integer # Run benchmarks for unsigned integer
 bench_integer: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p tfhe --
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

-.PHONY: bench_integer_multi_bit # Run benchmarks for integer using multi-bit parameters
-bench_integer_multi_bit: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
+.PHONY: bench_signed_integer # Run benchmarks for signed integer
+bench_signed_integer: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-signed-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+
+.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
+bench_integer_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p tfhe --
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+
+.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
+bench_integer_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+
+.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
+bench_signed_integer_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-signed-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+
+.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
+bench_integer_multi_bit_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --

 .PHONY: bench_shortint # Run benchmarks for shortint
 bench_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
 	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+
+.PHONY: bench_oprf # Run the oprf benchmarks for shortint and integer
+bench_oprf: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench oprf-shortint-bench \
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+	RUSTFLAGS="$(RUSTFLAGS)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench oprf-integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+
+
+
+.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
+bench_shortint_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench shortint-bench \
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
+

 .PHONY: bench_boolean # Run benchmarks for boolean
 bench_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench boolean-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,$(AVX512_FEATURE) -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_pbs # Run benchmarks for PBS
 bench_pbs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,$(AVX512_FEATURE) -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+
+.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
+bench_pbs_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench pbs-bench \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+
+.PHONY: bench_ks # Run benchmarks for keyswitch
+bench_ks: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench ks-bench \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
+
+.PHONY: bench_ks_gpu # Run benchmarks for keyswitch on GPU backend
+bench_ks_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench ks-bench \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)

 .PHONY: bench_web_js_api_parallel # Run benchmarks for the web wasm api
 bench_web_js_api_parallel: build_web_js_api_parallel
@@ -452,6 +769,18 @@ ci_bench_web_js_api_parallel: build_web_js_api_parallel
 #
 # Utility tools
 #
+.PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
+gen_key_cache: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+		--example generates_test_keys \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache -- \
+		$(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
+
+.PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
+gen_key_cache_core_crypto: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
+		core_crypto::keycache::generate_keys

 .PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
 measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
@@ -484,6 +813,12 @@ parse_wasm_benchmarks: install_rs_check_toolchain
 	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
 	-- web_wasm_parallel_tests/test/benchmark_results

+.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
+write_params_to_file: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example write_params_to_file \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
+
 #
 # Real use case examples
 #
@@ -508,14 +843,17 @@ sha256_bool: install_rs_check_toolchain
 	--example sha256_bool \
 	--features=$(TARGET_ARCH_FEATURE),boolean

-.PHONY: pcc # pcc stands for pre commit checks
-pcc: no_tfhe_typo no_dbg_log check_fmt doc clippy_all check_compile_tests
+.PHONY: pcc # pcc stands for pre commit checks (except GPU)
+pcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_all check_compile_tests
+
+.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
+pcc_gpu: pcc clippy_gpu

 .PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
-fpcc: no_tfhe_typo no_dbg_log check_fmt doc clippy_fast check_compile_tests
+fpcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_fast check_compile_tests

 .PHONY: conformance # Automatically fix problems that can be fixed
-conformance: fmt
+conformance: fix_newline fmt

 .PHONY: help # Generate list of targets with descriptions
 help:
--- a/README.md
+++ b/README.md
@@ -2,36 +2,68 @@
 <!-- product name logo -->
  <img width=600 src="https://user-images.githubusercontent.com/5758427/231206749-8f146b97-3c5a-4201-8388-3ffa88580415.png">
 </p>
-<hr/>
-<p align="center">
-  <a href="https://docs.zama.ai/tfhe-rs"> 📒 Read documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a>
-</p>
-<p align="center">
-<!-- Version badge using shields.io -->
-  <a href="https://github.com/zama-ai/tfhe-rs/releases">
-    <img src="https://img.shields.io/github/v/release/zama-ai/tfhe-rs?style=flat-square">
-  </a>
-<!-- Zama Bounty Program -->
-  <a href="https://github.com/zama-ai/bounty-program">
-    <img src="https://img.shields.io/badge/Contribute-Zama%20Bounty%20Program-yellow?style=flat-square">
-  </a>
-</p>
+
 <hr/>

+<p align="center">
+  <a href="https://docs.zama.ai/tfhe-rs"> 📒 Documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
+</p>

-**TFHE-rs** is a pure Rust implementation of TFHE for boolean and integer
-arithmetics over encrypted data. It includes:
- - a **Rust** API
- - a **C** API
- - and a **client-side WASM** API

-**TFHE-rs** is meant for developers and researchers who want full control over
-what they can do with TFHE, while not having to worry about the low level
+<p align="center">
+  <a href="https://github.com/zama-ai/tfhe-rs/releases"><img src="https://img.shields.io/github/v/release/zama-ai/tfhe-rs?style=flat-square"></a>
+  <a href="LICENSE"><img src="https://img.shields.io/badge/License-BSD--3--Clause--Clear-%23ffb243?style=flat-square"></a>
+  <a href="https://github.com/zama-ai/bounty-program"><img src="https://img.shields.io/badge/Contribute-Zama%20Bounty%20Program-%23ffd208?style=flat-square"></a>
+</p>
+
+## About
+
+### What is TFHE-rs
+
+**TFHE-rs** is a pure Rust implementation of TFHE for boolean and integer arithmetics over encrypted data.
+
+It includes:
+- a **Rust** API
+- a **C** API
+- and a **client-side WASM** API
+
+TFHE-rs is designed for developers and researchers who want full control over
+what they can do with TFHE, while not having to worry about the low-level
 implementation. The goal is to have a stable, simple, high-performance, and
 production-ready library for all the advanced features of TFHE.
+<br></br>
+
+### Main features
+
+- **Low-level cryptographic library** that implements Zama’s variant of TFHE, including programmable bootstrapping
+- **Implementation of the original TFHE boolean API** that can be used as a drop-in replacement for other TFHE libraries
+- **Short integer API** that enables exact, unbounded FHE integer arithmetics with up to 8 bits of message space
+- **Size-efficient public key encryption**
+- **Ciphertext and server key compression** for efficient data transfer
+- **Full Rust API, C bindings to the Rust High-Level API, and client-side Javascript API using WASM**.
+
+*Learn more about TFHE-rs features in the [documentation](https://docs.zama.ai/tfhe-rs/readme).*
+<br></br>
+
+## Table of Contents
+- **[Getting Started](#getting-started)**
+   - [Cargo.toml configuration](#cargotoml-configuration)
+   - [A simple example](#a-simple-example)
+- **[Resources](#resources)**
+   - [TFHE deep dive](#tfhe-deep-dive)
+   - [Tutorials](#tutorials)
+   - [Documentation](#documentation)
+- **[Working with TFHE-rs](#working-with-tfhe-rs)**
+   - [Disclaimers](#disclaimers)
+   - [Citations](#citations)
+   - [Contributing](#contributing)
+   - [License](#license)
+- **[Support](#support)**
+<br></br>

 ## Getting Started

+### Cargo.toml configuration
 To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:

 + For x86_64-based machines running Unix-like OSes:
@@ -45,130 +77,138 @@ tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-un
 ```toml
 tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
 ```
-Note: users with ARM devices must use `TFHE-rs` by compiling using the `nightly` toolchain.

-
-+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) 
-running Windows:
+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:

 ```toml
 tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
 ```

-Note: aarch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs
+> [!Note]
+> You need to use a Rust version >= 1.72 to compile TFHE-rs.

-Note that when running code that uses `tfhe-rs`, it is highly recommended
-to run in release mode with cargo's `--release` flag to have the best performances possible,
-eg: `cargo run --release`.
+> [!Note]
+> aarch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs.

-Here is a full example evaluating a Boolean circuit:
+<p align="right">
+  <a href="#about" > ↑ Back to top </a> 
+</p>

-```rust
-use tfhe::boolean::prelude::*;
+### A simple example

-fn main() {
-    // We generate a set of client/server keys, using the default parameters:
-    let (client_key, server_key) = gen_keys();
+Here is a full example:

-    // We use the client secret key to encrypt two messages:
-    let ct_1 = client_key.encrypt(true);
-    let ct_2 = client_key.encrypt(false);
+``` rust
+use tfhe::prelude::*;
+use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint32, FheUint8};

-    // We use the server public key to execute a boolean circuit:
-    // if ((NOT ct_2) NAND (ct_1 AND ct_2)) then (NOT ct_2) else (ct_1 AND ct_2)
-    let ct_3 = server_key.not(&ct_2);
-    let ct_4 = server_key.and(&ct_1, &ct_2);
-    let ct_5 = server_key.nand(&ct_3, &ct_4);
-    let ct_6 = server_key.mux(&ct_5, &ct_3, &ct_4);
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Basic configuration to use homomorphic integers
+    let config = ConfigBuilder::default().build();

-    // We use the client key to decrypt the output of the circuit:
-    let output = client_key.decrypt(&ct_6);
-    assert_eq!(output, true);
+    // Key generation
+    let (client_key, server_keys) = generate_keys(config);
+
+    let clear_a = 1344u32;
+    let clear_b = 5u32;
+    let clear_c = 7u8;
+
+    // Encrypting the input data using the (private) client_key
+    // FheUint32: Encrypted equivalent to u32
+    let mut encrypted_a = FheUint32::try_encrypt(clear_a, &client_key)?;
+    let encrypted_b = FheUint32::try_encrypt(clear_b, &client_key)?;
+
+    // FheUint8: Encrypted equivalent to u8
+    let encrypted_c = FheUint8::try_encrypt(clear_c, &client_key)?;
+
+    // On the server side:
+    set_server_key(server_keys);
+
+    // Clear equivalent computations: 1344 * 5 = 6720
+    let encrypted_res_mul = &encrypted_a * &encrypted_b;
+
+    // Clear equivalent computations: 6720 >> 5 = 210
+    encrypted_a = &encrypted_res_mul >> &encrypted_b;
+
+    // Clear equivalent computations: let casted_a = a as u8;
+    let casted_a: FheUint8 = encrypted_a.cast_into();
+
+    // Clear equivalent computations: min(210, 7) = 7
+    let encrypted_res_min = &casted_a.min(&encrypted_c);
+
+    // Operation between clear and encrypted data:
+    // Clear equivalent computations: 7 & 1 = 1
+    let encrypted_res = encrypted_res_min & 1_u8;
+
+    // Decrypting on the client side:
+    let clear_res: u8 = encrypted_res.decrypt(&client_key);
+    assert_eq!(clear_res, 1_u8);
+
+    Ok(())
 }
 ```

-Another example of how the library can be used with shortints:
+To run this code, use the following command: 
+<p align="center"> <code> cargo run --release </code> </p>

-```rust
-use tfhe::shortint::prelude::*;
+> [!Note]
+> When running code that uses `TFHE-rs`, it is highly recommended
+to run in release mode with cargo's `--release` flag to have the best performance possible.

-fn main() {
-    // Generate a set of client/server keys
-    // with 2 bits of message and 2 bits of carry
-    let (client_key, server_key) = gen_keys(PARAM_MESSAGE_2_CARRY_2_KS_PBS);
+*Find an example with more explanations in [this part of the documentation](https://docs.zama.ai/tfhe-rs/getting-started/quick_start)*

-    let msg1 = 3;
-    let msg2 = 2;
+<p align="right">
+  <a href="#about" > ↑ Back to top </a> 
+</p>

-    // Encrypt two messages using the (private) client key:
-    let ct_1 = client_key.encrypt(msg1);
-    let ct_2 = client_key.encrypt(msg2);

-    // Homomorphically compute an addition
-    let ct_add = server_key.unchecked_add(&ct_1, &ct_2);

-    // Define the Hamming weight function
-    // f: x -> sum of the bits of x
-    let f = |x:u64| x.count_ones() as u64;
+## Resources 

-    // Generate the lookup table for the function
-    let acc = server_key.generate_lookup_table(f);
+### TFHE deep dive
+- [TFHE Deep Dive - Part I - Ciphertext types](https://www.zama.ai/post/tfhe-deep-dive-part-1)
+- [TFHE Deep Dive - Part II - Encodings and linear leveled operations](https://www.zama.ai/post/tfhe-deep-dive-part-2)
+- [TFHE Deep Dive - Part III - Key switching and leveled multiplications](https://www.zama.ai/post/tfhe-deep-dive-part-3)
+- [TFHE Deep Dive - Part IV - Programmable Bootstrapping](https://www.zama.ai/post/tfhe-deep-dive-part-4)
+<br></br>

-    // Compute the function over the ciphertext using the PBS
-    let ct_res = server_key.apply_lookup_table(&ct_add, &acc);
+### Tutorials
+- [Homomorphic Parity Bit](https://docs.zama.ai/tfhe-rs/tutorials/parity_bit)
+- [Homomorphic Case Changing on Ascii String](https://docs.zama.ai/tfhe-rs/tutorials/ascii_fhe_string)
+- [Boolean SHA256 with TFHE-rs](https://www.zama.ai/post/boolean-sha256-tfhe-rs)
+- [Dark Market with TFHE-rs](https://www.zama.ai/post/dark-market-tfhe-rs)
+- [Regular Expression Engine with TFHE-rs](https://www.zama.ai/post/regex-engine-tfhe-rs)

-    // Decrypt the ciphertext using the (private) client key
-    let output = client_key.decrypt(&ct_res);
-    assert_eq!(output, f(msg1 + msg2));
-}
-```

-An example using integer:
+*Explore more useful resources in [TFHE-rs tutorials](https://docs.zama.ai/tfhe-rs/tutorials) and [Awesome Zama repo](https://github.com/zama-ai/awesome-zama)*
+<br></br>
+### Documentation

-```rust
-use tfhe::integer::gen_keys_radix;
-use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS;
+Full, comprehensive documentation is available here: [https://docs.zama.ai/tfhe-rs](https://docs.zama.ai/tfhe-rs).
+<p align="right">
+  <a href="#about" > ↑ Back to top </a> 
+</p>

-fn main() {
-    // We create keys to create 16 bits integers
-    // using 8 blocks of 2 bits
-    let (cks, sks) = gen_keys_radix(PARAM_MESSAGE_2_CARRY_2_KS_PBS, 8);

-    let clear_a = 2382u16;
-    let clear_b = 29374u16;
+## Working with TFHE-rs

-    let mut a = cks.encrypt(clear_a as u64);
-    let mut b = cks.encrypt(clear_b as u64);
+### Disclaimers

-    let encrypted_max = sks.smart_max_parallelized(&mut a, &mut b);
-    let decrypted_max: u64 = cks.decrypt(&encrypted_max);
+#### Security Estimation

-    assert_eq!(decrypted_max as u16, clear_a.max(clear_b))
-}
-```
+Security estimations are done using the
+[Lattice Estimator](https://github.com/malb/lattice-estimator)
+with `red_cost_model = reduction.RC.BDGL16`.

-## Contributing
+When a new update is published in the Lattice Estimator, we update parameters accordingly.

-There are two ways to contribute to TFHE-rs:
+#### Side-Channel Attacks

- you can open issues to report bugs or typos, or to suggest new ideas
- you can ask to become an official contributor by emailing [hello@zama.ai](mailto:hello@zama.ai).
-(becoming an approved contributor involves signing our Contributor License Agreement (CLA))
-
-Only approved contributors can send pull requests, so please make sure to get in touch before you do!
-
-## Credits
-
-This library uses several dependencies and we would like to thank the contributors of those
-libraries.
-
-## Need support?
-<a target="_blank" href="https://community.zama.ai">
-  <img src="https://user-images.githubusercontent.com/5758427/231115030-21195b55-2629-4c01-9809-be5059243999.png">
-</a>
-
-## Citing TFHE-rs
+Mitigation for side-channel attacks has not yet been implemented in TFHE-rs,
+and will be released in upcoming versions.
+<br></br>

+### Citations
 To cite TFHE-rs in academic papers, please use the following entry:

 ```text
@@ -180,22 +220,31 @@ To cite TFHE-rs in academic papers, please use the following entry:
 }
 ```

-## License
+### Contributing

-This software is distributed under the BSD-3-Clause-Clear license. If you have any questions,
-please contact us at `hello@zama.ai`.
+There are two ways to contribute to TFHE-rs:

-## Disclaimers
+- [Open issues](https://github.com/zama-ai/tfhe-rs/issues/new/choose) to report bugs and typos, or to suggest new ideas
+- Request to become an official contributor by emailing [hello@zama.ai](mailto:hello@zama.ai).

-### Security Estimation
+Becoming an approved contributor involves signing our Contributor License Agreement (CLA). Only approved contributors can send pull requests, so please make sure to get in touch before you do!
+<br><br>

-Security estimations are done using the
-[Lattice Estimator](https://github.com/malb/lattice-estimator)
-with `red_cost_model = reduction.RC.BDGL16`.
+### License
+This software is distributed under the **BSD-3-Clause-Clear** license. If you have any questions, please contact us at hello@zama.ai.
+<p align="right">
+  <a href="#about" > ↑ Back to top </a> 
+</p>

-When a new update is published in the Lattice Estimator, we update parameters accordingly.

-### Side-Channel Attacks
+## Support

-Mitigation for side channel attacks have not yet been implemented in TFHE-rs,
-and will be released in upcoming versions.
+<a target="_blank" href="https://community.zama.ai">
+  <img src="https://github.com/zama-ai/tfhe-rs/assets/157474013/8da6cf5b-51a0-4c86-9e75-fd0e4a4c64a4">
+</a>
+
+🌟 If you find this project helpful or interesting, please consider giving it a star on GitHub! Your support helps to grow the community and motivates further development. 
+
+<p align="right">
+  <a href="#about" > ↑ Back to top </a> 
+</p>
--- a/apps/trivium/Cargo.toml
+++ b/apps/trivium/Cargo.toml
@@ -17,7 +17,7 @@ path = "../../tfhe"
 features = [ "boolean", "shortint", "integer", "aarch64-unix" ]

 [dev-dependencies]
-criterion = { version = "0.4", features = [ "html_reports" ]}
+criterion = { version = "0.5.1", features = [ "html_reports" ]}

 [[bench]]
 name = "trivium"
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -120,7 +120,7 @@ fn main() {

 # FHE byte Trivium implementation

-The same objects have also been implemented to stream bytes insead of booleans. They can be constructed and used in the same way via the functions `TriviumStreamByte::<u8>::new` and 
+The same objects have also been implemented to stream bytes instead of booleans. They can be constructed and used in the same way via the functions `TriviumStreamByte::<u8>::new` and 
 `TriviumStreamByte::<FheUint8>::new` with the same arguments as before. The `FheUint8` version is significantly slower than the `FheBool` version, because not running 
 with the same cryptographic parameters. Its interest lie in its trans-ciphering capabilities: `TriviumStreamByte<FheUint8>` implements the trait `TransCiphering`, 
 meaning it implements the functions `trans_encrypt_64`. This function takes as input a `FheUint64` and outputs a `FheUint64`, the output being
--- a/apps/trivium/benches/kreyvium_bool.rs
+++ b/apps/trivium/benches/kreyvium_bool.rs
@@ -6,7 +6,7 @@ use tfhe_trivium::KreyviumStream;
 use criterion::Criterion;

 pub fn kreyvium_bool_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled().enable_default_bool().build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -41,7 +41,7 @@ pub fn kreyvium_bool_gen(c: &mut Criterion) {
 }

 pub fn kreyvium_bool_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled().enable_default_bool().build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
--- a/apps/trivium/benches/kreyvium_byte.rs
+++ b/apps/trivium/benches/kreyvium_byte.rs
@@ -6,9 +6,8 @@ use tfhe_trivium::{KreyviumStreamByte, TransCiphering};
 use criterion::Criterion;

 pub fn kreyvium_byte_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .enable_function_evaluation_integers()
+    let config = ConfigBuilder::default()
+        .enable_function_evaluation()
        .build();
    let (client_key, server_key) = generate_keys(config);

@@ -36,9 +35,8 @@ pub fn kreyvium_byte_gen(c: &mut Criterion) {
 }

 pub fn kreyvium_byte_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .enable_function_evaluation_integers()
+    let config = ConfigBuilder::default()
+        .enable_function_evaluation()
        .build();
    let (client_key, server_key) = generate_keys(config);

@@ -67,9 +65,8 @@ pub fn kreyvium_byte_trans(c: &mut Criterion) {
 }

 pub fn kreyvium_byte_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .enable_function_evaluation_integers()
+    let config = ConfigBuilder::default()
+        .enable_function_evaluation()
        .build();
    let (client_key, server_key) = generate_keys(config);

--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -8,9 +8,7 @@ use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
 use criterion::Criterion;

 pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -60,9 +58,7 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -107,9 +103,7 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
 }

 pub fn kreyvium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
--- a/apps/trivium/benches/trivium_bool.rs
+++ b/apps/trivium/benches/trivium_bool.rs
@@ -6,7 +6,7 @@ use tfhe_trivium::TriviumStream;
 use criterion::Criterion;

 pub fn trivium_bool_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled().enable_default_bool().build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -41,7 +41,7 @@ pub fn trivium_bool_gen(c: &mut Criterion) {
 }

 pub fn trivium_bool_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled().enable_default_bool().build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/apps/trivium/benches/trivium_byte.rs
+++ b/apps/trivium/benches/trivium_byte.rs
@@ -6,9 +6,7 @@ use tfhe_trivium::{TransCiphering, TriviumStreamByte};
 use criterion::Criterion;

 pub fn trivium_byte_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -35,9 +33,7 @@ pub fn trivium_byte_gen(c: &mut Criterion) {
 }

 pub fn trivium_byte_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -65,9 +61,7 @@ pub fn trivium_byte_trans(c: &mut Criterion) {
 }

 pub fn trivium_byte_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -8,9 +8,7 @@ use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
 use criterion::Criterion;

 pub fn trivium_shortint_warmup(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -60,9 +58,7 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
 }

 pub fn trivium_shortint_gen(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -107,9 +103,7 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
 }

 pub fn trivium_shortint_trans(c: &mut Criterion) {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
--- a/apps/trivium/src/kreyvium/kreyvium.rs
+++ b/apps/trivium/src/kreyvium/kreyvium.rs
@@ -1,5 +1,5 @@
 //! This module implements the Kreyvium stream cipher, using booleans or FheBool
-//! for the representaion of the inner bits.
+//! for the representation of the inner bits.

 use crate::static_deque::StaticDeque;

@@ -35,7 +35,7 @@ pub struct KreyviumStream<T> {
 }

 impl KreyviumStream<bool> {
-    /// Contructor for `KreyviumStream<bool>`: arguments are the secret key and the input vector.
+    /// Constructor for `KreyviumStream<bool>`: arguments are the secret key and the input vector.
    /// Outputs a KreyviumStream object already initialized (1152 steps have been run before
    /// returning)
    pub fn new(mut key: [bool; 128], mut iv: [bool; 128]) -> KreyviumStream<bool> {
@@ -80,9 +80,9 @@ impl KreyviumStream<FheBool> {

        // Initialization of Kreyvium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_register = [false; 93].map(|x| FheBool::encrypt_trivial(x));
-        let mut b_register = [false; 84].map(|x| FheBool::encrypt_trivial(x));
-        let mut c_register = [false; 111].map(|x| FheBool::encrypt_trivial(x));
+        let mut a_register = [false; 93].map(FheBool::encrypt_trivial);
+        let mut b_register = [false; 84].map(FheBool::encrypt_trivial);
+        let mut c_register = [false; 111].map(FheBool::encrypt_trivial);

        for i in 0..93 {
            a_register[i] = key[128 - 93 + i].clone();
@@ -99,7 +99,7 @@ impl KreyviumStream<FheBool> {

        key.reverse();
        iv.reverse();
-        let iv = iv.map(|x| FheBool::encrypt_trivial(x));
+        let iv = iv.map(FheBool::encrypt_trivial);

        unset_server_key();
        KreyviumStream::<FheBool>::new_from_registers(
@@ -118,7 +118,7 @@ where
    T: KreyviumBoolInput<T> + std::marker::Send + std::marker::Sync,
    for<'a> &'a T: KreyviumBoolInput<T>,
 {
-    /// Internal generic contructor: arguments are already prepared registers, and an optional FHE
+    /// Internal generic constructor: arguments are already prepared registers, and an optional FHE
    /// server key
    fn new_from_registers(
        a_register: [T; 93],
@@ -149,7 +149,7 @@ where
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> T {
+    pub fn next_bool(&mut self) -> T {
        match &self.fhe_key {
            Some(sk) => set_server_key(sk.clone()),
            None => (),
--- a/apps/trivium/src/kreyvium/kreyvium_byte.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_byte.rs
@@ -1,5 +1,5 @@
 //! This module implements the Kreyvium stream cipher, using u8 or FheUint8
-//! for the representaion of the inner bits.
+//! for the representation of the inner bits.

 use crate::static_deque::{StaticByteDeque, StaticByteDequeInput};

@@ -31,7 +31,7 @@ impl KreyviumByteInput<FheUint8> for &FheUint8 {}
 /// representation of bits (u8 or FheUint8). To be able to compute FHE operations, it also owns
 /// an Option for a ServerKey.
 /// Since the original Kreyvium registers' sizes are not a multiple of 8, these registers (which
-/// store byte-like objects) have a size that is the eigth of the closest multiple of 8 above the
+/// store byte-like objects) have a size that is one eighth of the closest multiple of 8 above the
 /// originals' sizes.
 pub struct KreyviumStreamByte<T> {
    a_byte: StaticByteDeque<12, T>,
@@ -43,7 +43,7 @@ pub struct KreyviumStreamByte<T> {
 }

 impl KreyviumStreamByte<u8> {
-    /// Contructor for `KreyviumStreamByte<u8>`: arguments are the secret key and the input vector.
+    /// Constructor for `KreyviumStreamByte<u8>`: arguments are the secret key and the input vector.
    /// Outputs a KreyviumStream object already initialized (1152 steps have been run before
    /// returning)
    pub fn new(key_bytes: [u8; 16], iv_bytes: [u8; 16]) -> KreyviumStreamByte<u8> {
@@ -54,18 +54,15 @@ impl KreyviumStreamByte<u8> {
        let mut c_byte_reg = [0u8; 14];

        // Copy key bits into a register
-        for b in 0..12 {
-            a_byte_reg[b] = key_bytes[b + 4];
-        }
+        a_byte_reg.copy_from_slice(&key_bytes[4..]);
+
        // Copy iv bits into a register
-        for b in 0..11 {
-            b_byte_reg[b] = iv_bytes[b + 5];
-        }
+        b_byte_reg.copy_from_slice(&iv_bytes[5..]);
+
        // Copy a lot of ones in the c register
        c_byte_reg[0] = 252;
-        for b in 1..8 {
-            c_byte_reg[b] = 255;
-        }
+        c_byte_reg[1..8].fill(255);
+
        // Copy iv bits in the c register
        c_byte_reg[8] = (iv_bytes[0] << 4) | 31;
        for b in 9..14 {
@@ -100,23 +97,22 @@ impl KreyviumStreamByte<FheUint8> {

        // Initialization of Kreyvium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_byte_reg = [0u8; 12].map(|x| FheUint8::encrypt_trivial(x));
-        let mut b_byte_reg = [0u8; 11].map(|x| FheUint8::encrypt_trivial(x));
-        let mut c_byte_reg = [0u8; 14].map(|x| FheUint8::encrypt_trivial(x));
+        let mut a_byte_reg = [0u8; 12].map(FheUint8::encrypt_trivial);
+        let mut b_byte_reg = [0u8; 11].map(FheUint8::encrypt_trivial);
+        let mut c_byte_reg = [0u8; 14].map(FheUint8::encrypt_trivial);

        // Copy key bits into a register
-        for b in 0..12 {
-            a_byte_reg[b] = key_bytes[b + 4].clone();
-        }
+        a_byte_reg.clone_from_slice(&key_bytes[4..]);
+
        // Copy iv bits into a register
        for b in 0..11 {
            b_byte_reg[b] = FheUint8::encrypt_trivial(iv_bytes[b + 5]);
        }
        // Copy a lot of ones in the c register
        c_byte_reg[0] = FheUint8::encrypt_trivial(252u8);
-        for b in 1..8 {
-            c_byte_reg[b] = FheUint8::encrypt_trivial(255u8);
-        }
+
+        c_byte_reg[1..8].fill_with(|| FheUint8::encrypt_trivial(255u8));
+
        // Copy iv bits in the c register
        c_byte_reg[8] = FheUint8::encrypt_trivial((&iv_bytes[0] << 4u8) | 31u8);
        for b in 9..14 {
@@ -150,7 +146,7 @@ where
    T: KreyviumByteInput<T> + Send,
    for<'a> &'a T: KreyviumByteInput<T>,
 {
-    /// Internal generic contructor: arguments are already prepared registers, and an optional FHE
+    /// Internal generic constructor: arguments are already prepared registers, and an optional FHE
    /// server key
    fn new_from_registers(
        a_register: [T; 12],
@@ -292,6 +288,6 @@ where

 impl KreyviumStreamByte<FheUint8> {
    pub fn get_server_key(&self) -> &ServerKey {
-        &self.fhe_key.as_ref().unwrap()
+        self.fhe_key.as_ref().unwrap()
    }
 }
--- a/apps/trivium/src/kreyvium/kreyvium_shortint.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_shortint.rs
@@ -19,7 +19,7 @@ pub struct KreyviumStreamShortint {
 }

 impl KreyviumStreamShortint {
-    /// Contructor for KreyviumStreamShortint: arguments are the secret key and the input vector,
+    /// Constructor for KreyviumStreamShortint: arguments are the secret key and the input vector,
    /// and a ServerKey reference. Outputs a KreyviumStream object already initialized (1152
    /// steps have been run before returning)
    pub fn new(
@@ -75,7 +75,7 @@ impl KreyviumStreamShortint {
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> Ciphertext {
+    pub fn next_ct(&mut self) -> Ciphertext {
        let [o, a, b, c] = self.get_output_and_values(0);

        self.a.push(a);
@@ -149,7 +149,7 @@ impl KreyviumStreamShortint {
                            .unchecked_add_assign(&mut new_c, c5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_c, &temp_b);
-                        self.internal_server_key.clear_carry_assign(&mut new_c);
+                        self.internal_server_key.message_extract_assign(&mut new_c);
                        new_c
                    },
                    || {
--- a/apps/trivium/src/kreyvium/mod.rs
+++ b/apps/trivium/src/kreyvium/mod.rs
@@ -1,3 +1,4 @@
+#[allow(clippy::module_inception)]
 mod kreyvium;
 pub use kreyvium::KreyviumStream;

--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -56,7 +56,7 @@ fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
            _ => (),
        };
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
@@ -65,7 +65,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:02X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
@@ -73,7 +73,7 @@ fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:016X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 #[test]
@@ -86,7 +86,7 @@ fn kreyvium_test_1() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -105,7 +105,7 @@ fn kreyvium_test_2() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -124,7 +124,7 @@ fn kreyvium_test_3() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -161,7 +161,7 @@ fn kreyvium_test_4() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -170,7 +170,7 @@ fn kreyvium_test_4() {

 #[test]
 fn kreyvium_test_fhe_long() {
-    let config = ConfigBuilder::all_disabled().enable_default_bool().build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -217,9 +217,7 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn kreyvium_test_shortint_long() {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
@@ -302,9 +300,8 @@ fn kreyvium_test_clear_byte() {

 #[test]
 fn kreyvium_test_byte_long() {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .enable_function_evaluation_integers()
+    let config = ConfigBuilder::default()
+        .enable_function_evaluation()
        .build();
    let (client_key, server_key) = generate_keys(config);

@@ -342,9 +339,8 @@ fn kreyvium_test_byte_long() {

 #[test]
 fn kreyvium_test_fhe_byte_transciphering_long() {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .enable_function_evaluation_integers()
+    let config = ConfigBuilder::default()
+        .enable_function_evaluation()
        .build();
    let (client_key, server_key) = generate_keys(config);

--- a/apps/trivium/src/static_deque/mod.rs
+++ b/apps/trivium/src/static_deque/mod.rs
@@ -1,3 +1,4 @@
+#[allow(clippy::module_inception)]
 mod static_deque;
 pub use static_deque::StaticDeque;
 mod static_byte_deque;
--- a/apps/trivium/src/static_deque/static_byte_deque.rs
+++ b/apps/trivium/src/static_deque/static_byte_deque.rs
@@ -1,6 +1,6 @@
 //! This module implements the StaticByteDeque struct: a deque of bytes. The idea
 //! is that this is a wrapper around StaticDeque, but StaticByteDeque has an additional
-//! functionnality: it can construct the "intermediate" bytes, made of parts of other bytes.
+//! functionality: it can construct the "intermediate" bytes, made of parts of other bytes.
 //! This is pretending to store bits, and allows accessing bits in chunks of 8 consecutive.

 use crate::static_deque::StaticDeque;
@@ -77,7 +77,7 @@ where
        }

        let byte_next: &T = &self.deque[i / 8 + 1];
-        return (byte << bit_idx) | (byte_next >> (8 - bit_idx as u8));
+        (byte << bit_idx) | (byte_next >> (8 - bit_idx))
    }
 }

@@ -101,7 +101,7 @@ mod tests {
        assert!(deque.bit(7) == 0);

        // second youngest: 128
-        assert!(deque.bit(8 + 0) == 0);
+        assert!(deque.bit(8) == 0);
        assert!(deque.bit(8 + 1) == 0);
        assert!(deque.bit(8 + 2) == 0);
        assert!(deque.bit(8 + 3) == 0);
@@ -111,7 +111,7 @@ mod tests {
        assert!(deque.bit(8 + 7) > 0);

        // oldest: 64
-        assert!(deque.bit(16 + 0) == 0);
+        assert!(deque.bit(16) == 0);
        assert!(deque.bit(16 + 1) == 0);
        assert!(deque.bit(16 + 2) == 0);
        assert!(deque.bit(16 + 3) == 0);
--- a/apps/trivium/src/static_deque/static_deque.rs
+++ b/apps/trivium/src/static_deque/static_deque.rs
@@ -5,7 +5,7 @@
 use core::ops::{Index, IndexMut};

 /// StaticDeque: a struct implementing a deque whose size is known at compile time.
-/// It has 2 members: the static array conatining the data (never empty), and a cursor
+/// It has 2 members: the static array containing the data (never empty), and a cursor
 /// equal to the index of the oldest element (and the next one to be overwritten).
 #[derive(Clone)]
 pub struct StaticDeque<const N: usize, T> {
--- a/apps/trivium/src/trans_ciphering/mod.rs
+++ b/apps/trivium/src/trans_ciphering/mod.rs
@@ -4,6 +4,7 @@
 use crate::{KreyviumStreamByte, KreyviumStreamShortint, TriviumStreamByte, TriviumStreamShortint};
 use tfhe::shortint::Ciphertext;

+use tfhe::prelude::*;
 use tfhe::{set_server_key, unset_server_key, FheUint64, FheUint8, ServerKey};

 use rayon::prelude::*;
--- a/apps/trivium/src/trivium/mod.rs
+++ b/apps/trivium/src/trivium/mod.rs
@@ -1,5 +1,5 @@
-mod trivium;
-pub use trivium::TriviumStream;
+mod trivium_bool;
+pub use trivium_bool::TriviumStream;

 mod trivium_byte;
 pub use trivium_byte::TriviumStreamByte;
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -56,7 +56,7 @@ fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
            _ => (),
        };
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
@@ -65,7 +65,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:02X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
@@ -73,7 +73,7 @@ fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:016X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 #[test]
@@ -89,7 +89,7 @@ fn trivium_test_1() {

    let mut vec = Vec::<bool>::with_capacity(512 * 8);
    while vec.len() < 512 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -114,7 +114,7 @@ fn trivium_test_2() {

    let mut vec = Vec::<bool>::with_capacity(512 * 8);
    while vec.len() < 512 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -139,7 +139,7 @@ fn trivium_test_3() {

    let mut vec = Vec::<bool>::with_capacity(512 * 8);
    while vec.len() < 512 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -182,7 +182,7 @@ fn trivium_test_4() {

    let mut vec = Vec::<bool>::with_capacity(131072 * 8);
    while vec.len() < 131072 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -232,7 +232,7 @@ fn trivium_test_clear_byte() {

 #[test]
 fn trivium_test_fhe_long() {
-    let config = ConfigBuilder::all_disabled().enable_default_bool().build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -277,9 +277,7 @@ fn trivium_test_fhe_long() {

 #[test]
 fn trivium_test_fhe_byte_long() {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -316,9 +314,7 @@ fn trivium_test_fhe_byte_long() {

 #[test]
 fn trivium_test_fhe_byte_transciphering_long() {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (client_key, server_key) = generate_keys(config);

    let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -357,9 +353,7 @@ use tfhe::shortint::prelude::*;

 #[test]
 fn trivium_test_shortint_long() {
-    let config = ConfigBuilder::all_disabled()
-        .enable_default_integers()
-        .build();
+    let config = ConfigBuilder::default().build();
    let (hl_client_key, hl_server_key) = generate_keys(config);
    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
--- a/apps/trivium/src/trivium/trivium_bool.rs
+++ b/apps/trivium/src/trivium/trivium_bool.rs
@@ -1,5 +1,5 @@
 //! This module implements the Trivium stream cipher, using booleans or FheBool
-//! for the representaion of the inner bits.
+//! for the representation of the inner bits.

 use crate::static_deque::StaticDeque;

@@ -33,7 +33,7 @@ pub struct TriviumStream<T> {
 }

 impl TriviumStream<bool> {
-    /// Contructor for `TriviumStream<bool>`: arguments are the secret key and the input vector.
+    /// Constructor for `TriviumStream<bool>`: arguments are the secret key and the input vector.
    /// Outputs a TriviumStream object already initialized (1152 steps have been run before
    /// returning)
    pub fn new(key: [bool; 80], iv: [bool; 80]) -> TriviumStream<bool> {
@@ -66,9 +66,9 @@ impl TriviumStream<FheBool> {

        // Initialization of Trivium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_register = [false; 93].map(|x| FheBool::encrypt_trivial(x));
-        let mut b_register = [false; 84].map(|x| FheBool::encrypt_trivial(x));
-        let mut c_register = [false; 111].map(|x| FheBool::encrypt_trivial(x));
+        let mut a_register = [false; 93].map(FheBool::encrypt_trivial);
+        let mut b_register = [false; 84].map(FheBool::encrypt_trivial);
+        let mut c_register = [false; 111].map(FheBool::encrypt_trivial);

        for i in 0..80 {
            a_register[93 - 80 + i] = key[i].clone();
@@ -94,7 +94,7 @@ where
    T: TriviumBoolInput<T> + std::marker::Send + std::marker::Sync,
    for<'a> &'a T: TriviumBoolInput<T>,
 {
-    /// Internal generic contructor: arguments are already prepared registers, and an optional FHE
+    /// Internal generic constructor: arguments are already prepared registers, and an optional FHE
    /// server key
    fn new_from_registers(
        a_register: [T; 93],
@@ -121,7 +121,7 @@ where
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> T {
+    pub fn next_bool(&mut self) -> T {
        match &self.fhe_key {
            Some(sk) => set_server_key(sk.clone()),
            None => (),
--- a/apps/trivium/src/trivium/trivium_byte.rs
+++ b/apps/trivium/src/trivium/trivium_byte.rs
@@ -1,5 +1,5 @@
 //! This module implements the Trivium stream cipher, using u8 or FheUint8
-//! for the representaion of the inner bits.
+//! for the representation of the inner bits.

 use crate::static_deque::{StaticByteDeque, StaticByteDequeInput};

@@ -31,7 +31,7 @@ impl TriviumByteInput<FheUint8> for &FheUint8 {}
 /// representation of bits (u8 or FheUint8). To be able to compute FHE operations, it also owns
 /// an Option for a ServerKey.
 /// Since the original Trivium registers' sizes are not a multiple of 8, these registers (which
-/// store byte-like objects) have a size that is the eigth of the closest multiple of 8 above the
+/// store byte-like objects) have a size that is one eighth of the closest multiple of 8 above the
 /// originals' sizes.
 pub struct TriviumStreamByte<T> {
    a_byte: StaticByteDeque<12, T>,
@@ -41,7 +41,7 @@ pub struct TriviumStreamByte<T> {
 }

 impl TriviumStreamByte<u8> {
-    /// Contructor for `TriviumStreamByte<u8>`: arguments are the secret key and the input vector.
+    /// Constructor for `TriviumStreamByte<u8>`: arguments are the secret key and the input vector.
    /// Outputs a TriviumStream object already initialized (1152 steps have been run before
    /// returning)
    pub fn new(key: [u8; 10], iv: [u8; 10]) -> TriviumStreamByte<u8> {
@@ -81,9 +81,9 @@ impl TriviumStreamByte<FheUint8> {

        // Initialization of Trivium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_byte_reg = [0u8; 12].map(|x| FheUint8::encrypt_trivial(x));
-        let mut b_byte_reg = [0u8; 11].map(|x| FheUint8::encrypt_trivial(x));
-        let mut c_byte_reg = [0u8; 14].map(|x| FheUint8::encrypt_trivial(x));
+        let mut a_byte_reg = [0u8; 12].map(FheUint8::encrypt_trivial);
+        let mut b_byte_reg = [0u8; 11].map(FheUint8::encrypt_trivial);
+        let mut c_byte_reg = [0u8; 14].map(FheUint8::encrypt_trivial);

        for i in 0..10 {
            a_byte_reg[12 - 10 + i] = key[i].clone();
@@ -111,7 +111,7 @@ where
    T: TriviumByteInput<T> + Send,
    for<'a> &'a T: TriviumByteInput<T>,
 {
-    /// Internal generic contructor: arguments are already prepared registers, and an optional FHE
+    /// Internal generic constructor: arguments are already prepared registers, and an optional FHE
    /// server key
    fn new_from_registers(
        a_register: [T; 12],
@@ -236,6 +236,6 @@ where

 impl TriviumStreamByte<FheUint8> {
    pub fn get_server_key(&self) -> &ServerKey {
-        &self.fhe_key.as_ref().unwrap()
+        self.fhe_key.as_ref().unwrap()
    }
 }
--- a/apps/trivium/src/trivium/trivium_shortint.rs
+++ b/apps/trivium/src/trivium/trivium_shortint.rs
@@ -17,9 +17,9 @@ pub struct TriviumStreamShortint {
 }

 impl TriviumStreamShortint {
-    /// Contructor for TriviumStreamShortint: arguments are the secret key and the input vector, and
-    /// a ServerKey reference. Outputs a TriviumStream object already initialized (1152 steps
-    /// have been run before returning)
+    /// Constructor for TriviumStreamShortint: arguments are the secret key and the input vector,
+    /// and a ServerKey reference. Outputs a TriviumStream object already initialized (1152
+    /// steps have been run before returning)
    pub fn new(
        key: [Ciphertext; 80],
        iv: [u64; 80],
@@ -63,7 +63,7 @@ impl TriviumStreamShortint {
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> Ciphertext {
+    pub fn next_ct(&mut self) -> Ciphertext {
        let [o, a, b, c] = self.get_output_and_values(0);

        self.a.push(a);
@@ -113,7 +113,7 @@ impl TriviumStreamShortint {
                            .unchecked_add_assign(&mut new_a, a5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_a, &temp_c);
-                        self.internal_server_key.clear_carry_assign(&mut new_a);
+                        self.internal_server_key.message_extract_assign(&mut new_a);
                        new_a
                    },
                    || {
@@ -122,7 +122,7 @@ impl TriviumStreamShortint {
                            .unchecked_add_assign(&mut new_b, b5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_b, &temp_a);
-                        self.internal_server_key.clear_carry_assign(&mut new_b);
+                        self.internal_server_key.message_extract_assign(&mut new_b);
                        new_b
                    },
                )
@@ -135,7 +135,7 @@ impl TriviumStreamShortint {
                            .unchecked_add_assign(&mut new_c, c5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_c, &temp_b);
-                        self.internal_server_key.clear_carry_assign(&mut new_c);
+                        self.internal_server_key.message_extract_assign(&mut new_c);
                        new_c
                    },
                    || {
--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "tfhe-cuda-backend"
+version = "0.1.2"
+edition = "2021"
+authors = ["Zama team"]
+license = "BSD-3-Clause-Clear"
+description = "Cuda implementation of TFHE-rs primitives."
+homepage = "https://www.zama.ai/"
+documentation = "https://docs.zama.ai/tfhe-rs"
+repository = "https://github.com/zama-ai/tfhe-rs"
+readme = "README.md"
+keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
+
+[build-dependencies]
+cmake = { version = "0.1" }
+
+[dependencies]
+thiserror = "1.0"
--- a/backends/tfhe-cuda-backend/LICENSE
+++ b/backends/tfhe-cuda-backend/LICENSE
@@ -0,0 +1,28 @@
+BSD 3-Clause Clear License
+
+Copyright © 2024 ZAMA.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this
+list of conditions and the following disclaimer in the documentation and/or other
+materials provided with the distribution.
+
+3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
+or promote products derived from this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
+THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/backends/tfhe-cuda-backend/README.md
+++ b/backends/tfhe-cuda-backend/README.md
@@ -0,0 +1,52 @@
+# TFHE Cuda backend
+
+## Introduction
+
+The `tfhe-cuda-backend` holds the code for GPU acceleration of Zama's variant of TFHE.
+It implements CUDA/C++ functions to perform homomorphic operations on LWE ciphertexts.
+
+It provides functions to allocate memory on the GPU, to copy data back 
+and forth between the CPU and the GPU, to create and destroy Cuda streams, etc.:
+- `cuda_create_stream`, `cuda_destroy_stream`
+- `cuda_malloc`, `cuda_check_valid_malloc`
+- `cuda_memcpy_async_to_cpu`, `cuda_memcpy_async_to_gpu`
+- `cuda_get_number_of_gpus`
+- `cuda_synchronize_device`
+
+The cryptographic operations it provides are:
+- an amortized implementation of the TFHE programmable bootstrap: `cuda_bootstrap_amortized_lwe_ciphertext_vector_32` and `cuda_bootstrap_amortized_lwe_ciphertext_vector_64`
+- a low latency implementation of the TFHE programmable bootstrap: `cuda_bootstrap_low_latency_lwe_ciphertext_vector_32` and `cuda_bootstrap_low_latency_lwe_ciphertext_vector_64`
+- the keyswitch: `cuda_keyswitch_lwe_ciphertext_vector_32` and `cuda_keyswitch_lwe_ciphertext_vector_64`
+- the larger precision programmable bootstrap (wop PBS, which supports up to 16 bits of message while the classical PBS only supports up to 8 bits of message) and its sub-components: `cuda_wop_pbs_64`, `cuda_extract_bits_64`, `cuda_circuit_bootstrap_64`, `cuda_cmux_tree_64`, `cuda_blind_rotation_sample_extraction_64`
+- acceleration for leveled operations: `cuda_negate_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_plaintext_vector_64`, `cuda_mult_lwe_ciphertext_vector_cleartext_vector`.
+
+## Dependencies
+
+**Disclaimer**: Compilation on Windows/Mac is not supported yet. Only Nvidia GPUs are supported. 
+
+- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation
+- [nvcc](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) >= 10.0
+- [gcc](https://gcc.gnu.org/) >= 8.0 - check this [page](https://gist.github.com/ax3l/9489132) for more details about nvcc/gcc compatible versions
+- [cmake](https://cmake.org/) >= 3.24
+
+## Build
+
+The Cuda project held in `tfhe-cuda-backend` can be compiled independently from TFHE-rs in the following way:
+```
+git clone git@github.com:zama-ai/tfhe-rs
+cd tfhe-rs/backends/tfhe-cuda-backend/cuda
+mkdir build
+cd build
+cmake ..
+make
+```
+The compute capability is detected automatically (with the first GPU information) and set accordingly.
+If your machine does not have an available Nvidia GPU, the compilation will work if you have the nvcc compiler installed. The generated executable will target a 7.0 compute capability (sm_70).
+
+## Links
+
+- [TFHE](https://eprint.iacr.org/2018/421.pdf)
+
+## License
+
+This software is distributed under the BSD-3-Clause-Clear license. If you have any questions,
+please contact us at `hello@zama.ai`.
--- a/backends/tfhe-cuda-backend/build.rs
+++ b/backends/tfhe-cuda-backend/build.rs
@@ -0,0 +1,28 @@
+use std::env;
+use std::process::Command;
+
+fn main() {
+    println!("Build tfhe-cuda-backend");
+    if env::consts::OS == "linux" {
+        let output = Command::new("./get_os_name.sh").output().unwrap();
+        let distribution = String::from_utf8(output.stdout).unwrap();
+        if distribution != "Ubuntu\n" {
+            println!(
+                "cargo:warning=This Linux distribution is not officially supported. \
+                Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
+            );
+        }
+        let dest = cmake::build("cuda");
+        println!("cargo:rustc-link-search=native={}", dest.display());
+        println!("cargo:rustc-link-lib=static=tfhe_cuda_backend");
+        println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
+        println!("cargo:rustc-link-lib=gomp");
+        println!("cargo:rustc-link-lib=cudart");
+        println!("cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu/");
+        println!("cargo:rustc-link-lib=stdc++");
+    } else {
+        panic!(
+            "Error: platform not supported, tfhe-cuda-backend not built (only Linux is supported)"
+        );
+    }
+}
--- a/backends/tfhe-cuda-backend/cuda/.cmake-format-config.py
+++ b/backends/tfhe-cuda-backend/cuda/.cmake-format-config.py
@@ -0,0 +1,10 @@
+# -----------------------------
+# Options affecting formatting.
+# -----------------------------
+with section("format"):
+
+  # How wide to allow formatted cmake files
+  line_width = 120
+  
+  # How many spaces to tab for indent
+  tab_size = 2
--- a/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
@@ -0,0 +1,90 @@
+cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
+project(tfhe_cuda_backend LANGUAGES CXX)
+
+# Require a CUDA compiler at least as recent as the minimum supported version; configuration fails otherwise.
+set(MINIMUM_SUPPORTED_CUDA_VERSION 10.0)
+include(CheckLanguage)
+# See if CUDA is available
+check_language(CUDA)
+# If so, enable CUDA to check the version.
+if(CMAKE_CUDA_COMPILER)
+  enable_language(CUDA)
+endif()
+# If CUDA is not available, or the minimum version is too low do not build
+if(NOT CMAKE_CUDA_COMPILER)
+  message(FATAL_ERROR "Cuda compiler not found.")
+endif()
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION})
+  message(FATAL_ERROR "CUDA ${MINIMUM_SUPPORTED_CUDA_VERSION} or greater is required for compilation.")
+endif()
+# Get CUDA compute capability
+set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/cuda_script) # No suffix required
+set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/check_cuda.cu)
+execute_process(COMMAND nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE})
+execute_process(
+  COMMAND ${OUTPUTFILE}
+  RESULT_VARIABLE CUDA_RETURN_CODE
+  OUTPUT_VARIABLE ARCH)
+file(REMOVE ${OUTPUTFILE})
+
+if(${CUDA_RETURN_CODE} EQUAL 0)
+  set(CUDA_SUCCESS "TRUE")
+else()
+  set(CUDA_SUCCESS "FALSE")
+endif()
+
+if(${CUDA_SUCCESS})
+  message(STATUS "CUDA Architecture: ${ARCH}")
+  message(STATUS "CUDA Version: ${CUDA_VERSION_STRING}")
+  message(STATUS "CUDA Path: ${CUDA_TOOLKIT_ROOT_DIR}")
+  message(STATUS "CUDA Libraries: ${CUDA_LIBRARIES}")
+  message(STATUS "CUDA Performance Primitives: ${CUDA_npp_LIBRARY}")
+else()
+  message(WARNING ${ARCH})
+endif()
+
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+# Add OpenMP support
+find_package(OpenMP REQUIRED)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -g")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${OpenMP_CXX_FLAGS}")
+if(${CUDA_SUCCESS})
+  set(CMAKE_CUDA_ARCHITECTURES native)
+else()
+  set(CMAKE_CUDA_ARCHITECTURES 70)
+endif()
+
+# in production, should use -arch=sm_70 --ptxas-options=-v to see register spills -lineinfo for better debugging
+set(CMAKE_CUDA_FLAGS
+    "${CMAKE_CUDA_FLAGS} -ccbin ${CMAKE_CXX_COMPILER} -O3 \
+  -std=c++17 --no-exceptions  --expt-relaxed-constexpr -rdc=true \
+  --use_fast_math -Xcompiler -fPIC")
+
+set(INCLUDE_DIR include)
+
+add_subdirectory(src)
+target_include_directories(tfhe_cuda_backend PRIVATE ${INCLUDE_DIR})
+
+# This is required for rust cargo build
+install(TARGETS tfhe_cuda_backend DESTINATION .)
+install(TARGETS tfhe_cuda_backend DESTINATION lib)
+
+# Define a function to add a lint target.
+find_file(CPPLINT NAMES cpplint cpplint.exe)
+if(CPPLINT)
+  # Add a custom target to lint all child projects. Dependencies are specified in child projects.
+  add_custom_target(all_lint)
+  # Don't trigger this target on ALL_BUILD or Visual Studio 'Rebuild Solution'
+  set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_ALL TRUE)
+  # set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE)
+endif()
+
+enable_testing()
--- a/backends/tfhe-cuda-backend/cuda/CPPLINT.cfg
+++ b/backends/tfhe-cuda-backend/cuda/CPPLINT.cfg
@@ -0,0 +1,3 @@
+set noparent 
+linelength=240
+filter=-legal/copyright,-readability/todo,-runtime/references,-build/c++17
--- a/backends/tfhe-cuda-backend/cuda/check_cuda.cu
+++ b/backends/tfhe-cuda-backend/cuda/check_cuda.cu
@@ -0,0 +1,28 @@
+#include <stdio.h>
+
+// Build-time probe: queries device 0 and prints the nvcc architecture flag
+// matching its compute capability (e.g. "-arch=sm_70") on stdout.
+// Exit status: 0 on success, the CUDA error code when the device properties
+// cannot be read, 1 when the compute capability is below min_cc.
+int main(int argc, char **argv) {
+  cudaDeviceProp dP;
+  float min_cc = 3.0f;
+
+  int rc = cudaGetDeviceProperties(&dP, 0);
+  if (rc != cudaSuccess) {
+    cudaError_t error = cudaGetLastError();
+    printf("CUDA error: %s", cudaGetErrorString(error));
+    return rc; /* Failure */
+  }
+  // Use a floating-point divisor: with integer division `dP.minor / 10` is
+  // always 0 and the minor revision would be ignored in the comparison.
+  if ((dP.major + (dP.minor / 10.0f)) < min_cc) {
+    printf("Min Compute Capability of %2.1f required:  %d.%d found\n Not "
+           "Building CUDA Code",
+           min_cc, dP.major, dP.minor);
+    return 1; /* Failure */
+  } else {
+    printf("-arch=sm_%d%d", dP.major, dP.minor);
+    return 0; /* Success */
+  }
+}
--- a/backends/tfhe-cuda-backend/cuda/format_tfhe_cuda_backend.sh
+++ b/backends/tfhe-cuda-backend/cuda/format_tfhe_cuda_backend.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+set -e
+
+while getopts ":c" option; do
+  case $option in
+    c)
+      # -c (check mode): exit non-zero if any file is not already formatted
+      find ./{include,src} -iregex '^.*\.\(cpp\|cu\|h\|cuh\)$' -print | xargs clang-format-15 -i -style='file' --dry-run --Werror
+      cmake-format -i CMakeLists.txt -c .cmake-format-config.py
+      find ./{include,src} -type f -name "CMakeLists.txt" | xargs -I % sh -c 'cmake-format -i % -c .cmake-format-config.py'
+      git diff --exit-code
+      exit
+      ;;
+  esac
+done
+find ./{include,src} -iregex '^.*\.\(cpp\|cu\|h\|cuh\)$' -print | xargs clang-format-15 -i -style='file'
+cmake-format -i CMakeLists.txt -c .cmake-format-config.py
+find ./{include,src} -type f -name "CMakeLists.txt" | xargs -I % sh -c 'cmake-format -i % -c .cmake-format-config.py'
--- a/backends/tfhe-cuda-backend/cuda/include/bootstrap.h
+++ b/backends/tfhe-cuda-backend/cuda/include/bootstrap.h
@@ -0,0 +1,118 @@
+#ifndef CUDA_BOOTSTRAP_H
+#define CUDA_BOOTSTRAP_H
+
+#include "device.h"
+#include <cstdint>
+
+enum PBS_TYPE { MULTI_BIT = 0, LOW_LAT = 1, AMORTIZED = 2 };
+
+extern "C" {
+void cuda_fourier_polynomial_mul(void *input1, void *input2, void *output,
+                                 cuda_stream_t *stream,
+                                 uint32_t polynomial_size,
+                                 uint32_t total_polynomials);
+
+void cuda_convert_lwe_bootstrap_key_32(void *dest, void *src,
+                                       cuda_stream_t *stream,
+                                       uint32_t input_lwe_dim,
+                                       uint32_t glwe_dim, uint32_t level_count,
+                                       uint32_t polynomial_size);
+
+void cuda_convert_lwe_bootstrap_key_64(void *dest, void *src,
+                                       cuda_stream_t *stream,
+                                       uint32_t input_lwe_dim,
+                                       uint32_t glwe_dim, uint32_t level_count,
+                                       uint32_t polynomial_size);
+
+void scratch_cuda_bootstrap_amortized_32(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
+    uint32_t max_shared_memory, bool allocate_gpu_memory);
+
+void scratch_cuda_bootstrap_amortized_64(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
+    uint32_t max_shared_memory, bool allocate_gpu_memory);
+
+void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cleanup_cuda_bootstrap_amortized(cuda_stream_t *stream,
+                                      int8_t **pbs_buffer);
+
+void scratch_cuda_bootstrap_low_latency_32(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t level_count,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
+    bool allocate_gpu_memory);
+
+void scratch_cuda_bootstrap_low_latency_64(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t level_count,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
+    bool allocate_gpu_memory);
+
+void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cleanup_cuda_bootstrap_low_latency(cuda_stream_t *stream,
+                                        int8_t **pbs_buffer);
+
+uint64_t get_buffer_size_bootstrap_amortized_64(
+    uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
+
+uint64_t get_buffer_size_bootstrap_low_latency_64(
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
+}
+
+#ifdef __CUDACC__
+__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
+                                         int glwe_dimension,
+                                         uint32_t level_count);
+
+template <typename T>
+__device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level,
+                                     uint32_t polynomial_size,
+                                     int glwe_dimension, uint32_t level_count);
+
+template <typename T>
+__device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level,
+                                     uint32_t polynomial_size,
+                                     int glwe_dimension, uint32_t level_count);
+
+template <typename T>
+__device__ T *get_multi_bit_ith_lwe_gth_group_kth_block(
+    T *ptr, int g, int i, int k, int level, uint32_t grouping_factor,
+    uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count);
+
+#endif
+
+#endif // CUDA_BOOTSTRAP_H
--- a/backends/tfhe-cuda-backend/cuda/include/bootstrap_multibit.h
+++ b/backends/tfhe-cuda-backend/cuda/include/bootstrap_multibit.h
@@ -0,0 +1,51 @@
+#ifndef CUDA_MULTI_BIT_H
+#define CUDA_MULTI_BIT_H
+
+// device.h declares cuda_stream_t, which the prototypes below take by pointer;
+// including it here keeps this header usable on its own.
+#include "device.h"
+#include <cstdint>
+
+extern "C" {
+// chunk_size has a default value in the prototypes below; that default only
+// exists for C++ callers, callers through the C ABI must pass it explicitly.
+void cuda_convert_lwe_multi_bit_bootstrap_key_64(
+    void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
+    uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size,
+    uint32_t grouping_factor);
+
+void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
+    uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
+    uint32_t max_shared_memory, uint32_t chunk_size = 0);
+
+void scratch_cuda_multi_bit_pbs_64(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension,
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
+    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
+    uint32_t max_shared_memory, bool allocate_gpu_memory,
+    uint32_t chunk_size = 0);
+
+void cleanup_cuda_multi_bit_pbs(cuda_stream_t *stream, int8_t **pbs_buffer);
+}
+#ifdef __CUDACC__
+__host__ uint32_t get_lwe_chunk_size(uint32_t lwe_dimension,
+                                     uint32_t level_count,
+                                     uint32_t glwe_dimension,
+                                     uint32_t num_samples);
+
+__host__ uint32_t get_average_lwe_chunk_size(uint32_t lwe_dimension,
+                                             uint32_t level_count,
+                                             uint32_t glwe_dimension,
+                                             uint32_t ct_count);
+
+__host__ uint64_t get_max_buffer_size_multibit_bootstrap(
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t level_count, uint32_t max_input_lwe_ciphertext_count);
+#endif
+
+#endif // CUDA_MULTI_BIT_H
--- a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
@@ -0,0 +1,18 @@
+#ifndef CUDA_CIPHERTEXT_H
+#define CUDA_CIPHERTEXT_H
+
+#include <cstdint>
+
+extern "C" {
+void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
+                                                  void *v_stream,
+                                                  uint32_t gpu_index,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension);
+void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
+                                                  void *v_stream,
+                                                  uint32_t gpu_index,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension);
+};
+#endif
--- a/backends/tfhe-cuda-backend/cuda/include/device.h
+++ b/backends/tfhe-cuda-backend/cuda/include/device.h
@@ -0,0 +1,88 @@
+#ifndef DEVICE_H
+#define DEVICE_H
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cuda_runtime.h>
+
+#define synchronize_threads_in_block() __syncthreads()
+
+extern "C" {
+
+#define check_cuda_error(ans)                                                  \
+  { cuda_error((ans), __FILE__, __LINE__); }
+inline void cuda_error(cudaError_t code, const char *file, int line) {
+  if (code != cudaSuccess) {
+    std::fprintf(stderr, "Cuda error: %s %s %d\n", cudaGetErrorString(code),
+                 file, line);
+    std::abort();
+  }
+}
+#define PANIC(format, ...)                                                     \
+  {                                                                            \
+    std::fprintf(stderr, "%s::%d::%s: panic.\n" format "\n", __FILE__,         \
+                 __LINE__, __func__, ##__VA_ARGS__);                           \
+    std::abort();                                                              \
+  }
+
+struct cuda_stream_t {
+  cudaStream_t stream;
+  uint32_t gpu_index;
+
+  cuda_stream_t(uint32_t gpu_index) {
+    this->gpu_index = gpu_index;
+
+    check_cuda_error(cudaStreamCreate(&stream));
+  }
+
+  void release() {
+    check_cuda_error(cudaSetDevice(gpu_index));
+    check_cuda_error(cudaStreamDestroy(stream));
+  }
+
+  void synchronize() { check_cuda_error(cudaStreamSynchronize(stream)); }
+};
+
+cuda_stream_t *cuda_create_stream(uint32_t gpu_index);
+
+void cuda_destroy_stream(cuda_stream_t *stream);
+
+void *cuda_malloc(uint64_t size, uint32_t gpu_index);
+
+void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream);
+
+void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
+
+bool cuda_check_support_cooperative_groups();
+
+void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
+                              cuda_stream_t *stream);
+
+void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
+                                  cuda_stream_t *stream);
+
+void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
+                              cuda_stream_t *stream);
+
+void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
+                       cuda_stream_t *stream);
+
+int cuda_get_number_of_gpus();
+
+void cuda_synchronize_device(uint32_t gpu_index);
+
+void cuda_drop(void *ptr, uint32_t gpu_index);
+
+void cuda_drop_async(void *ptr, cuda_stream_t *stream);
+
+int cuda_get_max_shared_memory(uint32_t gpu_index);
+
+void cuda_synchronize_stream(cuda_stream_t *stream);
+}
+
+template <typename Torus>
+void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
+                          Torus n);
+#endif
--- a/backends/tfhe-cuda-backend/cuda/include/helper_debug.cuh
+++ b/backends/tfhe-cuda-backend/cuda/include/helper_debug.cuh
@@ -0,0 +1,126 @@
+#ifndef HELPER_DEBUG_CUH
+#define HELPER_DEBUG_CUH
+
+#include "cuComplex.h"
+#include "thrust/complex.h"
+#include <cstdio>
+#include <iostream>
+#include <string>
+#include <type_traits>
+
+// Device-side debug printing helpers.
+//
+// PRINT_DEBUG(var, begin, end, step, cond[, text]) prints var[i] for
+// i = begin, begin + step, ... while i < end, in every thread where `cond`
+// holds, optionally preceded by `text`. It expands to nothing when PRINT_VARS
+// is not defined.
+//
+// Every _print_debug overload calls __syncthreads() before and after printing,
+// so PRINT_DEBUG must be reached by all threads of the block.
+#define PRINT_VARS
+#ifdef PRINT_VARS
+#define PRINT_DEBUG_5(var, begin, end, step, cond)                             \
+  _print_debug(var, #var, begin, end, step, cond, "", false)
+#define PRINT_DEBUG_6(var, begin, end, step, cond, text)                       \
+  _print_debug(var, #var, begin, end, step, cond, text, true)
+#define CAT(A, B) A##B
+#define PRINT_SELECT(NAME, NUM) CAT(NAME##_, NUM)
+#define GET_COUNT(_1, _2, _3, _4, _5, _6, COUNT, ...) COUNT
+#define VA_SIZE(...) GET_COUNT(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
+#define PRINT_DEBUG(...)                                                       \
+  PRINT_SELECT(PRINT_DEBUG, VA_SIZE(__VA_ARGS__))(__VA_ARGS__)
+#else
+#define PRINT_DEBUG(...)
+#endif
+
+// Overload for unsigned integer element types.
+template <typename T>
+__device__ typename std::enable_if<std::is_unsigned<T>::value, void>::type
+_print_debug(T *var, const char *var_name, int start, int end, int step,
+             bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      // Widen to 64 bits so that 64-bit elements are not truncated by %u.
+      printf("%s[%d]: %llu\n", var_name, i,
+             static_cast<unsigned long long>(var[i]));
+    }
+  }
+  __syncthreads();
+}
+
+// Overload for signed integer element types. The is_integral test keeps
+// floating-point types (for which std::is_signed is also true) out of this
+// overload, so they resolve unambiguously to the floating-point one below.
+template <typename T>
+__device__ typename std::enable_if<
+    std::is_integral<T>::value && std::is_signed<T>::value, void>::type
+_print_debug(T *var, const char *var_name, int start, int end, int step,
+             bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      // Widen to 64 bits so that 64-bit elements are not truncated by %d.
+      printf("%s[%d]: %lld\n", var_name, i, static_cast<long long>(var[i]));
+    }
+  }
+  __syncthreads();
+}
+
+// Overload for floating-point element types.
+template <typename T>
+__device__ typename std::enable_if<std::is_floating_point<T>::value, void>::type
+_print_debug(T *var, const char *var_name, int start, int end, int step,
+             bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      printf("%s[%d]: %.15f\n", var_name, i, var[i]);
+    }
+  }
+  __syncthreads();
+}
+
+// Overload for thrust::complex<double>: prints real and imaginary parts.
+template <typename T>
+__device__
+    typename std::enable_if<std::is_same<T, thrust::complex<double>>::value,
+                            void>::type
+    _print_debug(T *var, const char *var_name, int start, int end, int step,
+                 bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      printf("%s[%d]: %.15f , %.15f\n", var_name, i, var[i].real(),
+             var[i].imag());
+    }
+  }
+  __syncthreads();
+}
+
+// Overload for cuDoubleComplex elements: prints the x and y components.
+template <typename T>
+__device__
+    typename std::enable_if<std::is_same<T, cuDoubleComplex>::value, void>::type
+    _print_debug(T *var, const char *var_name, int start, int end, int step,
+                 bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      printf("%s[%d]: %.15f , %.15f\n", var_name, i, var[i].x, var[i].y);
+    }
+  }
+  __syncthreads();
+}
+
+#endif // HELPER_DEBUG_CUH
--- a/backends/tfhe-cuda-backend/cuda/include/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer.h
--- a/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
+++ b/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
@@ -0,0 +1,21 @@
+#ifndef CNCRT_KS_H_
+#define CNCRT_KS_H_
+
+#include <cstdint>
+
+// C entry points of the LWE keyswitch. All buffers are passed as untyped
+// pointers; the definitions in keyswitch.cu cast them to uint32_t* (the _32
+// variant) or uint64_t* (the _64 variant) before forwarding to the templated
+// implementation.
+// NOTE(review): cuda_stream_t is used here but this header only includes
+// <cstdint>; it relies on the includer having pulled in the declaration
+// first — confirm this is intended.
+extern "C" {
+
+// Keyswitch of num_samples LWE ciphertexts stored as 32-bit words.
+void cuda_keyswitch_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples);
+
+// Keyswitch of num_samples LWE ciphertexts stored as 64-bit words.
+void cuda_keyswitch_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples);
+}
+
+#endif // CNCRT_KS_H_
--- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
+++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
@@ -0,0 +1,50 @@
+#ifndef CUDA_LINALG_H_
+#define CUDA_LINALG_H_
+
+#include "bootstrap.h"
+#include <cstdint>
+#include <device.h>
+
+// C entry points for linear operations on vectors of LWE ciphertexts. Every
+// operation is declared twice, with a _32 and a _64 suffix, and takes its
+// buffers as untyped pointers.
+// NOTE(review): the implementations are not visible from this header; the
+// per-function remarks below are inferred from the names only — verify
+// against the definitions.
+extern "C" {
+
+// Presumably: lwe_array_out = -lwe_array_in.
+void cuda_negate_lwe_ciphertext_vector_32(cuda_stream_t *stream,
+                                          void *lwe_array_out,
+                                          void *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_64(cuda_stream_t *stream,
+                                          void *lwe_array_out,
+                                          void *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
+// Presumably: lwe_array_out = lwe_array_in_1 + lwe_array_in_2.
+void cuda_add_lwe_ciphertext_vector_32(cuda_stream_t *stream,
+                                       void *lwe_array_out,
+                                       void *lwe_array_in_1,
+                                       void *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_64(cuda_stream_t *stream,
+                                       void *lwe_array_out,
+                                       void *lwe_array_in_1,
+                                       void *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
+// Presumably: adds a vector of plaintexts to a vector of ciphertexts.
+void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *plaintext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *plaintext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+// Presumably: multiplies a vector of ciphertexts by a vector of cleartexts.
+void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *cleartext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *cleartext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+}
+
+#endif // CUDA_LINALG_H_
--- a/backends/tfhe-cuda-backend/cuda/src/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/src/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Public headers of the backend.
+# NOTE(review): this list is discarded by the file(GLOB_RECURSE SOURCES ...)
+# call below, which assigns the same variable; the headers are therefore not
+# attached to the target. Confirm whether that is intended.
+set(SOURCES
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bit_extraction.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bitwise_ops.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap_multibit.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/ciphertext.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/circuit_bootstrap.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/device.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/integer.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/keyswitch.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/linear_algebra.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/shifts.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/vertical_packing.h)
+# Collect every .cu file below this directory (overwrites SOURCES)
+file(GLOB_RECURSE SOURCES "*.cu")
+# Static library built from the collected CUDA translation units
+add_library(tfhe_cuda_backend STATIC ${SOURCES})
+# Separable compilation with device symbols resolved in this library
+set_target_properties(tfhe_cuda_backend PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+target_link_libraries(tfhe_cuda_backend PUBLIC cudart OpenMP::OpenMP_CXX)
+# Sources include their .cuh headers relative to this directory
+target_include_directories(tfhe_cuda_backend PRIVATE .)
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
@@ -0,0 +1 @@
+#include "ciphertext.cuh"
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
@@ -0,0 +1,44 @@
+#ifndef CUDA_CIPHERTEXT_CUH
+#define CUDA_CIPHERTEXT_CUH
+
+#include "ciphertext.h"
+#include "device.h"
+#include <cstdint>
+
+/**
+ * Asynchronously copies `number_of_cts` LWE ciphertexts, each made of
+ * `lwe_dimension + 1` elements of type T, from `src` to `dest` through
+ * cuda_memcpy_async_to_gpu on the given stream.
+ */
+template <typename T>
+void cuda_convert_lwe_ciphertext_vector_to_gpu(T *dest, T *src,
+                                               cuda_stream_t *stream,
+                                               uint32_t number_of_cts,
+                                               uint32_t lwe_dimension) {
+  cudaSetDevice(stream->gpu_index);
+  // Widen before multiplying: the product of the two 32-bit counts can exceed
+  // 32 bits for large batches
+  uint64_t size = static_cast<uint64_t>(number_of_cts) *
+                  (static_cast<uint64_t>(lwe_dimension) + 1) * sizeof(T);
+  cuda_memcpy_async_to_gpu(dest, src, size, stream);
+}
+
+/// 64-bit entry point: views both untyped buffers as uint64_t arrays and
+/// forwards to the templated host-to-device copy.
+void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
+                                                  cuda_stream_t *stream,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension) {
+  auto *typed_dest = static_cast<uint64_t *>(dest);
+  auto *typed_src = static_cast<uint64_t *>(src);
+  cuda_convert_lwe_ciphertext_vector_to_gpu<uint64_t>(
+      typed_dest, typed_src, stream, number_of_cts, lwe_dimension);
+}
+
+/**
+ * Asynchronously copies `number_of_cts` LWE ciphertexts, each made of
+ * `lwe_dimension + 1` elements of type T, from `src` to `dest` through
+ * cuda_memcpy_async_to_cpu on the given stream.
+ */
+template <typename T>
+void cuda_convert_lwe_ciphertext_vector_to_cpu(T *dest, T *src,
+                                               cuda_stream_t *stream,
+                                               uint32_t number_of_cts,
+                                               uint32_t lwe_dimension) {
+  cudaSetDevice(stream->gpu_index);
+  // Widen before multiplying: the product of the two 32-bit counts can exceed
+  // 32 bits for large batches
+  uint64_t size = static_cast<uint64_t>(number_of_cts) *
+                  (static_cast<uint64_t>(lwe_dimension) + 1) * sizeof(T);
+  cuda_memcpy_async_to_cpu(dest, src, size, stream);
+}
+
+/// 64-bit entry point: views both untyped buffers as uint64_t arrays and
+/// forwards to the templated device-to-host copy.
+void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
+                                                  cuda_stream_t *stream,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension) {
+  auto *typed_dest = static_cast<uint64_t *>(dest);
+  auto *typed_src = static_cast<uint64_t *>(src);
+  cuda_convert_lwe_ciphertext_vector_to_cpu<uint64_t>(
+      typed_dest, typed_src, stream, number_of_cts, lwe_dimension);
+}
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/gadget.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/gadget.cuh
@@ -0,0 +1,162 @@
+#ifndef CNCRT_CRYPTO_CUH
+#define CNCRT_CRYPTO_CUH
+
+#include "device.h"
+#include <cstdint>
+
+/**
+ * GadgetMatrix implements the iterator design pattern to decompose a set of
+ * num_poly consecutive polynomials with degree params::degree. A total of
+ * level_count levels is expected and each call to decompose_and_compress_next()
+ * writes to the result the next level. It is also possible to advance an
+ * arbitrary amount of levels by using decompose_and_compress_level().
+ *
+ * This class always decomposes the entire set of num_poly polynomials.
+ * By default, it works on a single polynomial.
+ */
+#pragma once
+template <typename T, class params> class GadgetMatrix {
+private:
+  uint32_t level_count;
+  uint32_t base_log;
+  uint32_t mask;   // never read inside this class
+  uint32_t halfbg; // never read inside this class
+  uint32_t num_poly;
+  T offset; // never read inside this class
+  int current_level;
+  T mask_mod_b;
+  T *state;
+
+  // Extracts the next signed digit (base 2^base_log) from one coefficient of
+  // the state, updates that coefficient in place (shift + carry propagation)
+  // and returns the digit.
+  __device__ __forceinline__ T decompose_coefficient(T &coeff_state) {
+    T res = coeff_state & mask_mod_b;
+    coeff_state >>= base_log;
+    T carry = ((res - 1ll) | coeff_state) & res;
+    carry >>= (base_log - 1);
+    coeff_state += carry;
+    res -= carry << base_log;
+    return res;
+  }
+
+public:
+  // Keeps only the base_log * level_count most significant bits of every
+  // coefficient of `state` (the state is modified in place) and ends with a
+  // block-wide synchronization, so all threads of the block must construct
+  // the object.
+  // Initializers are listed in member declaration order.
+  __device__ GadgetMatrix(uint32_t base_log, uint32_t level_count, T *state,
+                          uint32_t num_poly = 1)
+      : level_count(level_count), base_log(base_log), num_poly(num_poly),
+        state(state) {
+
+    mask_mod_b = (1ll << base_log) - 1ll;
+    current_level = level_count;
+    int tid = threadIdx.x;
+    for (int i = 0; i < num_poly * params::opt; i++) {
+      state[tid] >>= (sizeof(T) * 8 - base_log * level_count);
+      tid += params::degree / params::opt;
+    }
+    synchronize_threads_in_block();
+  }
+
+  // Decomposes all polynomials at once: polynomial j is written to
+  // result + j * params::degree / 2
+  __device__ void decompose_and_compress_next(double2 *result) {
+    for (int j = 0; j < num_poly; j++) {
+      auto result_slice = result + j * params::degree / 2;
+      decompose_and_compress_next_polynomial(result_slice, j);
+    }
+  }
+
+  // Decomposes a single polynomial (the j-th one). Coefficient tid goes to
+  // the real part and coefficient tid + degree / 2 to the imaginary part of
+  // result[tid].
+  __device__ void decompose_and_compress_next_polynomial(double2 *result,
+                                                         int j) {
+    if (j == 0)
+      current_level -= 1;
+
+    int tid = threadIdx.x;
+    auto state_slice = state + j * params::degree;
+    for (int i = 0; i < params::opt / 2; i++) {
+      T res_re = decompose_coefficient(state_slice[tid]);
+      T res_im = decompose_coefficient(state_slice[tid + params::degree / 2]);
+
+      result[tid].x = (int32_t)res_re;
+      result[tid].y = (int32_t)res_im;
+
+      tid += params::degree / params::opt;
+    }
+    synchronize_threads_in_block();
+  }
+
+  // Decomposes a single polynomial (the j-th one), like
+  // decompose_and_compress_next_polynomial, but stores the digits of this
+  // thread at result[0 .. params::opt / 2 - 1] instead of result[tid].
+  __device__ void
+  decompose_and_compress_next_polynomial_elements(double2 *result, int j) {
+    if (j == 0)
+      current_level -= 1;
+
+    int tid = threadIdx.x;
+    auto state_slice = state + j * params::degree;
+    for (int i = 0; i < params::opt / 2; i++) {
+      T res_re = decompose_coefficient(state_slice[tid]);
+      T res_im = decompose_coefficient(state_slice[tid + params::degree / 2]);
+
+      result[i].x = (int32_t)res_re;
+      result[i].y = (int32_t)res_im;
+
+      tid += params::degree / params::opt;
+    }
+    synchronize_threads_in_block();
+  }
+
+  // Calls decompose_and_compress_next() level_count - level times; `result`
+  // holds the output of the last call.
+  __device__ void decompose_and_compress_level(double2 *result, int level) {
+    for (int i = 0; i < level_count - level; i++)
+      decompose_and_compress_next(result);
+  }
+};
+
+/**
+ * Decomposes a single torus element one level at a time. The constructor
+ * precomputes halfbg = 2^base_log / 2, mask = 2^base_log - 1 and an offset
+ * equal to halfbg times the sum of 2^(bits - (i + 1) * base_log) over all
+ * levels i.
+ */
+template <typename T> class GadgetMatrixSingle {
+private:
+  uint32_t level_count;
+  uint32_t base_log;
+  uint32_t mask;
+  uint32_t halfbg;
+  T offset;
+
+public:
+  __device__ GadgetMatrixSingle(uint32_t base_log, uint32_t level_count)
+      : level_count(level_count), base_log(base_log) {
+    uint32_t base = 1 << base_log;
+    halfbg = base / 2;
+    mask = base - 1;
+    T level_sum = 0;
+    for (int lvl = 0; lvl < this->level_count; lvl++) {
+      level_sum += 1ULL << (sizeof(T) * 8 - (lvl + 1) * this->base_log);
+    }
+    offset = level_sum * halfbg;
+  }
+
+  // Returns the digit of `element` at the given level: the offset is added,
+  // the base_log bits of that level are extracted and halfbg is subtracted.
+  __device__ T decompose_one_level_single(T element, uint32_t level) {
+    T shifted_input = element + offset;
+    uint32_t bit_pos = (sizeof(T) * 8 - (level + 1) * base_log);
+    T digit = (shifted_input >> bit_pos) & mask;
+    return (T)(digit - halfbg);
+  }
+};
+
+/**
+ * Extracts the next signed digit in base 2^base_log from `state`.
+ * `state` is updated in place: it is shifted right by base_log bits and the
+ * carry produced by the digit is added back. `mask_mod_b` selects the
+ * low bits of the state that form the digit.
+ */
+template <typename Torus>
+__device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) {
+  Torus digit = state & mask_mod_b;
+  state >>= base_log;
+  Torus carry = ((digit - 1ll) | state) & digit;
+  carry >>= base_log - 1;
+  state += carry;
+  digit -= carry << base_log;
+  return digit;
+}
+
+#endif // CNCRT_CRYPTO_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
@@ -0,0 +1,74 @@
+#ifndef CNCRT_GGSW_CUH
+#define CNCRT_GGSW_CUH
+
+#include "device.h"
+#include "fft/bnsmfft.cuh"
+#include "polynomial/parameters.cuh"
+
+/**
+ * Converts one polynomial per block to the FFT domain.
+ *
+ * Each block reads params::degree coefficients of `src`, scales them by
+ * 1 / numeric_limits<T>::max() into params::degree / 2 complex values (the
+ * coefficient at index tid in the real part, the one at tid + degree / 2 in
+ * the imaginary part), applies NSMFFT_direct and writes the result to `dest`.
+ *
+ * The working buffer is the dynamic shared memory when SMD == FULLSM,
+ * otherwise a region of `device_mem`.
+ */
+template <typename T, typename ST, class params, sharedMemDegree SMD>
+__global__ void device_batch_fft_ggsw_vector(double2 *dest, T *src,
+                                             int8_t *device_mem) {
+
+  extern __shared__ int8_t sharedmem[];
+  double2 *selected_memory;
+
+  if constexpr (SMD == FULLSM)
+    selected_memory = (double2 *)sharedmem;
+  else
+    // Take the ADDRESS of this block's region: casting the int8_t element
+    // itself to a pointer would yield a bogus address.
+    // NOTE(review): each block uses params::degree / 2 double2 values
+    // (8 * params::degree bytes) while consecutive blocks start only
+    // params::degree bytes apart — confirm the intended stride and the size
+    // of the buffer allocated by the caller.
+    selected_memory = (double2 *)&device_mem[blockIdx.x * params::degree];
+
+  // Compression
+  int offset = blockIdx.x * blockDim.x;
+
+  int tid = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    ST x = src[(tid) + params::opt * offset];
+    ST y = src[(tid + params::degree / 2) + params::opt * offset];
+    selected_memory[tid].x = x / (double)std::numeric_limits<T>::max();
+    selected_memory[tid].y = y / (double)std::numeric_limits<T>::max();
+    tid += params::degree / params::opt;
+  }
+  synchronize_threads_in_block();
+
+  // Switch to the FFT space
+  NSMFFT_direct<HalfDegree<params>>(selected_memory);
+  synchronize_threads_in_block();
+
+  // Write the output to global memory
+  tid = threadIdx.x;
+#pragma unroll
+  for (int j = 0; j < params::opt / 2; j++) {
+    dest[tid + (params::opt >> 1) * offset] = selected_memory[tid];
+    tid += params::degree / params::opt;
+  }
+}
+
+/**
+ * Applies the FFT transform on sequence of GGSW ciphertexts already in the
+ * global memory
+ *
+ * Launches r * (glwe_dim + 1)^2 * level_count blocks of
+ * polynomial_size / params::opt threads on stream->stream. The kernel works
+ * in dynamic shared memory when sizeof(double) * polynomial_size bytes fit in
+ * `max_shared_memory`, and in `d_mem` otherwise.
+ *
+ * The `gpu_index` argument is not read: the device is taken from
+ * stream->gpu_index.
+ */
+template <typename T, typename ST, class params>
+void batch_fft_ggsw_vector(cuda_stream_t *stream, double2 *dest, T *src,
+                           int8_t *d_mem, uint32_t r, uint32_t glwe_dim,
+                           uint32_t polynomial_size, uint32_t level_count,
+                           uint32_t gpu_index, uint32_t max_shared_memory) {
+  // Do not silently continue on the wrong device if the switch fails
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+
+  int shared_memory_size = sizeof(double) * polynomial_size;
+
+  int gridSize = r * (glwe_dim + 1) * (glwe_dim + 1) * level_count;
+  int blockSize = polynomial_size / params::opt;
+
+  if (max_shared_memory < shared_memory_size) {
+    device_batch_fft_ggsw_vector<T, ST, params, NOSM>
+        <<<gridSize, blockSize, 0, stream->stream>>>(dest, src, d_mem);
+  } else {
+    device_batch_fft_ggsw_vector<T, ST, params, FULLSM>
+        <<<gridSize, blockSize, shared_memory_size, stream->stream>>>(dest, src,
+                                                                      d_mem);
+  }
+  check_cuda_error(cudaGetLastError());
+}
+
+#endif // CNCRT_GGSW_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
@@ -0,0 +1,48 @@
+#include "keyswitch.cuh"
+#include "keyswitch.h"
+#include <cstdint>
+
+/* Keyswitch of a batch of LWE ciphertexts stored as 32-bit words.
+ * Every untyped buffer is viewed as a uint32_t array and handed to the
+ * templated implementation; see the 64-bit entry point for the description
+ * of the arguments.
+ */
+void cuda_keyswitch_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples) {
+  auto *out = static_cast<uint32_t *>(lwe_array_out);
+  auto *out_indexes = static_cast<uint32_t *>(lwe_output_indexes);
+  auto *in = static_cast<uint32_t *>(lwe_array_in);
+  auto *in_indexes = static_cast<uint32_t *>(lwe_input_indexes);
+  auto *key = static_cast<uint32_t *>(ksk);
+
+  cuda_keyswitch_lwe_ciphertext_vector(
+      stream, out, out_indexes, in, in_indexes, key, lwe_dimension_in,
+      lwe_dimension_out, base_log, level_count, num_samples);
+}
+
+/* Perform keyswitch on a batch of 64 bits input LWE ciphertexts.
+ *
+ *  - stream: the cuda_stream_t forwarded to the templated implementation
+ *  - lwe_array_out: output batch of num_samples keyswitched ciphertexts c =
+ * (a0,..an-1,b) where n is the output LWE dimension (lwe_dimension_out)
+ *  - lwe_output_indexes / lwe_input_indexes: forwarded as uint64_t arrays;
+ * presumably the position of each sample in the output / input batch —
+ * verify against the kernel
+ *  - lwe_array_in: input batch of num_samples LWE ciphertexts, containing
+ * lwe_dimension_in mask values + 1 body value
+ *  - ksk: the keyswitch key to be used in the operation
+ *  - base log: the log of the base used in the decomposition (should be the one
+ * used to create the ksk)
+ *  - level_count: number of decomposition levels, forwarded unchanged
+ *
+ * This function calls a wrapper to a device kernel that performs the keyswitch
+ * 	- num_samples blocks of threads are launched
+ */
+void cuda_keyswitch_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples) {
+  cuda_keyswitch_lwe_ciphertext_vector(
+      stream, static_cast<uint64_t *>(lwe_array_out),
+      static_cast<uint64_t *>(lwe_output_indexes),
+      static_cast<uint64_t *>(lwe_array_in),
+      static_cast<uint64_t *>(lwe_input_indexes), static_cast<uint64_t *>(ksk),
+      lwe_dimension_in, lwe_dimension_out, base_log, level_count, num_samples);
+}
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
@@ -0,0 +1,144 @@
+#ifndef CNCRT_KS_CUH
+#define CNCRT_KS_CUH
+
+#include "device.h"
+#include "gadget.cuh"
+#include "polynomial/polynomial_math.cuh"
+#include "torus.cuh"
+#include <thread>
+#include <vector>
+
+/**
+ * Returns a pointer to the keyswitch-key row associated with input
+ * coefficient `i` and decomposition level `level`. Rows hold
+ * lwe_dimension_out + 1 elements and are ordered by `i` first, then by level.
+ */
+template <typename Torus>
+__device__ Torus *get_ith_block(Torus *ksk, int i, int level,
+                                uint32_t lwe_dimension_out,
+                                uint32_t level_count) {
+  // The offset is computed in size_t so it cannot wrap at 32 bits for large
+  // keys
+  size_t row_length = (size_t)lwe_dimension_out + 1;
+  size_t pos = ((size_t)i * level_count + (size_t)level) * row_length;
+  return &ksk[pos];
+}
+
+/*
+ * keyswitch kernel
+ * Each thread handles a piece of the following equation:
+ * $$GLWE_s2(\Delta.m+e) = (0,0,..,0,b) - \sum_{i=0,k-1} <Dec(a_i),
+ * (GLWE_s2(s1_i q/beta),..,GLWE(s1_i q/beta^l)>$$ where k is the dimension of
+ * the GLWE ciphertext. If the polynomial dimension in GLWE is > 1, this
+ * equation is solved for each polynomial coefficient. where Dec denotes the
+ * decomposition with base beta and l levels and the inner product is done
+ * between the decomposition of a_i and l GLWE encryptions of s1_i q/\beta^j,
+ * with j in [1,l] We obtain a GLWE encryption of Delta.m (with Delta the
+ * scaling factor) under key s2 instead of s1, with an increased noise
+ *
+ * Launch layout, as used by this kernel:
+ *  - one block per ciphertext; blockIdx.x selects the entries of
+ *    lwe_input_indexes / lwe_output_indexes used for the block
+ *  - dynamic shared memory holds the lwe_dimension_out + 1 accumulators of
+ *    the block (sizeof(Torus) * (lwe_dimension_out + 1) bytes)
+ *  - threads with tid < cutoff process lwe_upper output coefficients, the
+ *    others lwe_lower; coefficient indices are tid + k * blockDim.x
+ */
+template <typename Torus>
+__global__ void
+keyswitch(Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lwe_array_in,
+          Torus *lwe_input_indexes, Torus *ksk, uint32_t lwe_dimension_in,
+          uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
+          int lwe_lower, int lwe_upper, int cutoff) {
+  int tid = threadIdx.x;
+
+  extern __shared__ int8_t sharedmem[];
+
+  Torus *local_lwe_array_out = (Torus *)sharedmem;
+
+  auto block_lwe_array_in = get_chunk(
+      lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);
+  auto block_lwe_array_out = get_chunk(
+      lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1);
+
+  // Number of output coefficients owned by this thread
+  int lwe_part_per_thd = (tid < cutoff) ? lwe_upper : lwe_lower;
+  __syncthreads();
+
+  // Clear the accumulators
+  for (int k = 0; k < lwe_part_per_thd; k++) {
+    int idx = tid + k * blockDim.x;
+    local_lwe_array_out[idx] = 0;
+  }
+  __syncthreads();
+
+  // The body of the input ciphertext is the starting value of the output body
+  if (tid == 0) {
+    local_lwe_array_out[lwe_dimension_out] =
+        block_lwe_array_in[lwe_dimension_in];
+  }
+
+  // Values that do not depend on the mask element being processed
+  const Torus mask_mod_b = (1ll << base_log) - 1ll;
+  const size_t state_shift = sizeof(Torus) * 8 - base_log * level_count;
+
+  for (int i = 0; i < lwe_dimension_in; i++) {
+
+    // Also makes the body written by thread 0 visible before its first use
+    __syncthreads();
+
+    Torus a_i =
+        round_to_closest_multiple(block_lwe_array_in[i], base_log, level_count);
+
+    Torus state = a_i >> state_shift;
+
+    for (int j = 0; j < level_count; j++) {
+      auto ksk_block = get_ith_block(ksk, i, j, lwe_dimension_out, level_count);
+      Torus decomposed = decompose_one<Torus>(state, mask_mod_b, base_log);
+      for (int k = 0; k < lwe_part_per_thd; k++) {
+        int idx = tid + k * blockDim.x;
+        local_lwe_array_out[idx] -= (Torus)ksk_block[idx] * decomposed;
+      }
+    }
+  }
+
+  // Each thread writes back the coefficients it accumulated
+  for (int k = 0; k < lwe_part_per_thd; k++) {
+    int idx = tid + k * blockDim.x;
+    block_lwe_array_out[idx] = local_lwe_array_out[idx];
+  }
+}
+
+/// Host launcher of the keyswitch kernel; assumes lwe_array_in is in the gpu.
+///
+/// Zeroes the output batch, then launches num_samples blocks of 128 threads
+/// on stream->stream, with sizeof(Torus) * (lwe_dimension_out + 1) bytes of
+/// dynamic shared memory per block. The lwe_dimension_out + 1 output
+/// coefficients are split between the threads: the first `cutoff` threads
+/// handle lwe_upper coefficients, the remaining ones lwe_lower.
+template <typename Torus>
+__host__ void cuda_keyswitch_lwe_ciphertext_vector(
+    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
+    Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples) {
+
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+  constexpr int ideal_threads = 128;
+
+  // Integer split of lwe_dim coefficients over ideal_threads threads:
+  // lwe_dim % ideal_threads threads take one extra coefficient
+  int lwe_dim = lwe_dimension_out + 1;
+  int lwe_lower = lwe_dim / ideal_threads;
+  int lwe_upper = (lwe_dim + ideal_threads - 1) / ideal_threads;
+  int cutoff = lwe_dim % ideal_threads;
+
+  // Element count of the output batch, in 64 bits so it cannot wrap
+  uint64_t lwe_size_after =
+      static_cast<uint64_t>(lwe_dimension_out + 1) * num_samples;
+
+  int shared_mem = sizeof(Torus) * (lwe_dimension_out + 1);
+
+  cuda_memset_async(lwe_array_out, 0, sizeof(Torus) * lwe_size_after, stream);
+  check_cuda_error(cudaGetLastError());
+
+  dim3 grid(num_samples, 1, 1);
+  dim3 threads(ideal_threads, 1, 1);
+
+  //    cudaFuncSetAttribute(keyswitch<Torus>,
+  //                         cudaFuncAttributeMaxDynamicSharedMemorySize,
+  //                         shared_mem);
+
+  keyswitch<<<grid, threads, shared_mem, stream->stream>>>(
+      lwe_array_out, lwe_output_indexes, lwe_array_in, lwe_input_indexes, ksk,
+      lwe_dimension_in, lwe_dimension_out, base_log, level_count, lwe_lower,
+      lwe_upper, cutoff);
+  check_cuda_error(cudaGetLastError());
+}
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
@@ -0,0 +1,74 @@
+#ifndef CNCRT_TORUS_CUH
+#define CNCRT_TORUS_CUH
+
+#include "types/int128.cuh"
+#include <limits>
+
+/// Generic conversion of a double to a torus element: plain cast to T,
+/// stored in `r`.
+template <typename T>
+__device__ inline void typecast_double_to_torus(double x, T &r) {
+  r = static_cast<T>(x);
+}
+
+/// 32-bit specialization: converts with the __double2uint_rn intrinsic.
+template <>
+__device__ inline void typecast_double_to_torus<uint32_t>(double x,
+                                                          uint32_t &r) {
+  r = __double2uint_rn(x);
+}
+
+/// 64-bit specialization: converts through a 128-bit integer and keeps its
+/// `lo_` field.
+template <>
+__device__ inline void typecast_double_to_torus<uint64_t>(double x,
+                                                          uint64_t &r) {
+  // The ull intrinsic does not behave in the same way on all architectures and
+  // on some platforms this causes the cmux tree test to fail
+  // Hence the intrinsic is not used here
+  uint128 wide = make_uint128_from_float(x);
+  r = wide.lo_;
+}
+
+/**
+ * Rounds `x` to the closest value that keeps only its
+ * level_count * base_log most significant bits: the highest dropped bit is
+ * added to the kept part, then the dropped bits are cleared.
+ */
+template <typename T>
+__device__ inline T round_to_closest_multiple(T x, uint32_t base_log,
+                                              uint32_t level_count) {
+  T shift = sizeof(T) * 8 - level_count * base_log;
+  // Every bit is kept: nothing to round, and `shift - 1` below would wrap
+  // around and produce an out-of-range shift amount
+  if (shift == 0)
+    return x;
+  T mask = 1ll << (shift - 1);
+  T b = (x & mask) >> (shift - 1);
+  T res = x >> shift;
+  res += b;
+  res <<= shift;
+  return res;
+}
+
+/// Writes round(element / (max(T) + 1) * log_shift) to `output`, computed in
+/// double precision. Note that `log_shift` is used as a multiplicative
+/// factor.
+template <typename T>
+__device__ __forceinline__ void rescale_torus_element(T element, T &output,
+                                                      uint32_t log_shift) {
+  const double modulus = double(std::numeric_limits<T>::max()) + 1.0;
+  const double fraction = (double)element / modulus;
+  output = round(fraction * (double)log_shift);
+}
+
+/// Returns round(element / (max(T) + 1) * log_shift), computed in double
+/// precision. Note that `log_shift` is used as a multiplicative factor.
+template <typename T>
+__device__ __forceinline__ T rescale_torus_element(T element,
+                                                   uint32_t log_shift) {
+  const double modulus = double(std::numeric_limits<T>::max()) + 1.0;
+  const double fraction = (double)element / modulus;
+  return round(fraction * (double)log_shift);
+}
+
+/// 32-bit specialization of the rescaling: same formula as the generic
+/// version, with the integer-to-double conversions done by __uint2double_rn.
+template <>
+__device__ __forceinline__ void
+rescale_torus_element<uint32_t>(uint32_t element, uint32_t &output,
+                                uint32_t log_shift) {
+  const double modulus =
+      __uint2double_rn(std::numeric_limits<uint32_t>::max()) + 1.0;
+  const double fraction = __uint2double_rn(element) / modulus;
+  output = round(fraction * __uint2double_rn(log_shift));
+}
+
+/// 64-bit specialization of the rescaling: same formula as the generic
+/// version, with the conversions done by __ull2double_rn (element, maximum)
+/// and __uint2double_rn (log_shift).
+template <>
+__device__ __forceinline__ void
+rescale_torus_element<uint64_t>(uint64_t element, uint64_t &output,
+                                uint32_t log_shift) {
+  const double modulus =
+      __ull2double_rn(std::numeric_limits<uint64_t>::max()) + 1.0;
+  const double fraction = __ull2double_rn(element) / modulus;
+  output = round(fraction * __uint2double_rn(log_shift));
+}
+#endif // CNCRT_TORUS_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/device.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/device.cu
@@ -0,0 +1,230 @@
+#include "device.h"
+#include <cstdint>
+#include <cuda_runtime.h>
+
+/// Creates a stream object for `gpu_index`. Unsafe: the caller must have
+/// checked that the GPU exists. The device is made current before the
+/// cuda_stream_t is constructed; the object is heap-allocated.
+cuda_stream_t *cuda_create_stream(uint32_t gpu_index) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+  return new cuda_stream_t(gpu_index);
+}
+
+/// Unsafe function to destroy CUDA stream, must check first the GPU exists.
+/// Delegates to cuda_stream_t::release().
+/// NOTE(review): the object is created with `new` in cuda_create_stream;
+/// whether release() also frees it cannot be seen from here — confirm.
+void cuda_destroy_stream(cuda_stream_t *stream) { stream->release(); }
+
+/// Allocates `size` bytes on device `gpu_index` and returns the pointer.
+/// Unsafe: the allocation is attempted even if gpu_index is invalid or if
+/// there is not enough memory. A safe wrapper around it must call
+/// cuda_check_valid_malloc() first
+void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+
+  void *device_ptr;
+  check_cuda_error(cudaMalloc(&device_ptr, size));
+  return device_ptr;
+}
+
+/// Allocates `size` bytes on the device of `stream`. With a CUDA runtime
+/// >= 11.2 and a device reporting memory-pool support, the allocation is
+/// stream-ordered (cudaMallocAsync); otherwise it falls back to cudaMalloc.
+void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream) {
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+  void *device_ptr;
+
+#ifndef CUDART_VERSION
+#error CUDART_VERSION Undefined!
+#elif (CUDART_VERSION >= 11020)
+  int pools_supported;
+  check_cuda_error(cudaDeviceGetAttribute(
+      &pools_supported, cudaDevAttrMemoryPoolsSupported, stream->gpu_index));
+
+  if (!pools_supported) {
+    check_cuda_error(cudaMalloc(&device_ptr, size));
+  } else {
+    check_cuda_error(cudaMallocAsync(&device_ptr, size, stream->stream));
+  }
+#else
+  check_cuda_error(cudaMalloc(&device_ptr, size));
+#endif
+  return device_ptr;
+}
+
+/// Checks that `size` bytes can be allocated on device `gpu_index`: panics
+/// when the request is larger than the free memory reported by
+/// cudaMemGetInfo.
+void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+
+  size_t free_mem;
+  size_t total_mem;
+  check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
+
+  if (free_mem < size) {
+    PANIC("Cuda error: not enough memory on device. "
+          "Available: %zu vs Requested: %lu",
+          free_mem, size)
+  }
+}
+
+/// Returns
+///  true if device 0 reports support for cooperative launches
+///  (cudaDevAttrCooperativeLaunch), false otherwise.
+bool cuda_check_support_cooperative_groups() {
+  int cooperative_launch = 0;
+  check_cuda_error(cudaDeviceGetAttribute(&cooperative_launch,
+                                          cudaDevAttrCooperativeLaunch, 0));
+  return cooperative_launch > 0;
+}
+
+/// Copy memory to the GPU asynchronously (host to device, on stream->stream).
+/// Does nothing when size is 0. Panics when `dest` is reported both on
+/// another device than the stream's and as non-device memory.
+/// NOTE(review): the two conditions are combined with `&&`, so a pointer that
+/// fails only one of them is accepted — confirm this is intended.
+void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
+                              cuda_stream_t *stream) {
+  if (size == 0)
+    return;
+
+  cudaPointerAttributes dest_attr;
+  check_cuda_error(cudaPointerGetAttributes(&dest_attr, dest));
+  const bool on_other_device = dest_attr.device != stream->gpu_index;
+  const bool not_device_memory = dest_attr.type != cudaMemoryTypeDevice;
+  if (on_other_device && not_device_memory) {
+    PANIC("Cuda error: invalid device pointer in async copy to GPU.")
+  }
+
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+  check_cuda_error(
+      cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream->stream));
+}
+
+/// Copy memory within a GPU asynchronously (device to device, on
+/// stream->stream). Does nothing when size is 0. Panics when `dest` or `src`
+/// is reported both on another device than the stream's and as non-device
+/// memory, or when the two pointers are reported on different devices.
+/// NOTE(review): the per-pointer conditions are combined with `&&`, so a
+/// pointer that fails only one of them is accepted — confirm this is
+/// intended.
+void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
+                                  cuda_stream_t *stream) {
+  if (size == 0)
+    return;
+
+  cudaPointerAttributes dest_attr;
+  check_cuda_error(cudaPointerGetAttributes(&dest_attr, dest));
+  const bool dest_other_device = dest_attr.device != stream->gpu_index;
+  const bool dest_not_device_mem = dest_attr.type != cudaMemoryTypeDevice;
+  if (dest_other_device && dest_not_device_mem) {
+    PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.")
+  }
+
+  cudaPointerAttributes src_attr;
+  check_cuda_error(cudaPointerGetAttributes(&src_attr, src));
+  const bool src_other_device = src_attr.device != stream->gpu_index;
+  const bool src_not_device_mem = src_attr.type != cudaMemoryTypeDevice;
+  if (src_other_device && src_not_device_mem) {
+    PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
+  }
+
+  if (src_attr.device != dest_attr.device) {
+    PANIC("Cuda error: different devices specified in copy from GPU to GPU.")
+  }
+
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+  check_cuda_error(cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice,
+                                   stream->stream));
+}
+
+/// Synchronizes device: makes `gpu_index` the current device and waits with
+/// cudaDeviceSynchronize().
+void cuda_synchronize_device(uint32_t gpu_index) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+  check_cuda_error(cudaDeviceSynchronize());
+}
+
+/// Fills `size` bytes at `dest` with `val` through cudaMemsetAsync on
+/// stream->stream. Does nothing when size is 0. Panics when `dest` is
+/// reported both on another device than the stream's and as non-device
+/// memory.
+/// NOTE(review): the two conditions are combined with `&&`, so a pointer that
+/// fails only one of them is accepted — confirm this is intended.
+void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
+                       cuda_stream_t *stream) {
+  if (size == 0)
+    return;
+
+  cudaPointerAttributes dest_attr;
+  check_cuda_error(cudaPointerGetAttributes(&dest_attr, dest));
+  const bool on_other_device = dest_attr.device != stream->gpu_index;
+  const bool not_device_memory = dest_attr.type != cudaMemoryTypeDevice;
+  if (on_other_device && not_device_memory) {
+    PANIC("Cuda error: invalid dest device pointer in cuda memset.")
+  }
+
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+  check_cuda_error(cudaMemsetAsync(dest, val, size, stream->stream));
+}
+
+/// Sets array[index] = value for every global thread index below `n`
+/// (1D launch, one element per thread).
+template <typename Torus>
+__global__ void cuda_set_value_kernel(Torus *array, Torus value, Torus n) {
+  // Unsigned 64-bit index: no wrap-around in blockIdx.x * blockDim.x and no
+  // signed/unsigned comparison against n
+  size_t index = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < n)
+    array[index] = value;
+}
+
+/// Sets the first `n` elements of the device array `d_array` to `value` by
+/// launching cuda_set_value_kernel on `*stream` with blocks of 256 threads.
+/// Does nothing when n is 0. Panics when `d_array` is not reported as device
+/// memory.
+template <typename Torus>
+void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
+                          Torus n) {
+  // Nothing to set; also avoids launching a grid of 0 blocks, which is an
+  // invalid launch configuration
+  if (n == 0)
+    return;
+  cudaPointerAttributes attr;
+  check_cuda_error(cudaPointerGetAttributes(&attr, d_array));
+  if (attr.type != cudaMemoryTypeDevice) {
+    PANIC("Cuda error: invalid dest device pointer in cuda set value.")
+  }
+  int block_size = 256;
+  // Ceil-division done in 64 bits so n + block_size - 1 cannot wrap
+  int num_blocks =
+      (static_cast<uint64_t>(n) + block_size - 1) / block_size;
+
+  // Launch the kernel
+  cuda_set_value_kernel<<<num_blocks, block_size, 0, *stream>>>(d_array, value,
+                                                                n);
+  check_cuda_error(cudaGetLastError());
+}
+
+/// Explicitly instantiate cuda_set_value_async for 32 and 64 bits
+template void cuda_set_value_async(cudaStream_t *stream, uint64_t *d_array,
+                                   uint64_t value, uint64_t n);
+template void cuda_set_value_async(cudaStream_t *stream, uint32_t *d_array,
+                                   uint32_t value, uint32_t n);
+
+/// Copy memory to the CPU asynchronously (device to host, on stream->stream).
+/// Does nothing when size is 0. Panics when `src` is reported both on
+/// another device than the stream's and as non-device memory.
+/// NOTE(review): the two conditions are combined with `&&`, so a pointer that
+/// fails only one of them is accepted — confirm this is intended.
+void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
+                              cuda_stream_t *stream) {
+  if (size == 0)
+    return;
+
+  cudaPointerAttributes src_attr;
+  check_cuda_error(cudaPointerGetAttributes(&src_attr, src));
+  const bool on_other_device = src_attr.device != stream->gpu_index;
+  const bool not_device_memory = src_attr.type != cudaMemoryTypeDevice;
+  if (on_other_device && not_device_memory) {
+    PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
+  }
+
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+  check_cuda_error(
+      cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream->stream));
+}
+
+/// Return number of GPUs available, as reported by cudaGetDeviceCount
+int cuda_get_number_of_gpus() {
+  int device_count;
+  check_cuda_error(cudaGetDeviceCount(&device_count));
+  return device_count;
+}
+
+/// Drop a cuda array: makes `gpu_index` the current device, then releases
+/// `ptr` with cudaFree.
+void cuda_drop(void *ptr, uint32_t gpu_index) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+  check_cuda_error(cudaFree(ptr));
+}
+
+/// Drop a cuda array asynchronously, if supported on the device: with a CUDA
+/// runtime >= 11.2 and a device reporting memory-pool support the release is
+/// stream-ordered (cudaFreeAsync); otherwise cudaFree is used.
+void cuda_drop_async(void *ptr, cuda_stream_t *stream) {
+
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+#ifndef CUDART_VERSION
+#error CUDART_VERSION Undefined!
+#elif (CUDART_VERSION >= 11020)
+  int pools_supported;
+  check_cuda_error(cudaDeviceGetAttribute(
+      &pools_supported, cudaDevAttrMemoryPoolsSupported, stream->gpu_index));
+
+  if (!pools_supported) {
+    check_cuda_error(cudaFree(ptr));
+  } else {
+    check_cuda_error(cudaFreeAsync(ptr, stream->stream));
+  }
+#else
+  check_cuda_error(cudaFree(ptr));
+#endif
+}
+
+/// Get the maximum size for the shared memory: returns
+/// sharedMemPerMultiprocessor for devices with compute capability major >= 6
+/// and sharedMemPerBlock for older ones.
+int cuda_get_max_shared_memory(uint32_t gpu_index) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+
+  cudaDeviceProp device_prop;
+  check_cuda_error(cudaGetDeviceProperties(&device_prop, gpu_index));
+
+  const bool per_multiprocessor = device_prop.major >= 6;
+  int max_shared_memory = per_multiprocessor
+                              ? device_prop.sharedMemPerMultiprocessor
+                              : device_prop.sharedMemPerBlock;
+  return max_shared_memory;
+}
+
+/// Waits on the given stream by delegating to cuda_stream_t::synchronize().
+void cuda_synchronize_stream(cuda_stream_t *stream) { stream->synchronize(); }
--- a/backends/tfhe-cuda-backend/cuda/src/fft/bnsmfft.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/bnsmfft.cuh
@@ -0,0 +1,725 @@
+#ifndef GPU_BOOTSTRAP_FFT_CUH
+#define GPU_BOOTSTRAP_FFT_CUH
+
+#include "polynomial/functions.cuh"
+#include "polynomial/parameters.cuh"
+#include "twiddles.cuh"
+#include "types/complex/operations.cuh"
+
+/*
+ * Direct negacyclic FFT:
+ *   - before the FFT the N real coefficients are stored into a
+ *     N/2 sized complex with the even coefficients in the real part
+ *     and the odd coefficients in the imaginary part. This is referred to
+ *     as the half-size FFT
+ *   - when calling NSMFFT_direct for the forward negacyclic FFT of PBS,
+ *     opt is divided by 2 because the butterfly pattern is always applied
+ *     between pairs of coefficients
+ *   - instead of twisting each coefficient A_j before the FFT by
+ *     multiplying by the w^j roots of unity (aka twiddles, w=exp(-i pi /N)),
+ *     the FFT is modified, and for each level k of the FFT the twiddle:
+ *     w_j,k = exp(-i pi j/2^k)
+ *     is replaced with:
+ *     \zeta_j,k = exp(-i pi (2j-1)/2^k)
+ *
+ * Usage notes (derived from the code below):
+ *   - A is transformed in place: every butterfly reads and writes A[i1] and
+ *     A[i2]
+ *   - params::degree is the compressed (half) size; each thread performs
+ *     params::opt / 2 butterflies per level, params::degree / params::opt
+ *     indices apart
+ *   - every level ends with __syncthreads(), so all threads of the block
+ *     have to call this function together
+ *   - presumably the block runs params::degree / params::opt threads and A
+ *     holds params::degree elements - TODO confirm against the callers
+ *   - twiddles of level k (2 <= k <= 12) are read from
+ *     negtwiddles[2^(k-1) + twid_id]; level 13 reads negtwiddles13[twid_id]
+ */
+template <class params> __device__ void NSMFFT_direct(double2 *A) {
+
+  /* We don't make bit reverse here, since twiddles are already reversed
+   *  Each thread is always in charge of "opt/2" pairs of coefficients,
+   *  which is why we always loop through N/2 by N/opt strides
+   *  The pragma unroll instruction tells the compiler to unroll the
+   *  full loop, which should increase performance
+   */
+
+  size_t tid = threadIdx.x;
+  size_t twid_id;
+  size_t i1, i2;
+  double2 u, v, w;
+  // level 1
+  // there is a single twiddle on level 1 and its real and imaginary parts are
+  // equal, so it is written inline as a literal instead of being read from
+  // negtwiddles (NOTE(review): the product still goes through double2 '*')
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    i1 = tid;
+    i2 = tid + params::degree / 2;
+
+    u = A[i1];
+    v = A[i2] * (double2){0.707106781186547461715008466854,
+                          0.707106781186547461715008466854};
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 2
+  // from this level on there is more than one twiddle and none of them has
+  // equal real and imag parts, so complete complex multiplication is needed
+  // for each level params::degree / 2^level represents number of coefficients
+  // inside divided chunk of specific level
+  //
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 4); // chunk index, also picks the twiddle
+    i1 = 2 * (params::degree / 4) * twid_id + (tid & (params::degree / 4 - 1));
+    i2 = i1 + params::degree / 4; // partner element, half a chunk further
+
+    w = negtwiddles[twid_id + 2];
+    u = A[i1];
+    v = A[i2] * w;
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 3
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 8);
+    i1 = 2 * (params::degree / 8) * twid_id + (tid & (params::degree / 8 - 1));
+    i2 = i1 + params::degree / 8;
+
+    w = negtwiddles[twid_id + 4];
+    u = A[i1];
+    v = A[i2] * w;
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 4
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 16);
+    i1 =
+        2 * (params::degree / 16) * twid_id + (tid & (params::degree / 16 - 1));
+    i2 = i1 + params::degree / 16;
+
+    w = negtwiddles[twid_id + 8];
+    u = A[i1];
+    v = A[i2] * w;
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 5
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 32);
+    i1 =
+        2 * (params::degree / 32) * twid_id + (tid & (params::degree / 32 - 1));
+    i2 = i1 + params::degree / 32;
+
+    w = negtwiddles[twid_id + 16];
+    u = A[i1];
+    v = A[i2] * w;
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 6
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 64);
+    i1 =
+        2 * (params::degree / 64) * twid_id + (tid & (params::degree / 64 - 1));
+    i2 = i1 + params::degree / 64;
+
+    w = negtwiddles[twid_id + 32];
+    u = A[i1];
+    v = A[i2] * w;
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 7
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 128);
+    i1 = 2 * (params::degree / 128) * twid_id +
+         (tid & (params::degree / 128 - 1));
+    i2 = i1 + params::degree / 128;
+
+    w = negtwiddles[twid_id + 64];
+    u = A[i1];
+    v = A[i2] * w;
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // from level 8, we need to check size of params degree, because we support
+  // minimum actual polynomial size = 256,  when compressed size is halved and
+  // minimum supported compressed size is 128, so we always need first 7
+  // levels of butterfly operation, since butterfly levels are hardcoded
+  // we need to check if polynomial size is big enough to require specific level
+  // of butterfly.
+  if constexpr (params::degree >= 256) {
+    // level 8
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 256);
+      i1 = 2 * (params::degree / 256) * twid_id +
+           (tid & (params::degree / 256 - 1));
+      i2 = i1 + params::degree / 256;
+
+      w = negtwiddles[twid_id + 128];
+      u = A[i1];
+      v = A[i2] * w;
+
+      A[i1] += v;
+      A[i2] = u - v;
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 512) {
+    // level 9
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 512);
+      i1 = 2 * (params::degree / 512) * twid_id +
+           (tid & (params::degree / 512 - 1));
+      i2 = i1 + params::degree / 512;
+
+      w = negtwiddles[twid_id + 256];
+      u = A[i1];
+      v = A[i2] * w;
+
+      A[i1] += v;
+      A[i2] = u - v;
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 1024) {
+    // level 10
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 1024);
+      i1 = 2 * (params::degree / 1024) * twid_id +
+           (tid & (params::degree / 1024 - 1));
+      i2 = i1 + params::degree / 1024;
+
+      w = negtwiddles[twid_id + 512];
+      u = A[i1];
+      v = A[i2] * w;
+
+      A[i1] += v;
+      A[i2] = u - v;
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 2048) {
+    // level 11
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 2048);
+      i1 = 2 * (params::degree / 2048) * twid_id +
+           (tid & (params::degree / 2048 - 1));
+      i2 = i1 + params::degree / 2048;
+
+      w = negtwiddles[twid_id + 1024];
+      u = A[i1];
+      v = A[i2] * w;
+
+      A[i1] += v;
+      A[i2] = u - v;
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 4096) {
+    // level 12
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 4096);
+      i1 = 2 * (params::degree / 4096) * twid_id +
+           (tid & (params::degree / 4096 - 1));
+      i2 = i1 + params::degree / 4096;
+
+      w = negtwiddles[twid_id + 2048];
+      u = A[i1];
+      v = A[i2] * w;
+
+      A[i1] += v;
+      A[i2] = u - v;
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  // compressed size = 8192 is actual polynomial size = 16384.
+  // from this size, twiddles can't fit in constant memory,
+  // so from here, butterfly operation accesses device memory.
+  if constexpr (params::degree >= 8192) {
+    // level 13
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 8192);
+      i1 = 2 * (params::degree / 8192) * twid_id +
+           (tid & (params::degree / 8192 - 1));
+      i2 = i1 + params::degree / 8192;
+
+      w = negtwiddles13[twid_id]; // separate table, indexed without offset
+      u = A[i1];
+      v = A[i2] * w;
+
+      A[i1] += v;
+      A[i2] = u - v;
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+}
+
+/*
+ * negacyclic inverse fft
+ *
+ * Usage notes (derived from the code below):
+ *   - A is transformed in place
+ *   - every element of A handled by the block is first divided by
+ *     params::degree (the compressed size), then the butterfly levels are
+ *     run from the highest one enabled for params::degree down to level 1
+ *   - each butterfly uses conjugate(w) of the twiddle the direct transform
+ *     reads for the same level: negtwiddles[2^(k-1) + twid_id] for levels
+ *     1..12 and negtwiddles13[twid_id] for level 13
+ *   - the scaling step and every level end with __syncthreads(), so all
+ *     threads of the block have to call this function together
+ *   - presumably the block runs params::degree / params::opt threads and A
+ *     holds params::degree elements - TODO confirm against the callers
+ */
+template <class params> __device__ void NSMFFT_inverse(double2 *A) {
+
+  /* We don't make bit reverse here, since twiddles are already reversed
+   *  Each thread is always in charge of "opt/2" pairs of coefficients,
+   *  which is why we always loop through N/2 by N/opt strides
+   *  The pragma unroll instruction tells the compiler to unroll the
+   *  full loop, which should increase performance
+   */
+
+  size_t tid = threadIdx.x;
+  size_t twid_id;
+  size_t i1, i2;
+  double2 u, w;
+
+  // divide input by compressed polynomial size
+  tid = threadIdx.x;
+  for (size_t i = 0; i < params::opt; ++i) { // opt steps here, not opt / 2
+    A[tid] /= params::degree;
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // none of the twiddles have equal real and imag part, so
+  // complete complex multiplication has to be done
+  // here we have more than one twiddle
+  // mapping in backward fft is reversed
+  // butterfly operation is started from last level
+
+  // compressed size = 8192 is actual polynomial size = 16384.
+  // twiddles for this size can't fit in constant memory so
+  // butterfly operation for this level accesses device memory to fetch
+  // twiddles
+  if constexpr (params::degree >= 8192) {
+    // level 13
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 8192);
+      i1 = 2 * (params::degree / 8192) * twid_id +
+           (tid & (params::degree / 8192 - 1));
+      i2 = i1 + params::degree / 8192;
+
+      w = negtwiddles13[twid_id];
+      u = A[i1] - A[i2]; // difference taken before A[i1] is overwritten
+
+      A[i1] += A[i2];
+      A[i2] = u * conjugate(w);
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 4096) {
+    // level 12
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 4096);
+      i1 = 2 * (params::degree / 4096) * twid_id +
+           (tid & (params::degree / 4096 - 1));
+      i2 = i1 + params::degree / 4096;
+
+      w = negtwiddles[twid_id + 2048];
+      u = A[i1] - A[i2];
+
+      A[i1] += A[i2];
+      A[i2] = u * conjugate(w);
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 2048) {
+    // level 11
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 2048);
+      i1 = 2 * (params::degree / 2048) * twid_id +
+           (tid & (params::degree / 2048 - 1));
+      i2 = i1 + params::degree / 2048;
+
+      w = negtwiddles[twid_id + 1024];
+      u = A[i1] - A[i2];
+
+      A[i1] += A[i2];
+      A[i2] = u * conjugate(w);
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 1024) {
+    // level 10
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 1024);
+      i1 = 2 * (params::degree / 1024) * twid_id +
+           (tid & (params::degree / 1024 - 1));
+      i2 = i1 + params::degree / 1024;
+
+      w = negtwiddles[twid_id + 512];
+      u = A[i1] - A[i2];
+
+      A[i1] += A[i2];
+      A[i2] = u * conjugate(w);
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 512) {
+    // level 9
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 512);
+      i1 = 2 * (params::degree / 512) * twid_id +
+           (tid & (params::degree / 512 - 1));
+      i2 = i1 + params::degree / 512;
+
+      w = negtwiddles[twid_id + 256];
+      u = A[i1] - A[i2];
+
+      A[i1] += A[i2];
+      A[i2] = u * conjugate(w);
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  if constexpr (params::degree >= 256) {
+    // level 8
+    tid = threadIdx.x;
+#pragma unroll
+    for (size_t i = 0; i < params::opt / 2; ++i) {
+      twid_id = tid / (params::degree / 256);
+      i1 = 2 * (params::degree / 256) * twid_id +
+           (tid & (params::degree / 256 - 1));
+      i2 = i1 + params::degree / 256;
+
+      w = negtwiddles[twid_id + 128];
+      u = A[i1] - A[i2];
+
+      A[i1] += A[i2];
+      A[i2] = u * conjugate(w);
+
+      tid += params::degree / params::opt;
+    }
+    __syncthreads();
+  }
+
+  // below level 8, we don't need to check size of params degree, because we
+  // support minimum actual polynomial size = 256,  when compressed size is
+  // halved and minimum supported compressed size is 128, so we always need
+  // last 7 levels of butterfly operation, since butterfly levels are hardcoded
+  // we don't need to check if polynomial size is big enough to require
+  // specific level of butterfly.
+  // level 7
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 128);
+    i1 = 2 * (params::degree / 128) * twid_id +
+         (tid & (params::degree / 128 - 1));
+    i2 = i1 + params::degree / 128;
+
+    w = negtwiddles[twid_id + 64];
+    u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 6
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 64);
+    i1 =
+        2 * (params::degree / 64) * twid_id + (tid & (params::degree / 64 - 1));
+    i2 = i1 + params::degree / 64;
+
+    w = negtwiddles[twid_id + 32];
+    u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 5
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 32);
+    i1 =
+        2 * (params::degree / 32) * twid_id + (tid & (params::degree / 32 - 1));
+    i2 = i1 + params::degree / 32;
+
+    w = negtwiddles[twid_id + 16];
+    u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 4
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 16);
+    i1 =
+        2 * (params::degree / 16) * twid_id + (tid & (params::degree / 16 - 1));
+    i2 = i1 + params::degree / 16;
+
+    w = negtwiddles[twid_id + 8];
+    u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 3
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 8);
+    i1 = 2 * (params::degree / 8) * twid_id + (tid & (params::degree / 8 - 1));
+    i2 = i1 + params::degree / 8;
+
+    w = negtwiddles[twid_id + 4];
+    u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 2
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 4);
+    i1 = 2 * (params::degree / 4) * twid_id + (tid & (params::degree / 4 - 1));
+    i2 = i1 + params::degree / 4;
+
+    w = negtwiddles[twid_id + 2];
+    u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // level 1 (unlike the direct transform, the twiddle is read from the table)
+  tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    twid_id = tid / (params::degree / 2);
+    i1 = 2 * (params::degree / 2) * twid_id + (tid & (params::degree / 2 - 1));
+    i2 = i1 + params::degree / 2;
+
+    w = negtwiddles[twid_id + 1];
+    u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+}
+
+/*
+ * Batched forward negacyclic FFT kernel
+ *   - the transform is done in half size: `params` carries the actual
+ *     degree and NSMFFT_direct is run on HalfDegree<params>
+ *   - unrolling half size fft result in half size + 1 elements
+ *   - d_input must already hold the compressed input; block b reads and
+ *     writes the params::degree / 2 elements starting at
+ *     b * (params::degree / 2)
+ *   - the working area is the dynamic shared memory, or the slice of
+ *     `buffer` belonging to the block when SMD == NOSM
+ *   - each thread moves params::opt / 2 elements, params::degree /
+ *     params::opt indices apart
+ */
+template <class params, sharedMemDegree SMD>
+__global__ void batch_NSMFFT(double2 *d_input, double2 *d_output,
+                             double2 *buffer) {
+  extern __shared__ double2 sharedMemoryFFT[];
+  double2 *work = (SMD == NOSM) ? &buffer[blockIdx.x * params::degree / 2]
+                                : sharedMemoryFFT;
+  // First element of this block's polynomial in d_input / d_output.
+  const auto block_offset = blockIdx.x * (params::degree / 2);
+
+  // Stage the input polynomial into the working area.
+  int idx = threadIdx.x;
+#pragma unroll
+  for (int step = 0; step < params::opt / 2; step++) {
+    work[idx] = d_input[block_offset + idx];
+    idx += params::degree / params::opt;
+  }
+  __syncthreads();
+  NSMFFT_direct<HalfDegree<params>>(work);
+  __syncthreads();
+
+  // Write the transformed polynomial to the output.
+  idx = threadIdx.x;
+#pragma unroll
+  for (int step = 0; step < params::opt / 2; step++) {
+    d_output[block_offset + idx] = work[idx];
+    idx += params::degree / params::opt;
+  }
+}
+
+/*
+ * Batched negacyclic polynomial multiplication kernel
+ *   - only used for fft tests
+ *   - d_input1 and d_output must not have the same pointer
+ *   - d_input1 is overwritten with its own forward transform
+ *   - the working area is the dynamic shared memory, or the slice of
+ *     `buffer` belonging to the block when SMD == NOSM
+ */
+template <class params, sharedMemDegree SMD>
+__global__ void batch_polynomial_mul(double2 *d_input1, double2 *d_input2,
+                                     double2 *d_output, double2 *buffer) {
+  extern __shared__ double2 sharedMemoryFFT[];
+  double2 *work = (SMD == NOSM) ? &buffer[blockIdx.x * params::degree / 2]
+                                : sharedMemoryFFT;
+  // First element of this block's polynomials in the global arrays.
+  const auto block_offset = blockIdx.x * (params::degree / 2);
+
+  // Step 1: stage the first polynomial into the working area.
+  int idx = threadIdx.x;
+#pragma unroll
+  for (int step = 0; step < params::opt / 2; step++) {
+    work[idx] = d_input1[block_offset + idx];
+    idx += params::degree / params::opt;
+  }
+
+  // Step 2: forward transform of the first polynomial.
+  __syncthreads();
+  NSMFFT_direct<HalfDegree<params>>(work);
+  __syncthreads();
+
+  // Step 3: park that transform in d_input1 so the working area can be reused.
+  idx = threadIdx.x;
+#pragma unroll
+  for (int step = 0; step < params::opt / 2; step++) {
+    d_input1[block_offset + idx] = work[idx];
+    idx += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // Step 4: stage the second polynomial into the working area.
+  idx = threadIdx.x;
+#pragma unroll
+  for (int step = 0; step < params::opt / 2; step++) {
+    work[idx] = d_input2[block_offset + idx];
+    idx += params::degree / params::opt;
+  }
+
+  // Step 5: forward transform of the second polynomial.
+  __syncthreads();
+  NSMFFT_direct<HalfDegree<params>>(work);
+  __syncthreads();
+
+  // Step 6: pointwise product of both transforms, kept in the working area.
+  idx = threadIdx.x;
+#pragma unroll
+  for (int step = 0; step < params::opt / 2; step++) {
+    work[idx] *= d_input1[block_offset + idx];
+    idx += params::degree / params::opt;
+  }
+
+  // Step 7: inverse transform of the product.
+  __syncthreads();
+  NSMFFT_inverse<HalfDegree<params>>(work);
+  __syncthreads();
+
+  // Step 8: copy the result to the output array.
+  idx = threadIdx.x;
+#pragma unroll
+  for (int step = 0; step < params::opt / 2; step++) {
+    d_output[block_offset + idx] = work[idx];
+    idx += params::degree / params::opt;
+  }
+}
+
+#endif // GPU_BOOTSTRAP_FFT_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
--- a/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cuh
@@ -0,0 +1,13 @@
+#ifndef GPU_BOOTSTRAP_TWIDDLES_CUH
+#define GPU_BOOTSTRAP_TWIDDLES_CUH
+
+/*
+ * 'negtwiddles' are stored in constant memory for faster access times.
+ * Because of its limited size, only twiddles for up to 2^12 polynomial size
+ * can be stored there; twiddles for 2^13 are stored in device memory, in
+ * 'negtwiddles13'.
+ */
+
+extern __constant__ double2 negtwiddles[4096];
+extern __device__ double2 negtwiddles13[4096];
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
@@ -0,0 +1,51 @@
+#include "integer/bitwise_ops.cuh"
+
+/// 64-bit entry point creating the scratch buffer for radix bitwise
+/// operations. The scalar parameters are packed into an int_radix_params and
+/// `mem_ptr` is reinterpreted as an int_bitop_buffer<uint64_t> pointer slot
+/// before forwarding to scratch_cuda_integer_radix_bitop_kb<uint64_t>.
+void scratch_cuda_integer_radix_bitop_kb_64(
+    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
+    bool allocate_gpu_memory) {
+
+  auto **bitop_mem = reinterpret_cast<int_bitop_buffer<uint64_t> **>(mem_ptr);
+
+  int_radix_params radix_params(
+      pbs_type, glwe_dimension, polynomial_size, big_lwe_dimension,
+      small_lwe_dimension, ks_level, ks_base_log, pbs_level, pbs_base_log,
+      grouping_factor, message_modulus, carry_modulus);
+
+  scratch_cuda_integer_radix_bitop_kb<uint64_t>(
+      stream, bitop_mem, lwe_ciphertext_count, radix_params, op_type,
+      allocate_gpu_memory);
+}
+
+/// 64-bit entry point for a two-operand radix bitwise operation: casts the
+/// untyped pointers to uint64_t arrays (and `mem_ptr` to the bitop scratch
+/// buffer) and forwards to host_integer_radix_bitop_kb<uint64_t>.
+void cuda_bitop_integer_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1,
+    void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk,
+    uint32_t lwe_ciphertext_count) {
+
+  auto *out_blocks = static_cast<uint64_t *>(lwe_array_out);
+  auto *lhs_blocks = static_cast<uint64_t *>(lwe_array_1);
+  auto *rhs_blocks = static_cast<uint64_t *>(lwe_array_2);
+  auto *bitop_mem = reinterpret_cast<int_bitop_buffer<uint64_t> *>(mem_ptr);
+  auto *keyswitch_key = static_cast<uint64_t *>(ksk);
+
+  host_integer_radix_bitop_kb<uint64_t>(stream, out_blocks, lhs_blocks,
+                                        rhs_blocks, bitop_mem, bsk,
+                                        keyswitch_key, lwe_ciphertext_count);
+}
+
+/// 64-bit entry point for the radix bitwise NOT: casts the untyped pointers
+/// to uint64_t arrays (and `mem_ptr` to the bitop scratch buffer) and
+/// forwards to host_integer_radix_bitnot_kb<uint64_t>.
+void cuda_bitnot_integer_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    int8_t *mem_ptr, void *bsk, void *ksk, uint32_t lwe_ciphertext_count) {
+
+  auto *out_blocks = static_cast<uint64_t *>(lwe_array_out);
+  auto *in_blocks = static_cast<uint64_t *>(lwe_array_in);
+  auto *bitop_mem = reinterpret_cast<int_bitop_buffer<uint64_t> *>(mem_ptr);
+  auto *keyswitch_key = static_cast<uint64_t *>(ksk);
+
+  host_integer_radix_bitnot_kb<uint64_t>(stream, out_blocks, in_blocks,
+                                         bitop_mem, bsk, keyswitch_key,
+                                         lwe_ciphertext_count);
+}
+
+/// Call release(stream) on the bitop scratch buffer stored in `*mem_ptr_void`.
+/// NOTE(review): the int_bitop_buffer object itself is not deleted here and
+/// `*mem_ptr_void` is left unchanged - confirm whether release() or the
+/// caller is responsible for that.
+void cleanup_cuda_integer_bitop(cuda_stream_t *stream, int8_t **mem_ptr_void) {
+
+  auto *bitop_mem =
+      reinterpret_cast<int_bitop_buffer<uint64_t> *>(*mem_ptr_void);
+  bitop_mem->release(stream);
+}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
@@ -0,0 +1,52 @@
+#ifndef CUDA_INTEGER_BITWISE_OPS_CUH
+#define CUDA_INTEGER_BITWISE_OPS_CUH
+
+#include "crypto/keyswitch.cuh"
+#include "device.h"
+#include "integer.cuh"
+#include "integer.h"
+#include "pbs/bootstrap_low_latency.cuh"
+#include "pbs/bootstrap_multibit.cuh"
+#include "polynomial/functions.cuh"
+#include "utils/kernel_dimensions.cuh"
+#include <omp.h>
+
+/*
+ * Two-operand radix bitwise operation: applies the lookup table held by the
+ * scratch buffer (`mem_ptr->lut`) to lwe_array_1 and lwe_array_2 through
+ * integer_radix_apply_bivariate_lookup_table_kb, writing to lwe_array_out.
+ */
+template <typename Torus>
+__host__ void
+host_integer_radix_bitop_kb(cuda_stream_t *stream, Torus *lwe_array_out,
+                            Torus *lwe_array_1, Torus *lwe_array_2,
+                            int_bitop_buffer<Torus> *mem_ptr, void *bsk,
+                            Torus *ksk, uint32_t num_radix_blocks) {
+
+  auto bivariate_lut = mem_ptr->lut;
+
+  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      stream, lwe_array_out, lwe_array_1, lwe_array_2, bsk, ksk,
+      num_radix_blocks, bivariate_lut);
+}
+
+/*
+ * Radix bitwise NOT: applies the lookup table held by the scratch buffer
+ * (`mem_ptr->lut`) to lwe_array_in through
+ * integer_radix_apply_univariate_lookup_table_kb, writing to lwe_array_out.
+ */
+template <typename Torus>
+__host__ void
+host_integer_radix_bitnot_kb(cuda_stream_t *stream, Torus *lwe_array_out,
+                             Torus *lwe_array_in,
+                             int_bitop_buffer<Torus> *mem_ptr, void *bsk,
+                             Torus *ksk, uint32_t num_radix_blocks) {
+
+  auto univariate_lut = mem_ptr->lut;
+
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      stream, lwe_array_out, lwe_array_in, bsk, ksk, num_radix_blocks,
+      univariate_lut);
+}
+
+/*
+ * Create the scratch buffer used by the radix bitwise operations.
+ *   - stream: its gpu_index is made the current device before the buffer is
+ *     constructed, and it is forwarded to the buffer constructor
+ *   - mem_ptr: receives the newly allocated int_bitop_buffer<Torus>
+ *   - num_radix_blocks, params, op, allocate_gpu_memory: forwarded unchanged
+ *     to the int_bitop_buffer constructor
+ */
+template <typename Torus>
+__host__ void scratch_cuda_integer_radix_bitop_kb(
+    cuda_stream_t *stream, int_bitop_buffer<Torus> **mem_ptr,
+    uint32_t num_radix_blocks, int_radix_params params, BITOP_TYPE op,
+    bool allocate_gpu_memory) {
+
+  // Check the device selection like the other call sites do, so a failure is
+  // reported here instead of the buffer being built on whatever device
+  // happens to be current.
+  check_cuda_error(cudaSetDevice(stream->gpu_index));
+  *mem_ptr = new int_bitop_buffer<Torus>(stream, op, params, num_radix_blocks,
+                                         allocate_gpu_memory);
+}
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
@@ -0,0 +1,45 @@
+#include "integer/cmux.cuh"
+
+/// 64-bit entry point creating the scratch buffer for the radix cmux. The
+/// scalar parameters are packed into an int_radix_params, the predicate
+/// passed along maps 1 to 1 and every other value to 0, and `mem_ptr` is
+/// reinterpreted as an int_cmux_buffer<uint64_t> pointer slot.
+void scratch_cuda_integer_radix_cmux_kb_64(
+    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
+
+  auto **cmux_mem = reinterpret_cast<int_cmux_buffer<uint64_t> **>(mem_ptr);
+
+  int_radix_params radix_params(
+      pbs_type, glwe_dimension, polynomial_size, big_lwe_dimension,
+      small_lwe_dimension, ks_level, ks_base_log, pbs_level, pbs_base_log,
+      grouping_factor, message_modulus, carry_modulus);
+
+  std::function<uint64_t(uint64_t)> is_one_predicate =
+      [](uint64_t x) -> uint64_t { return x == 1; };
+
+  scratch_cuda_integer_radix_cmux_kb(stream, cmux_mem, is_one_predicate,
+                                     lwe_ciphertext_count, radix_params,
+                                     allocate_gpu_memory);
+}
+
+/// 64-bit entry point for the radix cmux: casts the untyped pointers to
+/// uint64_t arrays (and `mem_ptr` to the cmux scratch buffer) and forwards to
+/// host_integer_radix_cmux_kb<uint64_t>.
+void cuda_cmux_integer_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_condition,
+    void *lwe_array_true, void *lwe_array_false, int8_t *mem_ptr, void *bsk,
+    void *ksk, uint32_t lwe_ciphertext_count) {
+
+  auto *out_blocks = static_cast<uint64_t *>(lwe_array_out);
+  auto *condition_block = static_cast<uint64_t *>(lwe_condition);
+  auto *true_blocks = static_cast<uint64_t *>(lwe_array_true);
+  auto *false_blocks = static_cast<uint64_t *>(lwe_array_false);
+  auto *cmux_mem = reinterpret_cast<int_cmux_buffer<uint64_t> *>(mem_ptr);
+  auto *keyswitch_key = static_cast<uint64_t *>(ksk);
+
+  host_integer_radix_cmux_kb<uint64_t>(stream, out_blocks, condition_block,
+                                       true_blocks, false_blocks, cmux_mem,
+                                       bsk, keyswitch_key,
+                                       lwe_ciphertext_count);
+}
+
+/// Call release(stream) on the cmux scratch buffer stored in `*mem_ptr_void`.
+/// NOTE(review): the int_cmux_buffer object itself is not deleted here and
+/// `*mem_ptr_void` is left unchanged - confirm whether release() or the
+/// caller is responsible for that.
+void cleanup_cuda_integer_radix_cmux(cuda_stream_t *stream,
+                                     int8_t **mem_ptr_void) {
+
+  auto *cmux_mem =
+      reinterpret_cast<int_cmux_buffer<uint64_t> *>(*mem_ptr_void);
+  cmux_mem->release(stream);
+}
--- a/Show More
+++ b/Show More