GITBOOK-5: Update TOC

GITBOOK-4: V2 design details
GITBOOK-3: correct a typo
2026-01-11 15:48:20 -05:00 · 2024-03-05 14:32:28 +00:00 · 2024-02-28 15:27:05 +00:00 · 2024-02-28 14:54:38 +00:00 · 2024-02-28 14:23:50 +00:00 · 2024-02-28 14:11:06 +00:00
835 changed files with 157512 additions and 31114 deletions
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -5,13 +5,3 @@ failure-output = "final"
 fail-fast = false
 retries = 0
 slow-timeout = "5m"
-
-
-[[profile.ci.overrides]]
-filter = 'test(/^.*param_message_1_carry_[567]$/) or test(/^.*param_message_4_carry_4$/)'
-retries = 3
-
-[[profile.ci.overrides]]
-filter = 'test(/^.*param_message_[23]_carry_[23]$/)'
-retries = 1
-
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -0,0 +1,13 @@
+<!-- Feel free to delete the template if the PR (bumping a version e.g.) does not fit the template -->
+closes: _please link all relevant issues_
+
+### PR content/description
+
+### Check-list:
+
+* [ ] Tests for the changes have been added (for bug fixes / features)
+* [ ] Docs have been added / updated (for bug fixes / features)
+* [ ] Relevant issues are marked as resolved/closed, related issues are linked in the description
+* [ ] Check for breaking changes (including serialization changes) and add them to commit message following the conventional commit [specification][conventional-breaking]
+
+[conventional-breaking]: https://www.conventionalcommits.org/en/v1.0.0/#commit-message-with-description-and-breaking-change-footer
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -0,0 +1,127 @@
+# Run a small subset of shortint and integer tests to ensure quick feedback.
+name: Fast AWS Tests on CPU
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  fast-tests:
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng
+
+      - name: Run core tests
+        run: |
+          AVX512_SUPPORT=ON make test_core_crypto
+
+      - name: Run boolean tests
+        run: |
+          make test_boolean
+
+      - name: Run user docs tests
+        run: |
+          make test_user_doc
+
+      - name: Run js on wasm API tests
+        run: |
+          make test_nodejs_wasm_api_in_docker
+
+      - name: Gen Keys if required
+        run: |
+          make gen_key_cache
+
+      - name: Run shortint tests
+        run: |
+          BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_shortint_ci
+
+      - name: Run integer tests
+        run: |
+          BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_integer_ci
+
+      - name: Run shortint multi-bit tests
+        run: |
+          BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_shortint_multi_bit_ci
+
+      - name: Run integer multi-bit tests
+        run: |
+          BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_integer_multi_bit_ci
+
+      - name: Run high-level API tests
+        run: |
+          make test_high_level_api
+
+      - name: Run safe deserialization tests
+        run: |
+          make test_safe_deserialization
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/aws_tfhe_gpu_tests.yml
+++ b/.github/workflows/aws_tfhe_gpu_tests.yml
@@ -0,0 +1,112 @@
+# Compile and test Concrete-cuda on an AWS instance
+name: Concrete Cuda - Full tests
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  run-cuda-tests-linux:
+    concurrency:
+      group: tfhe_cuda_backend_test-${{ github.ref }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+    name: Test code in EC2
+    runs-on: ${{ inputs.runner_name }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+
+    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run clippy checks
+        run: |
+          make clippy_gpu
+
+      - name: Run all tests
+        run: |
+          make test_gpu
+
+      - name: Run user docs tests
+        run: |
+          make test_user_doc_gpu
+
+      - name: Test C API
+        run: |
+          make test_c_api_gpu
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -1,9 +1,10 @@
-name: AWS Integer Tests on CPU
+name: AWS Unsigned Integer Tests on CPU

 env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -23,51 +24,67 @@ on:
        description: "Action runner name"
        type: string
      request_id:
-        description: 'Slab request ID'
+        description: "Slab request ID"
        type: string
-      matrix_item:
-        description: 'Build matrix item'
+      fork_repo:
+        description: "Name of forked repo as user/repo"
+        type: string
+      fork_git_sha:
+        description: "Git SHA to checkout from fork"
        type: string

 jobs:
  integer-tests:
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_${{ github.event.inputs.instance_image_id }}_${{ github.event.inputs.instance_type }}
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
-    runs-on: ${{ github.event.inputs.runner_name }}
+    runs-on: ${{ inputs.runner_name }}
    steps:
      # Step used for log purpose.
      - name: Instance configuration used
        run: |
-          echo "ID: ${{ github.event.inputs.instance_id }}"
-          echo "AMI: ${{ github.event.inputs.instance_image_id }}"
-          echo "Type: ${{ github.event.inputs.instance_type }}"
-          echo "Request ID: ${{ github.event.inputs.request_id }}"
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

-      - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true
+
+      - name: Gen Keys if required
+        run: |
+          make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
+
+      - name: Run unsigned integer multi-bit tests
+        run: |
+          AVX512_SUPPORT=ON make test_unsigned_integer_multi_bit_ci

      - name: Gen Keys if required
        run: |
          make gen_key_cache

-      - name: Run integer tests
+      - name: Run unsigned integer tests
        run: |
-          BIG_TESTS_INSTANCE=TRUE make test_integer_ci
+          AVX512_SUPPORT=ON BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_ci

      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/aws_tfhe_signed_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_signed_integer_tests.yml
@@ -0,0 +1,98 @@
+name: AWS Signed Integer Tests on CPU
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      fork_repo:
+        description: "Name of forked repo as user/repo"
+        type: string
+      fork_git_sha:
+        description: "Git SHA to checkout from fork"
+        type: string
+
+jobs:
+  multi-bit-tests:
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Gen Keys if required
+        run: |
+          make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
+
+      - name: Run shortint multi-bit tests
+        run: |
+          make test_shortint_multi_bit_ci
+
+      - name: Run signed integer multi-bit tests
+        run: |
+          AVX512_SUPPORT=ON make test_signed_integer_multi_bit_ci
+
+      - name: Gen Keys if required
+        run: |
+          make gen_key_cache
+
+      - name: Run signed integer tests
+        run: |
+          AVX512_SUPPORT=ON BIG_TESTS_INSTANCE=TRUE make test_signed_integer_ci
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Signed integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -4,6 +4,7 @@ env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"

 on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -25,36 +26,48 @@ on:
      request_id:
        description: 'Slab request ID'
        type: string
-      matrix_item:
-        description: 'Build matrix item'
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
        type: string

 jobs:
  shortint-tests:
    concurrency:
-      group: ${{ github.workflow }}_${{ github.ref }}_${{ github.event.inputs.instance_image_id }}_${{ github.event.inputs.instance_type }}
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
      cancel-in-progress: true
-    runs-on: ${{ github.event.inputs.runner_name }}
+    runs-on: ${{ inputs.runner_name }}
    steps:
      # Step used for log purpose.
      - name: Instance configuration used
        run: |
-          echo "ID: ${{ github.event.inputs.instance_id }}"
-          echo "AMI: ${{ github.event.inputs.instance_image_id }}"
-          echo "Type: ${{ github.event.inputs.instance_type }}"
-          echo "Request ID: ${{ github.event.inputs.request_id }}"
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

-      - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}

      - name: Set up home
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true
+
+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng

      - name: Run core tests
        run: |
@@ -72,10 +85,6 @@ jobs:
        run: |
          make test_user_doc

-      - name: Run js on wasm API tests
-        run: |
-          make test_nodejs_wasm_api_in_docker
-
      - name: Gen Keys if required
        run: |
          make gen_key_cache
@@ -88,10 +97,20 @@ jobs:
        run: |
          BIG_TESTS_INSTANCE=TRUE make test_high_level_api

+      - name: Run example tests
+        run: |
+          make test_examples
+          make dark_market
+
+      - name: Run apps tests
+        run: |
+          make test_trivium
+          make test_kreyvium
+
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -0,0 +1,87 @@
+name: AWS WASM Tests on CPU
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  wasm-tests:
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+    steps:
+      # Step used for log purpose.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Run js on wasm API tests
+        run: |
+          make test_nodejs_wasm_api_in_docker
+
+      - name: Run parallel wasm tests
+        run: |
+          make install_node
+          make ci_test_web_js_api_parallel
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/boolean_benchmark.yml
+++ b/.github/workflows/boolean_benchmark.yml
@@ -5,24 +5,34 @@ on:
  workflow_dispatch:
    inputs:
      instance_id:
-        description: 'Instance ID'
+        description: "Instance ID"
        type: string
      instance_image_id:
-        description: 'Instance AMI ID'
+        description: "Instance AMI ID"
        type: string
      instance_type:
-        description: 'Instance product type'
+        description: "Instance product type"
        type: string
      runner_name:
-        description: 'Action runner name'
+        description: "Action runner name"
        type: string
      request_id:
-        description: 'Slab request ID'
+        description: "Slab request ID"
        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow
+      # could use it. If a triggering command includes a user_inputs field, then the triggered
+      # workflow must declare this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml for an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"

 env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-boolean-benchmarks:
@@ -42,7 +52,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -52,14 +62,13 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

-      - name: Run benchmarks
+      - name: Run benchmarks with AVX512
        run: |
-          make bench_boolean
+          make AVX512_SUPPORT=ON bench_boolean

      - name: Parse results
        run: |
@@ -73,24 +82,8 @@ jobs:
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${{ env.BENCH_DATE }}" \
          --walk-subdirs \
-          --throughput
-
-      - name: Remove previous raw results
-        run: |
-          rm -rf target/criterion
-
-      - name: Run benchmarks with AVX512
-        run: |
-          make AVX512_SUPPORT=ON bench_boolean
-
-      - name: Parse AVX512 results
-        run: |
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --hardware ${{ inputs.instance_type }} \
          --name-suffix avx512 \
-          --walk-subdirs \
-          --throughput \
-          --append-results
+          --throughput

      - name: Measure key sizes
        run: |
@@ -101,15 +94,15 @@ jobs:
          python3 ./ci/benchmark_parser.py tfhe/boolean_key_sizes.csv ${{ env.RESULTS_FILENAME }} \
          --key-sizes \
          --append-results
-  
+
      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -117,8 +110,6 @@ jobs:

      - name: Send data to Slab
        shell: bash
-        env:
-          COMPRESSED_RESULTS : ${{ env.RESULTS_FILENAME }}.gz
        run: |
          echo "Computing HMac on results file"
          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
@@ -130,3 +121,15 @@ jobs:
          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
          -d @${{ env.RESULTS_FILENAME }} \
          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Boolean benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -6,6 +6,7 @@ on:
 env:
  CARGO_TERM_COLOR: always
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
@@ -17,16 +18,30 @@ jobs:

    strategy:
      matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest-large, windows-latest]
      fail-fast: false

    steps:
-      - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Install and run newline linter checks
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          wget https://github.com/fernandrone/linelint/releases/download/0.0.6/linelint-linux-amd64
+          echo "16b70fb7b471d6f95cbdc0b4e5dc2b0ac9e84ba9ecdc488f7bdf13df823aca4b linelint-linux-amd64" > checksum
+          sha256sum -c checksum || exit 1
+          chmod +x linelint-linux-amd64
+          mv linelint-linux-amd64 /usr/local/bin/linelint
+          make check_newline

      - name: Run pcc checks
        run: |
          make pcc

+      - name: Build concrete-csprng
+        run: |
+          make build_concrete_csprng
+
      - name: Build Release core
        run: |
          make build_core AVX512_SUPPORT=ON
--- a/.github/workflows/check_commit.yml
+++ b/.github/workflows/check_commit.yml
@@ -10,7 +10,7 @@ jobs:
      - name: Check first line
        uses: gsactions/commit-message-checker@16fa2d5de096ae0d35626443bcd24f1e756cafee
        with:
-          pattern: '^((feat|fix|chore|refactor|style|test|docs|doc)\(\w+\)\:) .+$'
+          pattern: '^((feat|fix|chore|refactor|style|test|docs|doc)(\(\w+\))?\:) .+$'
          flags: "gs"
          error: 'Your first line has to contain a commit type and scope like "feat(my_feature): msg".'
          excludeDescription: "true" # optional: this excludes the description body of a pull request
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -0,0 +1,119 @@
+name: Code Coverage
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  code-coverage:
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+    timeout-minutes: 1080
+    steps:
+      # Step used for logging purposes.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Check for file changes
+        id: changed-files
+        uses: tj-actions/changed-files@90a06d6ba9543371ab4df8eeca0be07ca6054959
+        with:
+          files_yaml: |
+            tfhe:
+              - tfhe/src/**
+            concrete_csprng:
+              - concrete-csprng/src/**
+
+      - name: Generate Keys
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make GEN_KEY_CACHE_COVERAGE_ONLY=TRUE gen_key_cache
+          make gen_key_cache_core_crypto
+
+      - name: Run coverage for core_crypto
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make test_core_crypto_cov AVX512_SUPPORT=ON
+
+      - name: Run coverage for boolean
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make test_boolean_cov
+
+      - name: Run coverage for shortint
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make test_shortint_cov
+
+      - name: Upload tfhe coverage to Codecov
+        uses: codecov/codecov-action@4fe8c5f003fae66aa5ebb77cfd3e7bfbbda0b6b0
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          directory: ./coverage/
+          fail_ci_if_error: true
+          files: shortint/cobertura.xml,boolean/cobertura.xml,core_crypto/cobertura.xml,core_crypto_avx512/cobertura.xml
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Code coverage finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/csprng_randomness_testing.yml
+++ b/.github/workflows/csprng_randomness_testing.yml
@@ -0,0 +1,74 @@
+name: CSPRNG randomness testing Workflow
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  csprng-randomness-teting:
+    name: CSPRNG randomness testing
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: stable
+
+      - name: Dieharder randomness test suite
+        run: |
+          make dieharder_csprng
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "concrete-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_benchmark.yml
+++ b/.github/workflows/integer_benchmark.yml
@@ -5,24 +5,27 @@ on:
  workflow_dispatch:
    inputs:
      instance_id:
-        description: 'Instance ID'
+        description: "Instance ID"
        type: string
      instance_image_id:
-        description: 'Instance AMI ID'
+        description: "Instance AMI ID"
        type: string
      instance_type:
-        description: 'Instance product type'
+        description: "Instance product type"
        type: string
      runner_name:
-        description: 'Action runner name'
+        description: "Action runner name"
        type: string
      request_id:
-        description: 'Slab request ID'
+        description: "Slab request ID"
        type: string

 env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-integer-benchmarks:
@@ -42,7 +45,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -52,14 +55,24 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

-      - name: Run benchmarks
+      - name: Run benchmarks with AVX512
        run: |
-          make bench_integer
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_integer
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}

      - name: Parse results
        run: |
@@ -73,33 +86,17 @@ jobs:
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${{ env.BENCH_DATE }}" \
          --walk-subdirs \
+          --name-suffix avx512 \
          --throughput

-      - name: Remove previous raw results
-        run: |
-          rm -rf target/criterion
-
-      - name: Run benchmarks with AVX512
-        run: |
-          make AVX512_SUPPORT=ON bench_integer
-
-      - name: Parse AVX512 results
-        run: |
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --hardware ${{ inputs.instance_type }} \
-          --walk-subdirs \
-          --name-suffix avx512 \
-          --throughput \
-          --append-results
-
      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -118,3 +115,15 @@ jobs:
          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
          -d @${{ env.RESULTS_FILENAME }} \
          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_full_benchmark.yml
+++ b/.github/workflows/integer_full_benchmark.yml
@@ -0,0 +1,155 @@
+# Run all integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Integer full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  prepare-matrix:
+    name: Prepare operations matrix
+    runs-on: ubuntu-latest
+    outputs:
+      op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
+    steps:
+      - name: Weekly benchmarks
+        if: ${{ github.event.inputs.user_inputs == 'weekly_benchmarks' }}
+        run: |
+          echo "OP_FLAVOR=[\"default\"]" >> ${GITHUB_ENV}
+
+      - name: Quarterly benchmarks
+        if: ${{ github.event.inputs.user_inputs == 'quarterly_benchmarks' }}
+        run: |
+          echo "OP_FLAVOR=[\"default\", \"smart\", \"unchecked\", \"misc\"]" >> ${GITHUB_ENV}
+
+      -  name: Set operation flavor output
+         id: set_op_flavor
+         run: |
+          echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> ${GITHUB_OUTPUT}
+
+  integer-benchmarks:
+    name: Execute integer benchmarks for all operations flavor
+    needs: prepare-matrix
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    continue-on-error: true
+    timeout-minutes: 1440  # 24 hours
+    strategy:
+      max-parallel: 1
+      matrix:
+        command: [ integer, integer_multi_bit]
+        op_flavor: ${{ fromJson(needs.prepare-matrix.outputs.op_flavor) }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: integer-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_gpu_benchmark.yml
+++ b/.github/workflows/integer_gpu_benchmark.yml
@@ -0,0 +1,157 @@
+# Run integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: Integer GPU benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute integer benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer GPU benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_gpu_full_benchmark.yml
+++ b/.github/workflows/integer_gpu_full_benchmark.yml
@@ -0,0 +1,162 @@
+# Run all integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: Integer GPU full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml for an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  integer-benchmarks:
+    name: Execute integer benchmarks for all operations flavor
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    continue-on-error: true
+    strategy:
+      fail-fast: false
+      max-parallel: 1
+      matrix:
+        command: [ integer, integer_multi_bit]
+        op_flavor: [ default, unchecked ]
+        # explicit include-based build matrix, of known valid options
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.2"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: integer-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer GPU full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_multi_bit_benchmark.yml
+++ b/.github/workflows/integer_multi_bit_benchmark.yml
@@ -0,0 +1,129 @@
+# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot.
+name: Integer Multi-bit benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute integer multi-bit benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Run multi-bit benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_integer_multi_bit
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer multi-bit benchmarks failed. (${{ env.ACTION_RUN_URL }})" # names the multi-bit workflow explicitly
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_multi_bit_gpu_benchmark.yml
+++ b/.github/workflows/integer_multi_bit_gpu_benchmark.yml
@@ -0,0 +1,158 @@
+# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: Integer GPU Multi-bit benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute integer multi-bit benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "11.8"
+            cuda_arch: "70"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run multi-bit benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_multi_bit_gpu
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Integer GPU multi-bit benchmarks failed. (${{ env.ACTION_RUN_URL }})" # names the multi-bit workflow explicitly
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -4,11 +4,19 @@ on:
  workflow_dispatch:
  pull_request:
    types: [labeled]
+  # Have a nightly build for M1 tests
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    # At 22:00 every day
+    # Timezone is UTC, so Paris time is +2 during the summer and +1 during winter
+    - cron: "0 22 * * *"

 env:
  CARGO_TERM_COLOR: always
  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  FAST_TESTS: "TRUE"

 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
@@ -16,22 +24,25 @@ concurrency:

 jobs:
  cargo-builds:
-    if: "github.event_name != 'pull_request' || contains(github.event.label.name, 'm1_test')"
+    if: ${{ (github.event_name == 'schedule' &&  github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'm1_test') }}
    runs-on: ["self-hosted", "m1mac"]

    steps:
-      - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11

      - name: Install latest stable
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: stable
-          default: true

      - name: Run pcc checks
        run: |
          make pcc

+      - name: Build concrete-csprng
+        run: |
+          make build_concrete_csprng
+
      - name: Build Release core
        run: |
          make build_core
@@ -56,6 +67,10 @@ jobs:
        run: |
          make build_c_api

+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng
+
      - name: Run core tests
        run: |
          make test_core_crypto
@@ -87,6 +102,18 @@ jobs:
        run: |
          make test_integer_ci

+      - name: Gen Keys if required
+        run: |
+          make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
+
+      - name: Run shortint multi bit tests
+        run: |
+          make test_shortint_multi_bit_ci
+
+      - name: Run integer multi bit tests
+        run: |
+          make test_integer_multi_bit_ci
+
  remove_label:
    name: Remove m1_test label
    runs-on: ubuntu-latest
@@ -95,6 +122,7 @@ jobs:
    if: ${{ always() }}
    steps:
      - uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
+        if: ${{ github.event_name == 'pull_request' }}
        with:
          labels: m1_test
          github_token: ${{ secrets.GITHUB_TOKEN }}
@@ -102,7 +130,7 @@ jobs:
      - name: Slack Notification
        if: ${{ needs.cargo-builds.result != 'skipped' }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ needs.cargo-builds.result }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -0,0 +1,84 @@
+# Publish new release of tfhe-rs on various platforms.
+name: Publish release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+      push_to_crates:
+        description: "Push to crate"
+        type: boolean
+        default: true
+      push_web_package:
+        description: "Push web js package"
+        type: boolean
+        default: true
+      push_node_package:
+        description: "Push node js package"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  publish_release:
+    name: Publish Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Publish crate.io package
+        if: ${{ inputs.push_to_crates }}
+        env: # read by the shell at runtime so the token is never templated into the script text
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }} # empty string when not a dry run
+        run: |
+          cargo publish -p tfhe --token "${CRATES_TOKEN}" ${DRY_RUN}
+
+      - name: Build web package
+        if: ${{ inputs.push_web_package }}
+        run: |
+          make build_web_js_api
+
+      - name: Publish web package
+        if: ${{ inputs.push_web_package }}
+        uses: JS-DevTools/npm-publish@4b07b26a2f6e0a51846e1870223e545bae91c552
+        with:
+          token: ${{ secrets.NPM_TOKEN }}
+          package: tfhe/pkg/package.json
+          dry-run: ${{ inputs.dry_run }}
+
+      - name: Build Node package
+        if: ${{ inputs.push_node_package }}
+        run: |
+          rm -rf tfhe/pkg
+
+          make build_node_js_api
+          sed -i 's/"tfhe"/"node-tfhe"/g' tfhe/pkg/package.json
+
+      - name: Publish Node package
+        if: ${{ inputs.push_node_package }}
+        uses: JS-DevTools/npm-publish@4b07b26a2f6e0a51846e1870223e545bae91c552
+        with:
+          token: ${{ secrets.NPM_TOKEN }}
+          package: tfhe/pkg/package.json
+          dry-run: ${{ inputs.dry_run }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "tfhe release failed: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -0,0 +1,42 @@
+# Publish new release of concrete-csprng on crates.io.
+name: Publish concrete-csprng release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  publish_release:
+    name: Publish concrete-csprng Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Publish crate.io package
+        env: # read by the shell at runtime so the token is never templated into the script text
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }} # empty string when not a dry run
+        run: |
+          cargo publish -p concrete-csprng --token "${CRATES_TOKEN}" ${DRY_RUN}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "concrete-csprng release failed: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -0,0 +1,51 @@
+# Perform a security check on all the cryptographic parameter sets
+name: Parameters curves security check
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+
+on:
+  push:
+    branches:
+      - "main"
+  workflow_dispatch:
+
+jobs:
+  params-curves-security-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+
+      - name: Checkout lattice-estimator
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: malb/lattice-estimator
+          path: lattice_estimator
+
+      - name: Install Sage
+        run: |
+          sudo apt update
+          sudo apt install -y sagemath
+
+      - name: Collect parameters
+        run: |
+          CARGO_PROFILE=devo make write_params_to_file
+
+      - name: Perform security check
+        run: |
+          PYTHONPATH=lattice_estimator sage ci/lattice_estimator.sage
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Security check for parameters finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/pbs_benchmark.yml
+++ b/.github/workflows/pbs_benchmark.yml
@@ -5,25 +5,34 @@ on:
  workflow_dispatch:
    inputs:
      instance_id:
-        description: 'Instance ID'
+        description: "Instance ID"
        type: string
      instance_image_id:
-        description: 'Instance AMI ID'
+        description: "Instance AMI ID"
        type: string
      instance_type:
-        description: 'Instance product type'
+        description: "Instance product type"
        type: string
      runner_name:
-        description: 'Action runner name'
+        description: "Action runner name"
        type: string
      request_id:
-        description: 'Slab request ID'
+        description: "Slab request ID"
        type: string
-
+      # This input is not used in this workflow but still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml as example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"

 env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-pbs-benchmarks:
@@ -43,7 +52,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -53,10 +62,9 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

      - name: Run benchmarks with AVX512
        run: |
@@ -78,13 +86,13 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
        with:
          name: ${{ github.sha }}_pbs
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -103,3 +111,15 @@ jobs:
          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
          -d @${{ env.RESULTS_FILENAME }} \
          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "PBS benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/pbs_gpu_benchmark.yml
+++ b/.github/workflows/pbs_gpu_benchmark.yml
@@ -0,0 +1,153 @@
+# Run PBS benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
+name: PBS GPU benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      # This input is not used in this workflow but still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml as example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  run-pbs-benchmarks:
+    name: Execute PBS benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix; the CUDA/gcc export steps below read matrix.cuda and matrix.gcc
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "11.8"
+            cuda_arch: "70"
+            gcc: 9
+    env:
+      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Export CUDA variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
+          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
+          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
+          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
+
+      # Specify the correct host compilers
+      - name: Export gcc and g++ variables
+        if: ${{ !cancelled() }}
+        run: |
+          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> "${GITHUB_ENV}"
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON bench_pbs_gpu
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --backend gpu \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --name-suffix avx512 \
+          --walk-subdirs \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_pbs
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on downloaded artifact"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "PBS GPU benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/placeholder_workflow.yml
+++ b/.github/workflows/placeholder_workflow.yml
@@ -0,0 +1,14 @@
+# Placeholder workflow file allowing running it without having to merge to main first
+name: Placeholder Workflow
+
+on:
+  workflow_dispatch:
+
+jobs:
+  placeholder:
+    name: Placeholder
+    runs-on: ubuntu-latest
+
+    steps:
+      - run: |
+          echo "Hello this is a Placeholder Workflow"
--- a/.github/workflows/shortint_benchmark.yml
+++ b/.github/workflows/shortint_benchmark.yml
@@ -5,25 +5,26 @@ on:
  workflow_dispatch:
    inputs:
      instance_id:
-        description: 'Instance ID'
+        description: "Instance ID"
        type: string
      instance_image_id:
-        description: 'Instance AMI ID'
+        description: "Instance AMI ID"
        type: string
      instance_type:
-        description: 'Instance product type'
+        description: "Instance product type"
        type: string
      runner_name:
-        description: 'Action runner name'
+        description: "Action runner name"
        type: string
      request_id:
-        description: 'Slab request ID'
+        description: "Slab request ID"
        type: string

-
 env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"

 jobs:
  run-shortint-benchmarks:
@@ -43,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0

@@ -53,14 +54,13 @@ jobs:
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

      - name: Install rust
-        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
        with:
          toolchain: nightly
-          override: true

-      - name: Run benchmarks
+      - name: Run benchmarks with AVX512
        run: |
-          make bench_shortint
+          make AVX512_SUPPORT=ON bench_shortint

      - name: Parse results
        run: |
@@ -74,24 +74,8 @@ jobs:
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${{ env.BENCH_DATE }}" \
          --walk-subdirs \
-          --throughput
-
-      - name: Remove previous raw results
-        run: |
-          rm -rf target/criterion
-
-      - name: Run benchmarks with AVX512
-        run: |
-          make AVX512_SUPPORT=ON bench_shortint
-
-      - name: Parse AVX512 results
-        run: |
-          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
-          --hardware ${{ inputs.instance_type }} \
-          --walk-subdirs \
          --name-suffix avx512 \
-          --throughput \
-          --append-results
+          --throughput

      - name: Measure key sizes
        run: |
@@ -104,13 +88,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
        with:
          name: ${{ github.sha }}_shortint
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
@@ -129,3 +113,15 @@ jobs:
          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
          -d @${{ env.RESULTS_FILENAME }} \
          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Shortint benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/shortint_full_benchmark.yml
+++ b/.github/workflows/shortint_full_benchmark.yml
@@ -0,0 +1,149 @@
+# Run all shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Shortint full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml as an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  shortint-benchmarks:
+    name: Execute shortint benchmarks for all operations flavor
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    strategy:
+      max-parallel: 1
+      matrix:
+        op_flavor: [ default, smart, unchecked ]
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_shortint
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      # This small benchmark needs to be executed only once.
+      - name: Measure key sizes
+        if: matrix.op_flavor == 'default'
+        run: |
+          make measure_shortint_key_sizes
+
+      - name: Parse key sizes results
+        if: matrix.op_flavor == 'default'
+        run: |
+          python3 ./ci/benchmark_parser.py tfhe/shortint_key_sizes.csv ${{ env.RESULTS_FILENAME }} \
+          --key-sizes \
+          --append-results
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: shortint-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Shortint full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_benchmark.yml
+++ b/.github/workflows/signed_integer_benchmark.yml
@@ -0,0 +1,129 @@
+# Run signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Signed Integer benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute signed integer benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_signed_integer
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Signed integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_full_benchmark.yml
+++ b/.github/workflows/signed_integer_full_benchmark.yml
@@ -0,0 +1,133 @@
+# Run all signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: Signed Integer full benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  integer-benchmarks:
+    name: Execute signed integer benchmarks for all operations flavor
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    continue-on-error: true
+    timeout-minutes: 1440  # 24 hours
+    strategy:
+      max-parallel: 1
+      matrix:
+        command: [ integer, integer_multi_bit ]
+        op_flavor: [ default, unchecked ]
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Get benchmark details
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+          echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})" >> "${GITHUB_ENV}"
+          echo "COMMIT_HASH=$(git describe --tags --dirty)" >> "${GITHUB_ENV}"
+
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Run benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_signed_${{ matrix.command }}
+
+      - name: Parse results
+        run: |
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${{ env.COMMIT_HASH }}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${{ env.COMMIT_DATE }}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+  slack-notification:
+    name: Slack Notification
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ failure() }}
+    needs: integer-benchmarks
+    steps:
+      - name: Notify
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Signed integer full benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/signed_integer_multi_bit_benchmark.yml
+++ b/.github/workflows/signed_integer_multi_bit_benchmark.yml
@@ -0,0 +1,129 @@
+# Run signed integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot.
+name: Signed Integer Multi-bit benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-integer-benchmarks:
+    name: Execute signed integer multi-bit benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # "Install rust" step requires root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Run multi-bit benchmarks with AVX512
+        run: |
+          make AVX512_SUPPORT=ON FAST_BENCH=TRUE bench_signed_integer_multi_bit
+
+      - name: Parse benchmarks to csv
+        run: |
+          make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
+            parse_integer_benches
+
+      - name: Upload csv results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_csv_integer
+          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
+
+      - name: Parse results
+        run: |
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --walk-subdirs \
+          --name-suffix avx512 \
+          --throughput
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_integer
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}  # Notify only when a previous step of this job has failed.
+        continue-on-error: true  # A failed notification must not fail the job itself.
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Signed integer multi-bit benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/start_benchmarks.yml
+++ b/.github/workflows/start_benchmarks.yml
@@ -4,24 +4,123 @@ name: Start all benchmarks
 on:
  push:
    branches:
-      - 'main'
+      - "main"
  workflow_dispatch:
+    inputs:
+      # The input name must be the name of the slab command to launch
+      boolean_bench:
+        description: "Run Boolean benches"
+        type: boolean
+        default: true
+      shortint_bench:
+        description: "Run shortint benches"
+        type: boolean
+        default: true
+      integer_bench:
+        description: "Run integer benches"
+        type: boolean
+        default: true
+      signed_integer_bench:
+        description: "Run signed integer benches"
+        type: boolean
+        default: true
+      integer_multi_bit_bench:
+        description: "Run integer multi bit benches"
+        type: boolean
+        default: true
+      signed_integer_multi_bit_bench:
+        description: "Run signed integer multi bit benches"
+        type: boolean
+        default: true
+      pbs_bench:
+        description: "Run PBS benches"
+        type: boolean
+        default: true
+      pbs_gpu_bench:
+        description: "Run PBS benches on GPU"
+        type: boolean
+        default: true
+      wasm_client_bench:
+        description: "Run WASM client benches"
+        type: boolean
+        default: true

 jobs:
  start-benchmarks:
+    if: ${{ (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
    strategy:
      matrix:
-        command: [boolean_bench, shortint_bench, integer_bench, pbs_bench]
+        command: [ boolean_bench, shortint_bench,
+                   integer_bench, integer_multi_bit_bench,
+                   signed_integer_bench, signed_integer_multi_bit_bench,
+                   integer_gpu_bench, integer_multi_bit_gpu_bench,
+                   pbs_bench, pbs_gpu_bench, wasm_client_bench ]
    runs-on: ubuntu-latest
    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Check for file changes
+        id: changed-files
+        uses: tj-actions/changed-files@90a06d6ba9543371ab4df8eeca0be07ca6054959
+        with:
+          files_yaml: |
+            common_benches:
+              - toolchain.txt
+              - Makefile
+              - ci/slab.toml
+              - tfhe/Cargo.toml
+              - tfhe/src/core_crypto/**
+              - .github/workflows/start_benchmarks.yml
+            boolean_bench:
+              - tfhe/src/boolean/**
+              - tfhe/benches/boolean/**
+              - .github/workflows/boolean_benchmark.yml
+            shortint_bench:
+              - tfhe/src/shortint/**
+              - tfhe/benches/shortint/**
+              - .github/workflows/shortint_benchmark.yml
+            integer_bench:
+              - tfhe/src/shortint/**
+              - tfhe/src/integer/**
+              - tfhe/benches/integer/bench.rs
+              - .github/workflows/integer_benchmark.yml
+            integer_multi_bit_bench:
+              - tfhe/src/shortint/**
+              - tfhe/src/integer/**
+              - tfhe/benches/integer/bench.rs
+              - .github/workflows/integer_multi_bit_benchmark.yml
+            signed_integer_bench:
+              - tfhe/src/shortint/**
+              - tfhe/src/integer/**
+              - tfhe/benches/integer/signed_bench.rs
+              - .github/workflows/signed_integer_benchmark.yml
+            signed_integer_multi_bit_bench:
+              - tfhe/src/shortint/**
+              - tfhe/src/integer/**
+              - tfhe/benches/integer/signed_bench.rs
+              - .github/workflows/signed_integer_multi_bit_benchmark.yml
+            pbs_bench:
+              - tfhe/src/core_crypto/**
+              - tfhe/benches/core_crypto/**
+              - .github/workflows/pbs_benchmark.yml
+            wasm_client_bench:
+              - tfhe/web_wasm_parallel_tests/**
+              - .github/workflows/wasm_client_benchmark.yml
+
      - name: Checkout Slab repo
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          repository: zama-ai/slab
          path: slab
          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}

      - name: Start AWS job in Slab
+        # If manually triggered check that the current bench has been requested
+        # Otherwise if it's on push check that files relevant to benchmarks have changed
+        if: (github.event_name == 'workflow_dispatch' && github.event.inputs[matrix.command] == 'true') || (github.event_name == 'push' && (steps.changed-files.outputs.common_benches_any_changed == 'true' || steps.changed-files.outputs[format('{0}_any_changed', matrix.command)] == 'true'))
        shell: bash
        run: |
          echo -n '{"command": "${{ matrix.command }}", "git_ref": "${{ github.ref }}", "sha": "${{ github.sha }}"}' > command.json
--- a/.github/workflows/start_full_benchmarks.yml
+++ b/.github/workflows/start_full_benchmarks.yml
@@ -0,0 +1,66 @@
+# Start all benchmark jobs, including full shortint and integer, on Slab CI bot.
+name: Start full suite benchmarks
+
+on:
+  schedule:
+    # Weekly benchmarks will be triggered each Saturday at 1a.m.
+    - cron: '0 1 * * 6'
+    # Quarterly benchmarks will be triggered right before the end of each quarter, on the 25th of the current month at 4a.m.
+    # These benchmarks take far longer to execute, hence the reason to run them only four times a year.
+    - cron: '0 4 25 MAR,JUN,SEP,DEC *'
+  workflow_dispatch:
+    inputs:
+      benchmark_type:
+        description: 'Benchmark type'
+        required: true
+        default: 'weekly'
+        type: choice
+        options:
+          - weekly
+          - quarterly
+
+jobs:
+  start-benchmarks:
+    if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
+    strategy:
+      matrix:
+        command: [ boolean_bench, shortint_full_bench,
+                   integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench,
+                   pbs_bench, pbs_gpu_bench, wasm_client_bench ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Set benchmarks type as weekly
+        if: (github.event_name == 'workflow_dispatch' && inputs.benchmark_type == 'weekly') || github.event.schedule == '0 1 * * 6'
+        run: |
+          echo "BENCH_TYPE=weekly_benchmarks" >> "${GITHUB_ENV}"
+
+      - name: Set benchmarks type as quarterly
+        if: (github.event_name == 'workflow_dispatch' && inputs.benchmark_type == 'quarterly') || github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
+        run: |
+          echo "BENCH_TYPE=quarterly_benchmarks" >> "${GITHUB_ENV}"
+
+      - name: Start AWS job in Slab
+        shell: bash
+        run: |
+          echo -n '{"command": "${{ matrix.command }}", "git_ref": "${{ github.ref }}", "sha": "${{ github.sha }}", "user_inputs": "${{ env.BENCH_TYPE }}"}' > command.json
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh command.json '${{ secrets.JOB_SECRET }}')"
+          curl -v -k \
+          --fail-with-body \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: start_aws" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @command.json \
+          ${{ secrets.SLAB_URL }}
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -13,11 +13,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
-        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
        with:
          fetch-depth: 0
      - name: Save repo
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
        with:
          name: repo-archive
          path: '.'
--- a/.github/workflows/trigger_aws_tests_on_pr.yml
+++ b/.github/workflows/trigger_aws_tests_on_pr.yml
@@ -3,16 +3,53 @@ name: PR AWS build trigger

 on:
  pull_request:
+  pull_request_review:
+    types: [submitted]

 jobs:
-  test:
+  trigger-tests:
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
    steps:
-      - uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
+      - name: Get current labels
+        uses: snnaplab/get-labels-action@f426df40304808ace3b5282d4f036515f7609576
+
+      - name: Remove approved label
+        if: ${{ github.event_name == 'pull_request' && contains(fromJSON(env.LABELS), 'approved') }}
+        uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          labels: approved
+
+      - name: Launch fast tests
+        if: ${{ github.event_name == 'pull_request' }}
+        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
        with:
          allow-repeats: true
          message: |
+            @slab-ci cpu_fast_test
+            @slab-ci gpu_test
+
+      - name: Add approved label
+        uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf
+        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          labels: approved
+
+      # PR label 'approved' presence is checked to avoid running the full test suite several times
+      # in case of multiple approvals without new commits in between.
+      - name: Launch full tests suite
+        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
+        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
+        with:
+          allow-repeats: true
+          message: |
+            Pull Request has been approved :tada:
+            Launching full test suite...
            @slab-ci cpu_test
-            @slab-ci cpu_integer_test
+            @slab-ci cpu_unsigned_integer_test
+            @slab-ci cpu_signed_integer_test
+            @slab-ci cpu_wasm_test
+            @slab-ci csprng_randomness_testing
--- a/.github/workflows/wasm_client_benchmark.yml
+++ b/.github/workflows/wasm_client_benchmark.yml
@@ -0,0 +1,136 @@
+# Run WASM client benchmarks on an AWS instance and return parsed results to Slab CI bot.
+name: WASM client benchmarks
+
+on:
+  workflow_dispatch:
+    inputs:
+      instance_id:
+        description: "Instance ID"
+        type: string
+      instance_image_id:
+        description: "Instance AMI ID"
+        type: string
+      instance_type:
+        description: "Instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: "Slab request ID"
+        type: string
+      # This input is not used in this workflow but is still mandatory since a calling workflow could
+      # use it. If a triggering command includes a user_inputs field, then the triggered workflow
+      # must include this very input, otherwise the workflow won't be called.
+      # See start_full_benchmarks.yml as an example.
+      user_inputs:
+        description: "Type of benchmarks to run"
+        type: string
+        default: "weekly_benchmarks"
+
+env:
+  CARGO_TERM_COLOR: always
+  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUST_BACKTRACE: "full"
+
+jobs:
+  run-wasm-client-benchmarks:
+    name: Execute WASM client benchmarks in EC2
+    runs-on: ${{ github.event.inputs.runner_name }}
+    if: ${{ !cancelled() }}
+    steps:
+      - name: Instance configuration used
+        run: |
+          echo "IDs: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+
+      - name: Get benchmark date
+        run: |
+          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
+
+      - name: Checkout tfhe-rs repo with tags
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          fetch-depth: 0
+
+      - name: Set up home
+        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@be73d7920c329f220ce78e0234b8f96b7ae60248
+        with:
+          toolchain: nightly
+
+      - name: Run benchmarks
+        run: |
+          make install_node
+          make ci_bench_web_js_api_parallel
+
+      - name: Parse results
+        run: |
+          make parse_wasm_benchmarks
+
+          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
+          COMMIT_HASH="$(git describe --tags --dirty)"
+          python3 ./ci/benchmark_parser.py tfhe/wasm_pk_gen.csv ${{ env.RESULTS_FILENAME }} \
+          --database tfhe_rs \
+          --hardware ${{ inputs.instance_type }} \
+          --project-version "${COMMIT_HASH}" \
+          --branch ${{ github.ref_name }} \
+          --commit-date "${COMMIT_DATE}" \
+          --bench-date "${{ env.BENCH_DATE }}" \
+          --key-gen
+
+      - name: Measure public key and ciphertext sizes in HL Api
+        run: |
+          make measure_hlapi_compact_pk_ct_sizes
+
+      - name: Parse key and ciphertext sizes results
+        run: |
+          python3 ./ci/benchmark_parser.py tfhe/hlapi_cpk_and_cctl_sizes.csv ${{ env.RESULTS_FILENAME }} \
+          --key-gen \
+          --append-results
+
+      - name: Upload parsed results artifact
+        uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8
+        with:
+          name: ${{ github.sha }}_wasm
+          path: ${{ env.RESULTS_FILENAME }}
+
+      - name: Checkout Slab repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          repository: zama-ai/slab
+          path: slab
+          token: ${{ secrets.CONCRETE_ACTIONS_TOKEN }}
+
+      - name: Send data to Slab
+        shell: bash
+        run: |
+          echo "Computing HMac on results file"
+          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
+          echo "Sending results to Slab..."
+          curl -v -k \
+          -H "Content-Type: application/json" \
+          -H "X-Slab-Repository: ${{ github.repository }}" \
+          -H "X-Slab-Command: store_data_v2" \
+          -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
+          -d @${{ env.RESULTS_FILENAME }} \
+          ${{ secrets.SLAB_URL }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "WASM benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.gitignore
+++ b/.gitignore
@@ -3,12 +3,19 @@ target/
 .vscode/

 # Path we use for internal-keycache during tests
-./keys/
+/keys/
 # In case of symlinked keys
-./keys
+/keys

 **/Cargo.lock
 **/*.bin

 # Some of our bench outputs
 /tfhe/benchmarks_parameters
+**/*.csv
+
+# dieharder run log
+dieharder_run.log
+
+# Coverage reports
+/coverage/
--- a/.linelint.yml
+++ b/.linelint.yml
@@ -0,0 +1,14 @@
+ignore:
+  - .git
+  - target
+  - tfhe/benchmarks_parameters
+  - tfhe/web_wasm_parallel_tests/node_modules
+  - tfhe/web_wasm_parallel_tests/dist
+  - keys
+  - coverage
+
+rules:
+  # checks if file ends in a newline character
+  end-of-file:
+    enable: true
+    single-new-line: true
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,131 @@
+# Contributor Covenant Code of Conduct
+
+## Our pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the overall
+  community
+
+Examples of unacceptable behavior include:
+
+- The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting us anonymously through [this form](https://forms.gle/569j3cZqGRFgrR3u9).
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][mozilla coc].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][faq]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[faq]: https://www.contributor-covenant.org/faq
+[homepage]: https://www.contributor-covenant.org
+[mozilla coc]: https://github.com/mozilla/diversity
+[translations]: https://www.contributor-covenant.org/translations
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["tfhe", "tasks"]
+members = ["tfhe", "tasks", "apps/trivium", "concrete-csprng"]

 [profile.bench]
 lto = "fat"
@@ -8,6 +8,10 @@ lto = "fat"
 [profile.release]
 lto = "fat"

+[profile.release_lto_off]
+inherits = "release"
+lto = "off"
+
 # Compiles much faster for tests and allows reasonable performance for iterating
 [profile.devo]
 inherits = "dev"
--- a/2
+++ b/2
@@ -1,6 +1,6 @@
 BSD 3-Clause Clear License

-Copyright © 2023 ZAMA.
+Copyright © 2024 ZAMA.
 All rights reserved.

 Redistribution and use in source and binary forms, with or without modification,
--- a/637
+++ b/637
@@ -3,14 +3,31 @@ OS:=$(shell uname)
 RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
 CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
 TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
-RS_BUILD_TOOLCHAIN:=$(shell \
-	( (echo $(TARGET_ARCH_FEATURE) | grep -q x86) && echo stable) || echo $(RS_CHECK_TOOLCHAIN))
+RS_BUILD_TOOLCHAIN:=stable
 CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
 CARGO_PROFILE?=release
-MIN_RUST_VERSION:=1.65
+MIN_RUST_VERSION:=$(shell grep '^rust-version[[:space:]]*=' tfhe/Cargo.toml | cut -d '=' -f 2 | xargs)
 AVX512_SUPPORT?=OFF
 WASM_RUSTFLAGS:=
 BIG_TESTS_INSTANCE?=FALSE
+GEN_KEY_CACHE_MULTI_BIT_ONLY?=FALSE
+GEN_KEY_CACHE_COVERAGE_ONLY?=FALSE
+PARSE_INTEGER_BENCH_CSV_FILE?=tfhe_rs_integer_benches.csv
+FAST_TESTS?=FALSE
+FAST_BENCH?=FALSE
+BENCH_OP_FLAVOR?=DEFAULT
+NODE_VERSION=20
+FORWARD_COMPAT?=OFF
+# sed: -n, do not print input stream, -e means a script/expression
+# 1,/version/ indicates from the first line, to the line matching version at the start of the line
+# p indicates to print, so we keep only the start of the Cargo.toml until we hit the first version
+# entry which should be the version of tfhe
+TFHE_CURRENT_VERSION:=\
+$(shell sed -n -e '1,/^version/p' tfhe/Cargo.toml | \
+grep '^version[[:space:]]*=' | cut -d '=' -f 2 | xargs)
+# Cargo has a hard time distinguishing between our package from the workspace and a package that
+# could be a dependency, so we build an unambiguous spec here
+TFHE_SPEC:=tfhe@$(TFHE_CURRENT_VERSION)
 # This is done to avoid forgetting it, we still precise the RUSTFLAGS in the commands to be able to
 # copy paste the command in the terminal and change them if required without forgetting the flags
 export RUSTFLAGS?=-C target-cpu=native
@@ -21,6 +38,48 @@ else
 		AVX512_FEATURE=
 endif

+ifeq ($(GEN_KEY_CACHE_MULTI_BIT_ONLY),TRUE)
+		MULTI_BIT_ONLY=--multi-bit-only
+else
+		MULTI_BIT_ONLY=
+endif
+
+ifeq ($(GEN_KEY_CACHE_COVERAGE_ONLY),TRUE)
+		COVERAGE_ONLY=--coverage-only
+else
+		COVERAGE_ONLY=
+endif
+
+ifeq ($(FORWARD_COMPAT),ON)
+		FORWARD_COMPAT_FEATURE=forward_compatibility
+else
+		FORWARD_COMPAT_FEATURE=
+endif
+
+# Variables used only for regex_engine example
+REGEX_STRING?=''
+REGEX_PATTERN?=''
+
+# tfhe-cuda-backend
+TFHECUDA_SRC="backends/tfhe-cuda-backend/cuda"
+TFHECUDA_BUILD=$(TFHECUDA_SRC)/build
+
+# Exclude these files from coverage reports
+define COVERAGE_EXCLUDED_FILES
+--exclude-files apps/trivium/src/trivium/* \
+--exclude-files apps/trivium/src/kreyvium/* \
+--exclude-files apps/trivium/src/static_deque/* \
+--exclude-files apps/trivium/src/trans_ciphering/* \
+--exclude-files tasks/src/* \
+--exclude-files tfhe/benches/boolean/* \
+--exclude-files tfhe/benches/core_crypto/* \
+--exclude-files tfhe/benches/shortint/* \
+--exclude-files tfhe/benches/integer/* \
+--exclude-files tfhe/benches/* \
+--exclude-files tfhe/examples/regex_engine/* \
+--exclude-files tfhe/examples/utilities/*
+endef
+
 .PHONY: rs_check_toolchain # Echo the rust toolchain used for checks
 rs_check_toolchain:
 	@echo $(RS_CHECK_TOOLCHAIN)
@@ -52,195 +111,462 @@ install_cargo_nextest: install_rs_build_toolchain
 	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-nextest --locked || \
 	( echo "Unable to install cargo nextest, unknown error." && exit 1 )

+.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
+install_wasm_pack: install_rs_build_toolchain
+	@wasm-pack --version > /dev/null 2>&1 || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install wasm-pack || \
+	( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
+
+.PHONY: install_node # Install NodeJS (version set by NODE_VERSION) via nvm
+install_node:
+	curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh | $(SHELL)
+	source ~/.bashrc
+	$(SHELL) -i -c 'nvm install $(NODE_VERSION)' || \
+	( echo "Unable to install node, unknown error." && exit 1 )
+
+.PHONY: install_dieharder # Install dieharder for apt distributions or macOS
+install_dieharder:
+	@dieharder -h > /dev/null 2>&1 || \
+	if [[ "$(OS)" == "Linux" ]]; then \
+		sudo apt update && sudo apt install -y dieharder; \
+	elif [[ "$(OS)" == "Darwin" ]]; then\
+		brew install dieharder; \
+	fi || ( echo "Unable to install dieharder, unknown error." && exit 1 )
+
+.PHONY: install_tarpaulin # Install tarpaulin to perform code coverage
+install_tarpaulin: install_rs_build_toolchain
+	@cargo tarpaulin --version > /dev/null 2>&1 || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
+	( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )
+
+.PHONY: check_linelint_installed # Check if linelint newline linter is installed
+check_linelint_installed:
+	@printf "\n" | linelint - > /dev/null 2>&1 || \
+	( echo "Unable to locate linelint. Try installing it: https://github.com/fernandrone/linelint/releases" && exit 1 )
+
 .PHONY: fmt # Format rust code
 fmt: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt

-.PHONT: check_fmt # Check rust code format
+.PHONY: fmt_gpu # Format rust and cuda code
+fmt_gpu: install_rs_check_toolchain
+	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
+	cd "$(TFHECUDA_SRC)" && ./format_tfhe_cuda_backend.sh
+
+.PHONY: check_fmt # Check rust code format
 check_fmt: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check

+.PHONY: clippy_gpu # Run clippy lints on the gpu backend
+clippy_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		--features=$(TARGET_ARCH_FEATURE),integer,shortint,gpu \
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+
+.PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
+fix_newline: check_linelint_installed
+	linelint -a .
+
+.PHONY: check_newline # Check for newline at end of file to be UNIX compliant
+check_newline: check_linelint_installed
+	linelint .
+
 .PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
 clippy_core: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE) \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),experimental \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_boolean # Run clippy lints enabling the boolean features
 clippy_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),boolean \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_shortint # Run clippy lints enabling the shortint features
 clippy_shortint: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),shortint \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_integer # Run clippy lints enabling the integer features
 clippy_integer: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),integer \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
 clippy: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
 clippy_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

-.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint and the js wasm API
+.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
 clippy_js_wasm_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api \
-		-p tfhe -- --no-deps -D warnings
+		--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api \
+		-p $(TFHE_SPEC) -- --no-deps -D warnings

 .PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
 clippy_tasks:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		-p tasks -- --no-deps -D warnings

+.PHONY: clippy_trivium # Run clippy lints on Trivium app
+clippy_trivium: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
+		-p tfhe-trivium -- --no-deps -D warnings
+
 .PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
 clippy_all_targets:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache \
-		-p tfhe -- --no-deps -D warnings
+		-p $(TFHE_SPEC) -- --no-deps -D warnings
+
+.PHONY: clippy_concrete_csprng # Run clippy lints on concrete-csprng
+clippy_concrete_csprng:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
+		--features=$(TARGET_ARCH_FEATURE) \
+		-p concrete-csprng -- --no-deps -D warnings

 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets clippy_c_api \
-clippy_js_wasm_api clippy_tasks clippy_core
+clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_trivium

 .PHONY: clippy_fast # Run main clippy targets
-clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core
-
-.PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
-gen_key_cache: install_rs_build_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
-		--example generates_test_keys \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe
+clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core \
+clippy_concrete_csprng

 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE) -p tfhe
+		--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),$(AVX512_FEATURE) -p tfhe; \
+			--features=$(TARGET_ARCH_FEATURE),$(AVX512_FEATURE) -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_core_experimental # Build core_crypto with experimental features
 build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental -p tfhe
+		--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p tfhe; \
+			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p $(TFHE_SPEC); \
 	fi

 .PHONY: build_boolean # Build with boolean enabled
 build_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets

 .PHONY: build_shortint # Build with shortint enabled
 build_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets

 .PHONY: build_integer # Build with integer enabled
 build_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets

 .PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
 build_tfhe_full: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p tfhe --all-targets
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets

-.PHONY: build_c_api # Build the C API for boolean and shortint
+.PHONY: symlink_c_libs_without_fingerprint # Link the .a and .so files without the changing hash part in target
+symlink_c_libs_without_fingerprint:
+	@./scripts/symlink_c_libs_without_fingerprint.sh \
+		--cargo-profile "$(CARGO_PROFILE)" \
+		--lib-name tfhe-c-api-dynamic-buffer
+
+.PHONY: build_c_api # Build the C API for boolean, shortint and integer
 build_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
-		-p tfhe
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,$(FORWARD_COMPAT_FEATURE) \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint
+
+.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer with the gpu feature enabled
+build_c_api_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,gpu \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint
+
+.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
+build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,experimental-force_fft_algo_dif4,$(FORWARD_COMPAT_FEATURE) \
+		-p $(TFHE_SPEC)
+	@"$(MAKE)" symlink_c_libs_without_fingerprint

 .PHONY: build_web_js_api # Build the js API targeting the web browser
-build_web_js_api: install_rs_build_toolchain
+build_web_js_api: install_rs_build_toolchain install_wasm_pack
 	cd tfhe && \
 	RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
 		wasm-pack build --release --target=web \
-		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
+		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api
+
+.PHONY: build_web_js_api_parallel # Build the js API targeting the web browser with parallelism support
+build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
+	cd tfhe && \
+	rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
+	RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
+		wasm-pack build --release --target=web \
+		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api \
+		-Z build-std=panic_abort,std

 .PHONY: build_node_js_api # Build the js API targeting nodejs
-build_node_js_api: install_rs_build_toolchain
+build_node_js_api: install_rs_build_toolchain install_wasm_pack
 	cd tfhe && \
 	RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
 		wasm-pack build --release --target=nodejs \
-		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
+		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api
+
+.PHONY: build_concrete_csprng # Build concrete_csprng
+build_concrete_csprng: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng --all-targets

 .PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
 test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),experimental -p tfhe -- core_crypto::
+		--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC) -- core_crypto::
 	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
 		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p tfhe -- core_crypto::; \
+			--features=$(TARGET_ARCH_FEATURE),experimental,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- core_crypto::; \
 	fi

+.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
+test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
+		--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- core_crypto::
+	@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
+		RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+			--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
+			--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
+			--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,__coverage,$(AVX512_FEATURE) \
+			-p $(TFHE_SPEC) -- core_crypto::; \
+	fi
+
+.PHONY: test_gpu # Run the tests of the core_crypto and integer modules on the gpu backend
+test_gpu: test_core_crypto_gpu test_integer_gpu
+
+.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
+test_core_crypto_gpu: install_rs_build_toolchain install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
+
+.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
+test_integer_gpu: install_rs_build_toolchain install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
+
 .PHONY: test_boolean # Run the tests of the boolean module
 test_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean -p tfhe -- boolean::
+		--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::

-.PHONY: test_c_api # Run the tests for the C API
-test_c_api: build_c_api
+.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
+test_boolean_cov: install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
+		$(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- boolean::
+
+.PHONY: test_c_api_rs # Run the rust tests for the C API
+test_c_api_rs: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
+		-p $(TFHE_SPEC) \
+		c_api
+
+.PHONY: test_c_api_c # Run the C tests for the C API
+test_c_api_c: build_c_api
 	./scripts/c_api_tests.sh

+.PHONY: test_c_api # Run all the tests for the C API
+test_c_api: test_c_api_rs test_c_api_c
+
+.PHONY: test_c_api_gpu # Run the C tests for the C API on the gpu backend
+test_c_api_gpu: build_c_api_gpu
+	./scripts/c_api_tests.sh --gpu
+
 .PHONY: test_shortint_ci # Run the tests for shortint ci
 test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
-		./scripts/shortint-tests.sh $(CARGO_RS_BUILD_TOOLCHAIN)
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_shortint_multi_bit_ci # Run the tests for shortint ci running only multibit tests
+test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "$(TFHE_SPEC)"

 .PHONY: test_shortint # Run all the tests for shortint
 test_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe -- shortint::
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
+
+.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
+test_shortint_cov: install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
+		$(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,__coverage \
+		-p $(TFHE_SPEC) -- shortint::

 .PHONY: test_integer_ci # Run the tests for integer ci
-test_integer_ci: install_rs_build_toolchain install_cargo_nextest
+test_integer_ci: install_rs_check_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
-		./scripts/integer-tests.sh $(CARGO_RS_BUILD_TOOLCHAIN)
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_unsigned_integer_ci # Run the tests for unsigned integer ci
+test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_signed_integer_ci # Run the tests for signed integer ci
+test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
+		--signed-only --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_integer_multi_bit_ci # Run the tests for integer ci running only multibit tests
+test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
+		--tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for unsigned integer ci running only multibit tests
+test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
+		--unsigned-only --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_signed_integer_multi_bit_ci # Run the tests for signed integer ci running only multibit tests
+test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
+	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
+	FAST_TESTS="$(FAST_TESTS)" \
+		./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
+		--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
+		--signed-only --tfhe-package "$(TFHE_SPEC)"
+
+.PHONY: test_safe_deserialization # Run the tests for safe deserialization
+test_safe_deserialization: install_rs_build_toolchain install_cargo_nextest
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_deserialization::

 .PHONY: test_integer # Run all the tests for integer
 test_integer: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p tfhe -- integer::
+		--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::

 .PHONY: test_high_level_api # Run all the tests for high_level_api
 test_high_level_api: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p tfhe -- high_level_api::
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
+		-- high_level_api::

 .PHONY: test_user_doc # Run tests from the .md documentation
 test_user_doc: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p tfhe \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) \
 		-- test_user_docs::

+.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
+test_user_doc_gpu: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu -p $(TFHE_SPEC) \
+		-- test_user_docs::
+
+.PHONY: test_regex_engine # Run tests for regex_engine example
+test_regex_engine: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--example regex_engine \
+		--features=$(TARGET_ARCH_FEATURE),integer
+
+.PHONY: test_sha256_bool # Run tests for sha256_bool example
+test_sha256_bool: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--example sha256_bool \
+		--features=$(TARGET_ARCH_FEATURE),boolean
+
+.PHONY: test_examples # Run tests for examples
+test_examples: test_sha256_bool test_regex_engine
+
+.PHONY: test_trivium # Run tests for trivium
+test_trivium: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		-p tfhe-trivium -- --test-threads=1 trivium::
+
+.PHONY: test_kreyvium # Run tests for kreyvium
+test_kreyvium: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		-p tfhe-trivium -- --test-threads=1 kreyvium::
+
+.PHONY: test_concrete_csprng # Run concrete-csprng tests
+test_concrete_csprng:
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng
+
 .PHONY: doc # Build rust doc
 doc: install_rs_check_toolchain
+	RUSTDOCFLAGS="--html-in-header katex-header.html" \
+	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer --no-deps -p $(TFHE_SPEC)
+
+.PHONY: docs # Build rust doc alias for doc
+docs: doc
+
+.PHONY: lint_doc # Build rust doc with linting enabled
+lint_doc: install_rs_check_toolchain
 	RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
-		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer --no-deps
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --no-deps
+
+.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
+lint_docs: lint_doc

 .PHONY: format_doc_latex # Format the documentation latex equations to avoid broken rendering.
 format_doc_latex:
@@ -252,18 +578,20 @@ format_doc_latex:
 	@printf "\n===============================\n"

 .PHONY: check_compile_tests # Build tests in debug without running them
-check_compile_tests: build_c_api
+check_compile_tests:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
 		--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
-		-p tfhe
-		@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
-			./scripts/c_api_tests.sh --build-only; \
-		fi
+		-p $(TFHE_SPEC)
+
+	@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
+		"$(MAKE)" build_c_api && \
+		./scripts/c_api_tests.sh --build-only; \
+	fi

 .PHONY: build_nodejs_test_docker # Build a docker image with tools to run nodejs tests for wasm API
 build_nodejs_test_docker:
 	DOCKER_BUILDKIT=1 docker build --build-arg RUST_TOOLCHAIN="$(RS_BUILD_TOOLCHAIN)" \
-		-f docker/Dockerfile.wasm_tests -t tfhe-wasm-tests .
+		-f docker/Dockerfile.wasm_tests --build-arg NODE_VERSION=$(NODE_VERSION) -t tfhe-wasm-tests .

 .PHONY: test_nodejs_wasm_api_in_docker # Run tests for the nodejs on wasm API in a docker container
 test_nodejs_wasm_api_in_docker: build_nodejs_test_docker
@@ -280,54 +608,223 @@ test_nodejs_wasm_api_in_docker: build_nodejs_test_docker
 test_nodejs_wasm_api: build_node_js_api
 	cd tfhe && node --test js_on_wasm_tests

+.PHONY: test_web_js_api_parallel # Run tests for the web wasm api
+test_web_js_api_parallel: build_web_js_api_parallel
+	$(MAKE) -C tfhe/web_wasm_parallel_tests test
+
+.PHONY: ci_test_web_js_api_parallel # Run tests for the web wasm api
+ci_test_web_js_api_parallel: build_web_js_api_parallel
+	source ~/.nvm/nvm.sh && \
+	nvm install $(NODE_VERSION) && \
+	nvm use $(NODE_VERSION) && \
+	$(MAKE) -C tfhe/web_wasm_parallel_tests test-ci
+
 .PHONY: no_tfhe_typo # Check we did not invert the h and f in tfhe
 no_tfhe_typo:
 	@./scripts/no_tfhe_typo.sh

-.PHONY: bench_integer # Run benchmarks for integer
+.PHONY: no_dbg_log # Check we did not leave dbg macro calls in the rust code
+no_dbg_log:
+	@./scripts/no_dbg_calls.sh
+
+.PHONY: dieharder_csprng # Run the dieharder test suite on our CSPRNG implementation
+dieharder_csprng: install_dieharder build_concrete_csprng
+	./scripts/dieharder_test.sh
+
+#
+# Benchmarks
+#
+
+.PHONY: bench_integer # Run benchmarks for unsigned integer
 bench_integer: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench integer-bench \
-	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
+
+.PHONY: bench_signed_integer # Run benchmarks for signed integer
+bench_signed_integer: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-signed-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
+
+.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
+bench_integer_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
+
+.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
+bench_integer_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
+
+.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
+bench_signed_integer_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-signed-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
+
+.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
+bench_integer_multi_bit_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --

 .PHONY: bench_shortint # Run benchmarks for shortint
 bench_shortint: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench shortint-bench \
-	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
+
+.PHONY: bench_oprf # Run benchmarks for OPRF on shortint and integer
+bench_oprf: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench oprf-shortint-bench \
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
+	RUSTFLAGS="$(RUSTFLAGS)" \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench oprf-integer-bench \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
+
+
+
+.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
+bench_shortint_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench shortint-bench \
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC) --
+

 .PHONY: bench_boolean # Run benchmarks for boolean
 bench_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench boolean-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,$(AVX512_FEATURE) -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)

 .PHONY: bench_pbs # Run benchmarks for PBS
 bench_pbs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
 	--bench pbs-bench \
-	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,$(AVX512_FEATURE) -p tfhe
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
+
+.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
+bench_pbs_gpu: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench pbs-bench \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,$(AVX512_FEATURE) -p $(TFHE_SPEC)
+
+.PHONY: bench_web_js_api_parallel # Run benchmarks for the web wasm api
+bench_web_js_api_parallel: build_web_js_api_parallel
+	$(MAKE) -C tfhe/web_wasm_parallel_tests bench
+
+.PHONY: ci_bench_web_js_api_parallel # Run benchmarks for the web wasm api
+ci_bench_web_js_api_parallel: build_web_js_api_parallel
+	source ~/.nvm/nvm.sh && \
+	nvm use node && \
+	$(MAKE) -C tfhe/web_wasm_parallel_tests bench-ci
+
+#
+# Utility tools
+#
+.PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
+gen_key_cache: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+		--example generates_test_keys \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache -- \
+		$(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
+
+.PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
+gen_key_cache_core_crypto: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
+		core_crypto::keycache::generate_keys
+
+.PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
+measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example hlapi_compact_pk_ct_sizes \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache

 .PHONY: measure_shortint_key_sizes # Measure sizes of bootstrapping and key switching keys for shortint
 measure_shortint_key_sizes: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example shortint_key_sizes \
 	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache

 .PHONY: measure_boolean_key_sizes # Measure sizes of bootstrapping and key switching keys for boolean
 measure_boolean_key_sizes: install_rs_check_toolchain
-	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run \
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 	--example boolean_key_sizes \
 	--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache

-.PHONY: pcc # pcc stands for pre commit checks
-pcc: no_tfhe_typo check_fmt doc clippy_all check_compile_tests
+.PHONY: parse_integer_benches # Run python parser to output a csv containing integer benches data
+parse_integer_benches:
+	python3 ./ci/parse_integer_benches_to_csv.py \
+		--criterion-dir target/criterion \
+		--output-file "$(PARSE_INTEGER_BENCH_CSV_FILE)"
+
+.PHONY: parse_wasm_benchmarks # Parse benchmarks performed with WASM web client into a CSV file
+parse_wasm_benchmarks: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example wasm_benchmarks_parser \
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
+	-- web_wasm_parallel_tests/test/benchmark_results
+
+.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
+write_params_to_file: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example write_params_to_file \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
+
+#
+# Real use case examples
+#
+
+.PHONY: regex_engine # Run regex_engine example
+regex_engine: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example regex_engine \
+	--features=$(TARGET_ARCH_FEATURE),integer \
+	-- $(REGEX_STRING) $(REGEX_PATTERN)
+
+.PHONY: dark_market # Run dark market example
+dark_market: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example dark_market \
+	--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
+	-- fhe-modified fhe-parallel plain fhe
+
+.PHONY: sha256_bool # Run sha256_bool example
+sha256_bool: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example sha256_bool \
+	--features=$(TARGET_ARCH_FEATURE),boolean
+
+.PHONY: pcc # pcc stands for pre commit checks (except GPU)
+pcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_all check_compile_tests
+
+.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
+pcc_gpu: pcc clippy_gpu

 .PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
-fpcc: no_tfhe_typo check_fmt doc clippy_fast check_compile_tests
+fpcc: no_tfhe_typo no_dbg_log check_fmt lint_doc clippy_fast check_compile_tests

 .PHONY: conformance # Automatically fix problems that can be fixed
-conformance: fmt
+conformance: fix_newline fmt

 .PHONY: help # Generate list of targets with descriptions
 help:
--- a/README.md
+++ b/README.md
@@ -4,13 +4,17 @@
 </p>
 <hr/>
 <p align="center">
-  <a href="https://docs.zama.ai/tfhe-rs"> 📒 Read documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a>
+  <a href="https://docs.zama.ai/tfhe-rs"> 📒 Read documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources</a>
 </p>
 <p align="center">
 <!-- Version badge using shields.io -->
  <a href="https://github.com/zama-ai/tfhe-rs/releases">
    <img src="https://img.shields.io/github/v/release/zama-ai/tfhe-rs?style=flat-square">
  </a>
+  <!-- License badge using shields.io -->
+  <a href="#license">
+    <img src="https://img.shields.io/badge/License-BSD--3--Clause--Clear-orange?style=flat-square">
+  </a>
 <!-- Zama Bounty Program -->
  <a href="https://github.com/zama-ai/bounty-program">
    <img src="https://img.shields.io/badge/Contribute-Zama%20Bounty%20Program-yellow?style=flat-square">
@@ -31,7 +35,9 @@ implementation. The goal is to have a stable, simple, high-performance, and
 production-ready library for all the advanced features of TFHE.

 ## Getting Started
+The steps to run a first example are described below. 

+### Cargo.toml configuration
 To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:

 + For x86_64-based machines running Unix-like OSes:
@@ -45,7 +51,7 @@ tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-un
 ```toml
 tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
 ```
-Note: users with ARM devices must use `TFHE-rs` by compiling using the `nightly` toolchain.
+Note: users with ARM devices must compile `TFHE-rs` using a stable toolchain with version >= 1.72.


 + For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) 
@@ -55,97 +61,69 @@ running Windows:
 tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
 ```

-Note: aarch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs
+Note: aarch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs.
+
+
+## A simple example
+
+Here is a full example:
+
+``` rust
+use tfhe::prelude::*;
+use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint32, FheUint8};
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Basic configuration to use homomorphic integers
+    let config = ConfigBuilder::default().build();
+
+    // Key generation
+    let (client_key, server_keys) = generate_keys(config);
+
+    let clear_a = 1344u32;
+    let clear_b = 5u32;
+    let clear_c = 7u8;
+
+    // Encrypting the input data using the (private) client_key
+    // FheUint32: Encrypted equivalent to u32
+    let mut encrypted_a = FheUint32::try_encrypt(clear_a, &client_key)?;
+    let encrypted_b = FheUint32::try_encrypt(clear_b, &client_key)?;
+
+    // FheUint8: Encrypted equivalent to u8
+    let encrypted_c = FheUint8::try_encrypt(clear_c, &client_key)?;
+
+    // On the server side:
+    set_server_key(server_keys);
+
+    // Clear equivalent computations: 1344 * 5 = 6720
+    let encrypted_res_mul = &encrypted_a * &encrypted_b;
+
+    // Clear equivalent computations: 6720 >> 5 = 210
+    encrypted_a = &encrypted_res_mul >> &encrypted_b;
+
+    // Clear equivalent computations: let casted_a = a as u8;
+    let casted_a: FheUint8 = encrypted_a.cast_into();
+
+    // Clear equivalent computations: min(210, 7) = 7
+    let encrypted_res_min = &casted_a.min(&encrypted_c);
+
+    // Operation between clear and encrypted data:
+    // Clear equivalent computations: 7 & 1 = 1
+    let encrypted_res = encrypted_res_min & 1_u8;
+
+    // Decrypting on the client side:
+    let clear_res: u8 = encrypted_res.decrypt(&client_key);
+    assert_eq!(clear_res, 1_u8);
+
+    Ok(())
+}
+```
+
+To run this code, use the following command: 
+<p align="center"> <code> cargo run --release </code> </p>

 Note that when running code that uses `tfhe-rs`, it is highly recommended
-to run in release mode with cargo`s `--release` flag to have the best performances possible,
-eg: `cargo run --release`.
+to run in release mode with cargo's `--release` flag to have the best performances possible.

-Here is a full example evaluating a Boolean circuit:
-
-```rust
-use tfhe::boolean::prelude::*;
-
-fn main() {
-    // We generate a set of client/server keys, using the default parameters:
-    let (mut client_key, mut server_key) = gen_keys();
-
-    // We use the client secret key to encrypt two messages:
-    let ct_1 = client_key.encrypt(true);
-    let ct_2 = client_key.encrypt(false);
-
-    // We use the server public key to execute a boolean circuit:
-    // if ((NOT ct_2) NAND (ct_1 AND ct_2)) then (NOT ct_2) else (ct_1 AND ct_2)
-    let ct_3 = server_key.not(&ct_2);
-    let ct_4 = server_key.and(&ct_1, &ct_2);
-    let ct_5 = server_key.nand(&ct_3, &ct_4);
-    let ct_6 = server_key.mux(&ct_5, &ct_3, &ct_4);
-
-    // We use the client key to decrypt the output of the circuit:
-    let output = client_key.decrypt(&ct_6);
-    assert_eq!(output, true);
-}
-```
-
-Another example of how the library can be used with shortints:
-
-```rust
-use tfhe::shortint::prelude::*;
-
-fn main() {
-    // Generate a set of client/server keys
-    // with 2 bits of message and 2 bits of carry
-    let (client_key, server_key) = gen_keys(PARAM_MESSAGE_2_CARRY_2);
-
-    let msg1 = 3;
-    let msg2 = 2;
-
-    // Encrypt two messages using the (private) client key:
-    let ct_1 = client_key.encrypt(msg1);
-    let ct_2 = client_key.encrypt(msg2);
-
-    // Homomorphically compute an addition
-    let ct_add = server_key.unchecked_add(&ct_1, &ct_2);
-
-    // Define the Hamming weight function
-    // f: x -> sum of the bits of x
-    let f = |x:u64| x.count_ones() as u64;
-
-    // Generate the accumulator for the function
-    let acc = server_key.generate_accumulator(f);
-
-    // Compute the function over the ciphertext using the PBS
-    let ct_res = server_key.apply_lookup_table(&ct_add, &acc);
-
-    // Decrypt the ciphertext using the (private) client key
-    let output = client_key.decrypt(&ct_res);
-    assert_eq!(output, f(msg1 + msg2));
-}
-```
-
-An example using integer:
-
-```rust
-use tfhe::integer::gen_keys_radix;
-use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2;
-
-fn main() {
-    // We create keys to create 16 bits integers
-    // using 8 blocks of 2 bits
-    let (cks, sks) = gen_keys_radix(&PARAM_MESSAGE_2_CARRY_2, 8);
-
-    let clear_a = 2382u16;
-    let clear_b = 29374u16;
-
-    let mut a = cks.encrypt(clear_a as u64);
-    let mut b = cks.encrypt(clear_b as u64);
-
-    let encrypted_max = sks.smart_max_parallelized(&mut a, &mut b);
-    let decrypted_max: u64 = cks.decrypt(&encrypted_max);
-
-    assert_eq!(decrypted_max as u16, clear_a.max(clear_b))
-}
-```

 ## Contributing

@@ -164,9 +142,24 @@ libraries.

 ## Need support?
 <a target="_blank" href="https://community.zama.ai">
-  <img src="https://user-images.githubusercontent.com/5758427/231115030-21195b55-2629-4c01-9809-be5059243999.png">
+  <img src="https://github.com/zama-ai/tfhe-rs/assets/157474013/33d856dc-f25d-454b-a010-af12bff2aa7d">
 </a>

+
+
+## Citing TFHE-rs
+
+To cite TFHE-rs in academic papers, please use the following entry:
+
+```text
+@Misc{TFHE-rs,
+  title={{TFHE-rs: A Pure Rust Implementation of the TFHE Scheme for Boolean and Integer Arithmetics Over Encrypted Data}},
+  author={Zama},
+  year={2022},
+  note={\url{https://github.com/zama-ai/tfhe-rs}},
+}
+```
+
 ## License

 This software is distributed under the BSD-3-Clause-Clear license. If you have any questions,
--- a/apps/trivium/Cargo.toml
+++ b/apps/trivium/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "tfhe-trivium"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+rayon = { version = "1.7.0"}
+
+[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
+path = "../../tfhe"
+features = [ "boolean", "shortint", "integer", "x86_64" ]
+
+[target.'cfg(target_arch = "aarch64")'.dependencies.tfhe]
+path = "../../tfhe"
+features = [ "boolean", "shortint", "integer", "aarch64-unix" ]
+
+[dev-dependencies]
+criterion = { version = "0.5.1", features = [ "html_reports" ]}
+
+[[bench]]
+name = "trivium"
+harness = false
--- a/apps/trivium/README.md
+++ b/apps/trivium/README.md
@@ -0,0 +1,204 @@
+# FHE boolean Trivium implementation using TFHE-rs
+
+The cleartext boolean Trivium is available to be built using the function `TriviumStream::<bool>::new`. 
+This takes as input 2 arrays of 80 bool: the Trivium key and the IV. After initialization, it returns a TriviumStream on 
+which the user can call `next`, getting the next bit of the cipher stream, or `next_64`, which will compute 64 values at once,
+using multithreading to accelerate the computation.
+
+
+Similarly, the function `TriviumStream::<FheBool>::new` returns an equivalent object running in FHE space. Its arguments are
+2 arrays of 80 FheBool representing the encrypted Trivium key and the encrypted IV. It also requires a reference to the server key of the
+current scheme. This means that any user of this feature must also have the `tfhe-rs` crate as a dependency.
+
+
+Example of a Rust main below:
+```rust
+use tfhe::{ConfigBuilder, generate_keys, FheBool};
+use tfhe::prelude::*;
+
+use tfhe_trivium::TriviumStream;
+
+/// Converts a bit stream into an uppercase hexadecimal string.
+///
+/// The stream is read in groups of 8 bits, each group being one byte given least
+/// significant bit first. Every byte is written as two hex digits, high nibble first.
+/// Panics if the number of bits is not a multiple of 8.
+fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
+	assert!(a.len() % 8 == 0);
+
+	// Hex digit of a 4-bit group whose first element is the least significant bit.
+	let nibble_to_hex = |nibble: &[bool]| -> char {
+		let value = nibble
+			.iter()
+			.enumerate()
+			.fold(0u32, |acc, (pos, &bit)| acc | ((bit as u32) << pos));
+		char::from_digit(value, 16).unwrap().to_ascii_uppercase()
+	};
+
+	let mut hexadecimal = String::with_capacity(a.len() / 4);
+	for byte in a.chunks(8) {
+		// Encoding is bytes in LSB order: bits 4..8 form the high nibble, printed first.
+		hexadecimal.push(nibble_to_hex(&byte[4..8]));
+		hexadecimal.push(nibble_to_hex(&byte[0..4]));
+	}
+	hexadecimal
+}
+
+// Runs the FHE boolean Trivium on a fixed key/IV pair and checks the first 64 bytes of
+// the decrypted key stream against the expected hexadecimal output.
+fn main() {
+	let (client_key, server_key) =
+		generate_keys(ConfigBuilder::all_disabled().enable_default_bool().build());
+
+	// Each pair of hex digits is one byte, unpacked least significant bit first.
+	let key_hex = "0053A6F94C9FF24598EB";
+	let mut key = [false; 80];
+	for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+		let start = 2 * byte_idx;
+		let byte = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+		for (bit_idx, bit) in bits.iter_mut().enumerate() {
+			*bit = (byte >> bit_idx) & 1 == 1;
+		}
+	}
+
+	let iv_hex = "0D74DB42A91077DE45AC";
+	let mut iv = [false; 80];
+	for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+		let start = 2 * byte_idx;
+		let byte = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+		for (bit_idx, bit) in bits.iter_mut().enumerate() {
+			*bit = (byte >> bit_idx) & 1 == 1;
+		}
+	}
+
+	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+
+	// Both the key and the IV are encrypted bit by bit with the client key.
+	let cipher_key = key.map(|bit| FheBool::encrypt(bit, &client_key));
+	let cipher_iv = iv.map(|bit| FheBool::encrypt(bit, &client_key));
+
+	let mut trivium = TriviumStream::<FheBool>::new(cipher_key, cipher_iv, &server_key);
+
+	// Collect 64 bytes (512 bits) of decrypted key stream.
+	let wanted_bits = 64*8;
+	let mut vec = Vec::<bool>::with_capacity(wanted_bits);
+	while vec.len() < wanted_bits {
+		for encrypted_bit in trivium.next_64() {
+			vec.push(encrypted_bit.decrypt(&client_key))
+		}
+	}
+
+	let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
+	assert_eq!(output_0_63, hexadecimal[0..64*2]);
+}
+```
+
+# FHE byte Trivium implementation
+
+The same objects have also been implemented to stream bytes instead of booleans. They can be constructed and used in the same way via the functions `TriviumStreamByte::<u8>::new` and
+`TriviumStreamByte::<FheUint8>::new` with the same arguments as before. The `FheUint8` version is significantly slower than the `FheBool` version, because it does not run
+with the same cryptographic parameters. Its interest lies in its trans-ciphering capabilities: `TriviumStreamByte<FheUint8>` implements the trait `TransCiphering`,
+meaning it implements the function `trans_encrypt_64`. This function takes as input a `FheUint64` and outputs a `FheUint64`, the output being
+encrypted via tfhe and trivium. For convenience we also provide `trans_decrypt_64`, but this is of course the exact same function.
+
+Sizes other than 64 bits are expected to be available in the future.
+
+# FHE shortint Trivium implementation
+
+The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `PARAM_MESSAGE_1_CARRY_1_KS_PBS`). It uses a lower level API 
+of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run on the same 
+cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes 
+its setup a little more intricate.
+
+Example code:
+```rust
+use tfhe::shortint::prelude::*;
+use tfhe::shortint::CastingKey;
+
+use tfhe::{ConfigBuilder, generate_keys, FheUint64};
+use tfhe::prelude::*;
+
+use tfhe_trivium::TriviumStreamShortint;
+
+// Trans-ciphers encrypted zeros with the shortint Trivium and checks the decrypted
+// result against the expected first 64 bytes of key stream.
+fn test_shortint() {
+	let (hl_client_key, hl_server_key) =
+		generate_keys(ConfigBuilder::all_disabled().enable_default_integers().build());
+	let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+	let ksk = CastingKey::new((&client_key, &server_key), (&hl_client_key, &hl_server_key));
+
+	// Each pair of hex digits is one byte, unpacked least significant bit first.
+	let key_hex = "0053A6F94C9FF24598EB";
+	let mut key = [0u64; 80];
+	for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+		let start = 2 * byte_idx;
+		let byte = u64::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+		for (bit_idx, bit) in bits.iter_mut().enumerate() {
+			*bit = (byte >> bit_idx) & 1;
+		}
+	}
+
+	let iv_hex = "0D74DB42A91077DE45AC";
+	let mut iv = [0u64; 80];
+	for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+		let start = 2 * byte_idx;
+		let byte = u64::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+		for (bit_idx, bit) in bits.iter_mut().enumerate() {
+			*bit = (byte >> bit_idx) & 1;
+		}
+	}
+	let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+
+	let cipher_key = key.map(|bit| client_key.encrypt(bit));
+	let cipher_iv = iv.map(|bit| client_key.encrypt(bit));
+
+	// Encrypted zero messages: trans-ciphering them yields the raw key stream.
+	let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];
+
+	let mut trivium = TriviumStreamShortint::new(cipher_key, cipher_iv, &server_key, &ksk);
+
+	let wanted_words = 8;
+	let mut vec = Vec::<u64>::with_capacity(wanted_words);
+	while vec.len() < wanted_words {
+		let message = ciphered_message.pop().unwrap();
+		let trans_ciphered_message = trivium.trans_encrypt_64(message, &hl_server_key);
+		vec.push(trans_ciphered_message.decrypt(&hl_client_key));
+	}
+
+	let hexadecimal = get_hexagonal_string_from_u64(vec);
+	assert_eq!(output_0_63, hexadecimal[0..64*2]);
+}
+```
+
+# FHE Kreyvium implementation using tfhe-rs crate
+
+This works in exactly the same way as the Trivium implementation, except that the key and IV need to be 128 bits. It is available for the same internal types as Trivium, with similar syntax.
+
+`KreyviumStreamByte<FheUint8>` and `KreyviumStreamShortint` also implement the `TransCiphering` trait.
+
+# Testing
+
+If you wish to run tests on this app, please run `cargo test -r trivium -- --test-threads=1`, because running several Trivium instances
+at the same time in multiple threads causes them to interfere with one another.
--- a/apps/trivium/benches/kreyvium_bool.rs
+++ b/apps/trivium/benches/kreyvium_bool.rs
@@ -0,0 +1,75 @@
+use tfhe::prelude::*;
+use tfhe::{generate_keys, ConfigBuilder, FheBool};
+
+use tfhe_trivium::KreyviumStream;
+
+use criterion::Criterion;
+
+/// Benchmarks `next_64` on a `KreyviumStream::<FheBool>` built from an encrypted key
+/// and a clear IV.
+pub fn kreyvium_bool_gen(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [false; 128];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [false; 128];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    let cipher_key = key.map(|bit| FheBool::encrypt(bit, &client_key));
+    let mut kreyvium = KreyviumStream::<FheBool>::new(cipher_key, iv, &server_key);
+
+    c.bench_function("kreyvium bool generate 64 bits", |bencher| {
+        bencher.iter(|| kreyvium.next_64())
+    });
+}
+
+/// Benchmarks key encryption plus construction of a `KreyviumStream::<FheBool>`
+/// from an encrypted key and a clear IV.
+pub fn kreyvium_bool_warmup(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [false; 128];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [false; 128];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    c.bench_function("kreyvium bool warmup", |bencher| {
+        bencher.iter(|| {
+            // The key is re-encrypted on every iteration, so encryption is measured too.
+            let cipher_key = key.map(|bit| FheBool::encrypt(bit, &client_key));
+            let _kreyvium = KreyviumStream::<FheBool>::new(cipher_key, iv, &server_key);
+        })
+    });
+}
--- a/apps/trivium/benches/kreyvium_byte.rs
+++ b/apps/trivium/benches/kreyvium_byte.rs
@@ -0,0 +1,93 @@
+use tfhe::prelude::*;
+use tfhe::{generate_keys, ConfigBuilder, FheUint64, FheUint8};
+
+use tfhe_trivium::{KreyviumStreamByte, TransCiphering};
+
+use criterion::Criterion;
+
+/// Benchmarks `next_64` on a `KreyviumStreamByte::<FheUint8>` built from an encrypted
+/// key and a clear IV.
+pub fn kreyvium_byte_gen(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(
+        ConfigBuilder::default()
+            .enable_function_evaluation()
+            .build(),
+    );
+
+    // Each pair of hex digits is one byte of the key / IV.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [0u8; 16];
+    for (idx, slot) in key.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [0u8; 16];
+    for (idx, slot) in iv.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+    }
+
+    let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+    let mut kreyvium = KreyviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+
+    c.bench_function("kreyvium byte generate 64 bits", |bencher| {
+        bencher.iter(|| kreyvium.next_64())
+    });
+}
+
+/// Benchmarks `trans_encrypt_64` of a `KreyviumStreamByte::<FheUint8>` applied to an
+/// encrypted zero `FheUint64`.
+pub fn kreyvium_byte_trans(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(
+        ConfigBuilder::default()
+            .enable_function_evaluation()
+            .build(),
+    );
+
+    // Each pair of hex digits is one byte of the key / IV.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [0u8; 16];
+    for (idx, slot) in key.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [0u8; 16];
+    for (idx, slot) in iv.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+    }
+
+    let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+
+    let ciphered_message = FheUint64::try_encrypt(0u64, &client_key).unwrap();
+    let mut kreyvium = KreyviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+
+    c.bench_function("kreyvium byte transencrypt 64 bits", |bencher| {
+        bencher.iter(|| kreyvium.trans_encrypt_64(ciphered_message.clone()))
+    });
+}
+
+/// Benchmarks key encryption plus construction of a `KreyviumStreamByte::<FheUint8>`
+/// from an encrypted key and a clear IV.
+pub fn kreyvium_byte_warmup(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(
+        ConfigBuilder::default()
+            .enable_function_evaluation()
+            .build(),
+    );
+
+    // Each pair of hex digits is one byte of the key / IV.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [0u8; 16];
+    for (idx, slot) in key.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [0u8; 16];
+    for (idx, slot) in iv.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+    }
+
+    c.bench_function("kreyvium byte warmup", |bencher| {
+        bencher.iter(|| {
+            // The key is re-encrypted on every iteration, so encryption is measured too.
+            let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+            let _kreyvium = KreyviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+        })
+    });
+}
--- a/apps/trivium/benches/kreyvium_shortint.rs
+++ b/apps/trivium/benches/kreyvium_shortint.rs
@@ -0,0 +1,149 @@
+use tfhe::prelude::*;
+use tfhe::shortint::prelude::*;
+use tfhe::shortint::KeySwitchingKey;
+use tfhe::{generate_keys, ConfigBuilder, FheUint64};
+
+use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
+
+use criterion::Criterion;
+
+/// Benchmarks key encryption plus construction of a `KreyviumStreamShortint`
+/// (1_1 parameters, key-switched towards the high-level keys).
+pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
+    let (hl_client_key, hl_server_key) = generate_keys(ConfigBuilder::default().build());
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [0u64; 128];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [0u64; 128];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    c.bench_function("kreyvium 1_1 warmup", |bencher| {
+        bencher.iter(|| {
+            // Keys are cloned on every iteration because the constructor takes them by value.
+            let cipher_key = key.map(|bit| client_key.encrypt(bit));
+            let _kreyvium = KreyviumStreamShortint::new(
+                cipher_key,
+                iv,
+                server_key.clone(),
+                ksk.clone(),
+                hl_server_key.clone(),
+            );
+        })
+    });
+}
+
+/// Benchmarks `next_64` on a `KreyviumStreamShortint` (1_1 parameters).
+pub fn kreyvium_shortint_gen(c: &mut Criterion) {
+    let (hl_client_key, hl_server_key) = generate_keys(ConfigBuilder::default().build());
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [0u64; 128];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [0u64; 128];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let cipher_key = key.map(|bit| client_key.encrypt(bit));
+
+    let mut kreyvium = KreyviumStreamShortint::new(cipher_key, iv, server_key, ksk, hl_server_key);
+
+    c.bench_function("kreyvium 1_1 generate 64 bits", |bencher| {
+        bencher.iter(|| kreyvium.next_64())
+    });
+}
+
+/// Benchmarks `trans_encrypt_64` of a `KreyviumStreamShortint` (1_1 parameters) applied
+/// to an encrypted zero `FheUint64`.
+pub fn kreyvium_shortint_trans(c: &mut Criterion) {
+    let (hl_client_key, hl_server_key) = generate_keys(ConfigBuilder::default().build());
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB000000000000";
+    let mut key = [0u64; 128];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC000000000000";
+    let mut iv = [0u64; 128];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let cipher_key = key.map(|bit| client_key.encrypt(bit));
+
+    let ciphered_message = FheUint64::try_encrypt(0u64, &hl_client_key).unwrap();
+    let mut kreyvium = KreyviumStreamShortint::new(cipher_key, iv, server_key, ksk, hl_server_key);
+
+    c.bench_function("kreyvium 1_1 transencrypt 64 bits", |bencher| {
+        bencher.iter(|| kreyvium.trans_encrypt_64(ciphered_message.clone()))
+    });
+}
--- a/apps/trivium/benches/trivium.rs
+++ b/apps/trivium/benches/trivium.rs
@@ -0,0 +1,53 @@
+use criterion::{criterion_group, criterion_main};
+
+// One module per cipher / representation pair; each exposes its benchmark functions.
+mod kreyvium_bool;
+mod kreyvium_byte;
+mod kreyvium_shortint;
+mod trivium_bool;
+mod trivium_byte;
+mod trivium_shortint;
+
+// Boolean (FheBool) benchmarks.
+criterion_group!(
+    trivium_bool,
+    trivium_bool::trivium_bool_gen,
+    trivium_bool::trivium_bool_warmup
+);
+criterion_group!(
+    kreyvium_bool,
+    kreyvium_bool::kreyvium_bool_gen,
+    kreyvium_bool::kreyvium_bool_warmup
+);
+
+// Shortint benchmarks.
+criterion_group!(
+    trivium_shortint,
+    trivium_shortint::trivium_shortint_gen,
+    trivium_shortint::trivium_shortint_warmup,
+    trivium_shortint::trivium_shortint_trans
+);
+criterion_group!(
+    kreyvium_shortint,
+    kreyvium_shortint::kreyvium_shortint_gen,
+    kreyvium_shortint::kreyvium_shortint_warmup,
+    kreyvium_shortint::kreyvium_shortint_trans
+);
+
+// Byte (FheUint8) benchmarks.
+criterion_group!(
+    trivium_byte,
+    trivium_byte::trivium_byte_gen,
+    trivium_byte::trivium_byte_trans,
+    trivium_byte::trivium_byte_warmup
+);
+criterion_group!(
+    kreyvium_byte,
+    kreyvium_byte::kreyvium_byte_gen,
+    kreyvium_byte::kreyvium_byte_trans,
+    kreyvium_byte::kreyvium_byte_warmup
+);
+
+// Groups are listed Trivium first, then Kreyvium.
+criterion_main!(
+    trivium_bool,
+    trivium_shortint,
+    trivium_byte,
+    kreyvium_bool,
+    kreyvium_shortint,
+    kreyvium_byte,
+);
--- a/apps/trivium/benches/trivium_bool.rs
+++ b/apps/trivium/benches/trivium_bool.rs
@@ -0,0 +1,75 @@
+use tfhe::prelude::*;
+use tfhe::{generate_keys, ConfigBuilder, FheBool};
+
+use tfhe_trivium::TriviumStream;
+
+use criterion::Criterion;
+
+/// Benchmarks `next_64` on a `TriviumStream::<FheBool>` built from an encrypted key
+/// and a clear IV.
+pub fn trivium_bool_gen(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [false; 80];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [false; 80];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    let cipher_key = key.map(|bit| FheBool::encrypt(bit, &client_key));
+    let mut trivium = TriviumStream::<FheBool>::new(cipher_key, iv, &server_key);
+
+    c.bench_function("trivium bool generate 64 bits", |bencher| {
+        bencher.iter(|| trivium.next_64())
+    });
+}
+
+/// Benchmarks key encryption plus construction of a `TriviumStream::<FheBool>`
+/// from an encrypted key and a clear IV.
+pub fn trivium_bool_warmup(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [false; 80];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [false; 80];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1 == 1;
+        }
+    }
+
+    c.bench_function("trivium bool warmup", |bencher| {
+        bencher.iter(|| {
+            // The key is re-encrypted on every iteration, so encryption is measured too.
+            let cipher_key = key.map(|bit| FheBool::encrypt(bit, &client_key));
+            let _trivium = TriviumStream::<FheBool>::new(cipher_key, iv, &server_key);
+        })
+    });
+}
--- a/apps/trivium/benches/trivium_byte.rs
+++ b/apps/trivium/benches/trivium_byte.rs
@@ -0,0 +1,87 @@
+use tfhe::prelude::*;
+use tfhe::{generate_keys, ConfigBuilder, FheUint64, FheUint8};
+
+use tfhe_trivium::{TransCiphering, TriviumStreamByte};
+
+use criterion::Criterion;
+
+/// Benchmarks `next_64` on a `TriviumStreamByte::<FheUint8>` built from an encrypted
+/// key and a clear IV.
+pub fn trivium_byte_gen(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    // Each pair of hex digits is one byte of the key / IV.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [0u8; 10];
+    for (idx, slot) in key.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [0u8; 10];
+    for (idx, slot) in iv.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+    }
+
+    let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+    let mut trivium = TriviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+
+    c.bench_function("trivium byte generate 64 bits", |bencher| {
+        bencher.iter(|| trivium.next_64())
+    });
+}
+
+/// Benchmarks `trans_encrypt_64` of a `TriviumStreamByte::<FheUint8>` applied to an
+/// encrypted zero `FheUint64`.
+pub fn trivium_byte_trans(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    // Each pair of hex digits is one byte of the key / IV.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [0u8; 10];
+    for (idx, slot) in key.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [0u8; 10];
+    for (idx, slot) in iv.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+    }
+
+    let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+
+    let ciphered_message = FheUint64::try_encrypt(0u64, &client_key).unwrap();
+    let mut trivium = TriviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+
+    c.bench_function("trivium byte transencrypt 64 bits", |bencher| {
+        bencher.iter(|| trivium.trans_encrypt_64(ciphered_message.clone()))
+    });
+}
+
+/// Benchmarks key encryption plus construction of a `TriviumStreamByte::<FheUint8>`
+/// from an encrypted key and a clear IV.
+pub fn trivium_byte_warmup(c: &mut Criterion) {
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    // Each pair of hex digits is one byte of the key / IV.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [0u8; 10];
+    for (idx, slot) in key.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [0u8; 10];
+    for (idx, slot) in iv.iter_mut().enumerate() {
+        let start = 2 * idx;
+        *slot = u8::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+    }
+
+    c.bench_function("trivium byte warmup", |bencher| {
+        bencher.iter(|| {
+            // The key is re-encrypted on every iteration, so encryption is measured too.
+            let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+            let _trivium = TriviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+        })
+    });
+}
--- a/apps/trivium/benches/trivium_shortint.rs
+++ b/apps/trivium/benches/trivium_shortint.rs
@@ -0,0 +1,149 @@
+use tfhe::prelude::*;
+use tfhe::shortint::prelude::*;
+use tfhe::shortint::KeySwitchingKey;
+use tfhe::{generate_keys, ConfigBuilder, FheUint64};
+
+use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
+
+use criterion::Criterion;
+
+/// Benchmarks key encryption plus construction of a `TriviumStreamShortint`
+/// (1_1 parameters, key-switched towards the high-level keys).
+pub fn trivium_shortint_warmup(c: &mut Criterion) {
+    let (hl_client_key, hl_server_key) = generate_keys(ConfigBuilder::default().build());
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [0u64; 80];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [0u64; 80];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    c.bench_function("trivium 1_1 warmup", |bencher| {
+        bencher.iter(|| {
+            // Keys are cloned on every iteration because the constructor takes them by value.
+            let cipher_key = key.map(|bit| client_key.encrypt(bit));
+            let _trivium = TriviumStreamShortint::new(
+                cipher_key,
+                iv,
+                server_key.clone(),
+                ksk.clone(),
+                hl_server_key.clone(),
+            );
+        })
+    });
+}
+
+/// Benchmarks `next_64` on a `TriviumStreamShortint` (1_1 parameters).
+pub fn trivium_shortint_gen(c: &mut Criterion) {
+    let (hl_client_key, hl_server_key) = generate_keys(ConfigBuilder::default().build());
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [0u64; 80];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [0u64; 80];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let cipher_key = key.map(|bit| client_key.encrypt(bit));
+
+    let mut trivium = TriviumStreamShortint::new(cipher_key, iv, server_key, ksk, hl_server_key);
+
+    c.bench_function("trivium 1_1 generate 64 bits", |bencher| {
+        bencher.iter(|| trivium.next_64())
+    });
+}
+
+/// Benchmarks `trans_encrypt_64` of a `TriviumStreamShortint` (1_1 parameters) applied
+/// to an encrypted zero `FheUint64`.
+pub fn trivium_shortint_trans(c: &mut Criterion) {
+    let (hl_client_key, hl_server_key) = generate_keys(ConfigBuilder::default().build());
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    // Each pair of hex digits is one byte, unpacked least significant bit first.
+    let key_hex = "0053A6F94C9FF24598EB";
+    let mut key = [0u64; 80];
+    for (byte_idx, bits) in key.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&key_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let iv_hex = "0D74DB42A91077DE45AC";
+    let mut iv = [0u64; 80];
+    for (byte_idx, bits) in iv.chunks_mut(8).enumerate() {
+        let start = 2 * byte_idx;
+        let byte = u64::from_str_radix(&iv_hex[start..start + 2], 16).unwrap();
+        for (bit_idx, bit) in bits.iter_mut().enumerate() {
+            *bit = (byte >> bit_idx) & 1;
+        }
+    }
+
+    let cipher_key = key.map(|bit| client_key.encrypt(bit));
+
+    let ciphered_message = FheUint64::try_encrypt(0u64, &hl_client_key).unwrap();
+    let mut trivium = TriviumStreamShortint::new(cipher_key, iv, server_key, ksk, hl_server_key);
+
+    c.bench_function("trivium 1_1 transencrypt 64 bits", |bencher| {
+        bencher.iter(|| trivium.trans_encrypt_64(ciphered_message.clone()))
+    });
+}
--- a/apps/trivium/src/kreyvium/kreyvium.rs
+++ b/apps/trivium/src/kreyvium/kreyvium.rs
@@ -0,0 +1,257 @@
+//! This module implements the Kreyvium stream cipher, using booleans or FheBool
+//! for the representation of the inner bits.
+
+use crate::static_deque::StaticDeque;
+
+use tfhe::prelude::*;
+use tfhe::{set_server_key, unset_server_key, FheBool, ServerKey};
+
+use rayon::prelude::*;
+
+/// Internal trait specifying which operations are necessary for KreyviumStream generic type.
+///
+/// An implementor must be `Sized` and `Clone`, and its `^`, `&` and `!` operators must all
+/// produce `OpOutput`.
+pub trait KreyviumBoolInput<OpOutput>:
+    Sized
+    + Clone
+    + std::ops::BitXor<Output = OpOutput>
+    + std::ops::BitAnd<Output = OpOutput>
+    + std::ops::Not<Output = OpOutput>
+{
+}
+// Implemented for owned values and for references, with clear booleans and with FheBool.
+// In every case the operator output is the owned type.
+impl KreyviumBoolInput<bool> for bool {}
+impl KreyviumBoolInput<bool> for &bool {}
+impl KreyviumBoolInput<FheBool> for FheBool {}
+impl KreyviumBoolInput<FheBool> for &FheBool {}
+
+/// KreyviumStream: a struct implementing the Kreyvium stream cipher, using T for the internal
+/// representation of bits (bool or FheBool). To be able to compute FHE operations, it also owns
+/// an Option for a ServerKey.
+pub struct KreyviumStream<T> {
+    /// First register (93 entries); the constructors fill it with key bits.
+    a: StaticDeque<93, T>,
+    /// Second register (84 entries); the constructors fill it with iv bits.
+    b: StaticDeque<84, T>,
+    /// Third register (111 entries); the constructors fill it with a zero, 66 ones and iv bits.
+    c: StaticDeque<111, T>,
+    /// The whole 128-bit key, stored reversed and shifted at every step.
+    k: StaticDeque<128, T>,
+    /// The whole 128-bit iv, stored reversed and shifted at every step.
+    iv: StaticDeque<128, T>,
+    /// Server key used for FHE computations; `None` for the clear (bool) stream.
+    fhe_key: Option<ServerKey>,
+}
+
+impl KreyviumStream<bool> {
+    /// Builds a clear `KreyviumStream<bool>` from the secret key and the initialization vector,
+    /// each given as 128 booleans.
+    /// The returned stream is already initialized (1152 steps have been run before returning).
+    pub fn new(mut key: [bool; 128], mut iv: [bool; 128]) -> KreyviumStream<bool> {
+        // Register a: the last 93 key bits.
+        let mut a_register = [false; 93];
+        a_register.copy_from_slice(&key[128 - 93..]);
+
+        // Register b: the last 84 iv bits.
+        let mut b_register = [false; 84];
+        b_register.copy_from_slice(&iv[128 - 84..]);
+
+        // Register c: a zero at index 0, ones at indices 1 to 66, then the first 44 iv bits.
+        let mut c_register = [false; 111];
+        c_register[1..=66].fill(true);
+        c_register[111 - 44..].copy_from_slice(&iv[..44]);
+
+        // The k and iv shift registers receive the full key and iv in reversed order.
+        key.reverse();
+        iv.reverse();
+        KreyviumStream::<bool>::new_from_registers(
+            a_register, b_register, c_register, key, iv, None,
+        )
+    }
+}
+
+impl KreyviumStream<FheBool> {
+    /// Builds an encrypted `KreyviumStream<FheBool>` from the encrypted secret key, the clear
+    /// initialization vector, and the FHE server key.
+    /// The returned stream is already initialized (1152 steps have been run before returning).
+    pub fn new(
+        mut key: [FheBool; 128],
+        mut iv: [bool; 128],
+        sk: &ServerKey,
+    ) -> KreyviumStream<FheBool> {
+        // The server key is installed while the trivial encryptions are built, and removed
+        // again before the initialization rounds start.
+        set_server_key(sk.clone());
+
+        // Register a: the last 93 (encrypted) key bits.
+        let a_register: [FheBool; 93] = std::array::from_fn(|i| key[128 - 93 + i].clone());
+
+        // Register b: the last 84 iv bits, trivially encrypted.
+        let b_register: [FheBool; 84] =
+            std::array::from_fn(|i| FheBool::encrypt_trivial(iv[128 - 84 + i]));
+
+        // Register c: a zero at index 0, ones at indices 1 to 66, then the first 44 iv bits.
+        let c_register: [FheBool; 111] = std::array::from_fn(|i| {
+            let clear_bit = match i {
+                0 => false,
+                1..=66 => true,
+                _ => iv[i - (111 - 44)],
+            };
+            FheBool::encrypt_trivial(clear_bit)
+        });
+
+        // The k and iv shift registers receive the full key and iv in reversed order.
+        key.reverse();
+        iv.reverse();
+        let iv = iv.map(FheBool::encrypt_trivial);
+
+        unset_server_key();
+        KreyviumStream::<FheBool>::new_from_registers(
+            a_register,
+            b_register,
+            c_register,
+            key,
+            iv,
+            Some(sk.clone()),
+        )
+    }
+}
+
+impl<T> KreyviumStream<T>
+where
+    T: KreyviumBoolInput<T> + std::marker::Send + std::marker::Sync,
+    for<'a> &'a T: KreyviumBoolInput<T>,
+{
+    /// Internal generic constructor: wraps the already prepared register contents into deques,
+    /// stores the optional FHE server key, and runs the initialization rounds before returning.
+    fn new_from_registers(
+        a_register: [T; 93],
+        b_register: [T; 84],
+        c_register: [T; 111],
+        k_register: [T; 128],
+        iv_register: [T; 128],
+        key: Option<ServerKey>,
+    ) -> Self {
+        let a = StaticDeque::<93, T>::new(a_register);
+        let b = StaticDeque::<84, T>::new(b_register);
+        let c = StaticDeque::<111, T>::new(c_register);
+        let k = StaticDeque::<128, T>::new(k_register);
+        let iv = StaticDeque::<128, T>::new(iv_register);
+
+        let mut stream = Self {
+            a,
+            b,
+            c,
+            k,
+            iv,
+            fhe_key: key,
+        };
+        stream.init();
+        stream
+    }
+
+    /// Runs the 1152 (= 18*64) warm-up steps required by the Kreyvium specification; their
+    /// output is discarded.
+    fn init(&mut self) {
+        (0..18).for_each(|_| {
+            self.next_64();
+        });
+    }
+
+    /// Computes one turn of the stream, updating registers and outputting the new bit.
+    ///
+    /// When a server key is stored, it is broadcast to every rayon worker before the step is
+    /// computed, because `get_output_and_values` evaluates its operations through `rayon::join`
+    /// and the server key is set per thread. The workers' key is unset afterwards, as in
+    /// `next_64`. On return the calling thread has the server key set, as it did before this
+    /// change.
+    pub fn next_bool(&mut self) -> T {
+        if let Some(sk) = &self.fhe_key {
+            rayon::broadcast(|_| set_server_key(sk.clone()));
+        }
+
+        let [o, a, b, c] = self.get_output_and_values(0);
+
+        if let Some(sk) = &self.fhe_key {
+            rayon::broadcast(|_| unset_server_key());
+            // Preserve the historical side effect: the caller's thread ends with the key set.
+            set_server_key(sk.clone());
+        }
+
+        self.a.push(a);
+        self.b.push(b);
+        self.c.push(c);
+        // The key and iv registers advance by one position per step.
+        self.k.shift();
+        self.iv.shift();
+
+        o
+    }
+
+    /// Computes a potential future step of Kreyvium, n terms in the future. This does not update
+    /// registers, but rather returns with the output, the three values that will be used to
+    /// update the registers, when the time is right. This function is meant to be used in
+    /// parallel.
+    ///
+    /// The returned array is `[o, a, b, c]`: the output bit, followed by the values to push
+    /// into the `a`, `b` and `c` registers respectively.
+    ///
+    /// Panics if `n >= 65`.
+    fn get_output_and_values(&self, n: usize) -> [T; 4] {
+        assert!(n < 65);
+
+        // First stage: six independent expressions, evaluated through nested `rayon::join`
+        // calls. `temp_c` already includes the key bit and `a_and` already includes the iv bit.
+        let (((temp_a, temp_b), (temp_c, a_and)), (b_and, c_and)) = rayon::join(
+            || {
+                rayon::join(
+                    || {
+                        rayon::join(
+                            || &self.a[65 - n] ^ &self.a[92 - n],
+                            || &self.b[68 - n] ^ &self.b[83 - n],
+                        )
+                    },
+                    || {
+                        rayon::join(
+                            || &(&self.c[65 - n] ^ &self.c[110 - n]) ^ &self.k[127 - n],
+                            || &(&self.a[91 - n] & &self.a[90 - n]) ^ &self.iv[127 - n],
+                        )
+                    },
+                )
+            },
+            || {
+                rayon::join(
+                    || &self.b[82 - n] & &self.b[81 - n],
+                    || &self.c[109 - n] & &self.c[108 - n],
+                )
+            },
+        );
+
+        // Second stage: the output bit is the XOR of the three temporaries; each new register
+        // value combines one temporary, one AND term and one more register bit.
+        let ((o, a), (b, c)) = rayon::join(
+            || {
+                rayon::join(
+                    || &(&temp_a ^ &temp_b) ^ &temp_c,
+                    || &temp_c ^ &(&c_and ^ &self.a[68 - n]),
+                )
+            },
+            || {
+                rayon::join(
+                    || &temp_a ^ &(&a_and ^ &self.b[77 - n]),
+                    || &temp_b ^ &(&b_and ^ &self.c[86 - n]),
+                )
+            },
+        );
+
+        [o, a, b, c]
+    }
+
+    /// Runs `get_output_and_values` for the steps 0 to 63 in parallel and gathers the results
+    /// in a Vec, in reversed order: step 63 comes first and step 0 comes last.
+    fn get_64_output_and_values(&self) -> Vec<[T; 4]> {
+        let steps = (0..64)
+            .into_par_iter()
+            .map(|step| self.get_output_and_values(step));
+        steps.rev().collect()
+    }
+
+    /// Computes 64 turns of the stream, outputting the 64 bits all at once in a
+    /// Vec (first value is oldest, last is newest).
+    ///
+    /// When a server key is stored, it is set on every rayon worker for the duration of the
+    /// parallel computation and unset right after.
+    pub fn next_64(&mut self) -> Vec<T> {
+        if let Some(sk) = &self.fhe_key {
+            rayon::broadcast(|_| set_server_key(sk.clone()));
+        }
+        let values = self.get_64_output_and_values();
+        if self.fhe_key.is_some() {
+            rayon::broadcast(|_| unset_server_key());
+        }
+
+        let mut ret = Vec::<T>::with_capacity(64);
+
+        // `values` holds step 0 last, so it is consumed from the back.
+        for [o, a, b, c] in values.into_iter().rev() {
+            ret.push(o);
+            self.a.push(a);
+            self.b.push(b);
+            self.c.push(c);
+        }
+        self.k.n_shifts(64);
+        self.iv.n_shifts(64);
+        ret
+    }
+}
--- a/apps/trivium/src/kreyvium/kreyvium_byte.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_byte.rs
@@ -0,0 +1,293 @@
+//! This module implements the Kreyvium stream cipher, using u8 or FheUint8
+//! for the representation of the inner bits.
+
+use crate::static_deque::{StaticByteDeque, StaticByteDequeInput};
+
+use tfhe::prelude::*;
+use tfhe::{set_server_key, unset_server_key, FheUint8, ServerKey};
+
+use rayon::prelude::*;
+
+/// Internal trait specifying which operations are necessary for KreyviumStreamByte generic type.
+///
+/// An implementor must be `Sized`, `Send`, `Sync` and `Clone`, must satisfy
+/// `StaticByteDequeInput<OpOutput>`, and its `^`, `&`, `>>` (by `u8`), `<<` (by `u8`) and `+`
+/// operators must all produce `OpOutput`.
+pub trait KreyviumByteInput<OpOutput>:
+    Sized
+    + Send
+    + Sync
+    + Clone
+    + StaticByteDequeInput<OpOutput>
+    + std::ops::BitXor<Output = OpOutput>
+    + std::ops::BitAnd<Output = OpOutput>
+    + std::ops::Shr<u8, Output = OpOutput>
+    + std::ops::Shl<u8, Output = OpOutput>
+    + std::ops::Add<Output = OpOutput>
+{
+}
+// Implemented for owned values and for references, with clear bytes and with FheUint8.
+// In every case the operator output is the owned type.
+impl KreyviumByteInput<u8> for u8 {}
+impl KreyviumByteInput<u8> for &u8 {}
+impl KreyviumByteInput<FheUint8> for FheUint8 {}
+impl KreyviumByteInput<FheUint8> for &FheUint8 {}
+
+/// KreyviumStreamByte: a struct implementing the Kreyvium stream cipher, using T for the internal
+/// representation of bits (u8 or FheUint8). To be able to compute FHE operations, it also owns
+/// an Option for a ServerKey.
+/// Since the original Kreyvium registers' sizes are not a multiple of 8, these registers (which
+/// store byte-like objects) have a size that is the eighth of the closest multiple of 8 above the
+/// originals' sizes.
+pub struct KreyviumStreamByte<T> {
+    /// Register a: 12 bytes (96 bits) for the 93-bit register.
+    a_byte: StaticByteDeque<12, T>,
+    /// Register b: 11 bytes (88 bits) for the 84-bit register.
+    b_byte: StaticByteDeque<11, T>,
+    /// Register c: 14 bytes (112 bits) for the 111-bit register.
+    c_byte: StaticByteDeque<14, T>,
+    /// The whole 128-bit key, shifted by 8 bytes on every call to `next_64`.
+    k_byte: StaticByteDeque<16, T>,
+    /// The whole 128-bit iv, shifted by 8 bytes on every call to `next_64`.
+    iv_byte: StaticByteDeque<16, T>,
+    /// Server key used for FHE computations; `None` for the clear (u8) stream.
+    fhe_key: Option<ServerKey>,
+}
+
+impl KreyviumStreamByte<u8> {
+    /// Constructor for `KreyviumStreamByte<u8>`: arguments are the secret key and the
+    /// initialization vector, as 16 bytes each.
+    /// Outputs a KreyviumStreamByte object already initialized (1152 steps have been run before
+    /// returning).
+    pub fn new(key_bytes: [u8; 16], iv_bytes: [u8; 16]) -> KreyviumStreamByte<u8> {
+        // Register a: the last 12 key bytes.
+        let mut a_byte_reg = [0u8; 12];
+        a_byte_reg.copy_from_slice(&key_bytes[4..]);
+
+        // Register b: the last 11 iv bytes.
+        let mut b_byte_reg = [0u8; 11];
+        b_byte_reg.copy_from_slice(&iv_bytes[5..]);
+
+        // Register c starts with two zero bits followed by 66 ones:
+        // 6 ones in byte 0, 56 in bytes 1 to 7, and the last 4 in the low nibble of byte 8.
+        let mut c_byte_reg = [0u8; 14];
+        c_byte_reg[0] = 252;
+        c_byte_reg[1..8].fill(255);
+
+        // The first 44 iv bits follow the ones. The low nibble mask must be 15 (four ones):
+        // a mask of 31 would also set bit 4 and overwrite the first iv bit with a one.
+        c_byte_reg[8] = (iv_bytes[0] << 4) | 15;
+        for (dst, pair) in c_byte_reg[9..].iter_mut().zip(iv_bytes.windows(2)) {
+            *dst = (pair[0] >> 4) | (pair[1] << 4);
+        }
+
+        // Key and iv are stored in reverse in their shift registers
+        let mut key = key_bytes.map(|b| b.reverse_bits());
+        let mut iv = iv_bytes.map(|b| b.reverse_bits());
+        key.reverse();
+        iv.reverse();
+
+        let mut ret = KreyviumStreamByte::<u8>::new_from_registers(
+            a_byte_reg, b_byte_reg, c_byte_reg, key, iv, None,
+        );
+        ret.init();
+        ret
+    }
+}
+
+impl KreyviumStreamByte<FheUint8> {
+    /// Constructor for `KreyviumStreamByte<FheUint8>`: arguments are the encrypted secret key,
+    /// the clear initialization vector, and the FHE server key.
+    /// Outputs a KreyviumStreamByte object already initialized (1152 steps have been run before
+    /// returning).
+    pub fn new(
+        key_bytes: [FheUint8; 16],
+        iv_bytes: [u8; 16],
+        server_key: &ServerKey,
+    ) -> KreyviumStreamByte<FheUint8> {
+        // The server key is installed while the registers are prepared, and removed again
+        // before the initialization rounds start.
+        set_server_key(server_key.clone());
+
+        let mut a_byte_reg = [0u8; 12].map(FheUint8::encrypt_trivial);
+        let mut b_byte_reg = [0u8; 11].map(FheUint8::encrypt_trivial);
+        let mut c_byte_reg = [0u8; 14].map(FheUint8::encrypt_trivial);
+
+        // Register a: the last 12 (encrypted) key bytes.
+        a_byte_reg.clone_from_slice(&key_bytes[4..]);
+
+        // Register b: the last 11 iv bytes, trivially encrypted.
+        for (dst, &clear) in b_byte_reg.iter_mut().zip(&iv_bytes[5..]) {
+            *dst = FheUint8::encrypt_trivial(clear);
+        }
+
+        // Register c starts with two zero bits followed by 66 ones:
+        // 6 ones in byte 0, 56 in bytes 1 to 7, and the last 4 in the low nibble of byte 8.
+        c_byte_reg[0] = FheUint8::encrypt_trivial(252u8);
+        c_byte_reg[1..8].fill_with(|| FheUint8::encrypt_trivial(255u8));
+
+        // The first 44 iv bits follow the ones. The low nibble mask must be 15 (four ones):
+        // a mask of 31 would also set bit 4 and overwrite the first iv bit with a one.
+        c_byte_reg[8] = FheUint8::encrypt_trivial((iv_bytes[0] << 4u8) | 15u8);
+        for (dst, pair) in c_byte_reg[9..].iter_mut().zip(iv_bytes.windows(2)) {
+            *dst = FheUint8::encrypt_trivial((pair[0] >> 4u8) | (pair[1] << 4u8));
+        }
+
+        // Key and iv are stored in reverse in their shift registers
+        let mut key = key_bytes.map(|b| b.map(|x| (x as u8).reverse_bits() as u64));
+        let mut iv = iv_bytes.map(|x| FheUint8::encrypt_trivial(x.reverse_bits()));
+        key.reverse();
+        iv.reverse();
+
+        unset_server_key();
+
+        let mut ret = KreyviumStreamByte::<FheUint8>::new_from_registers(
+            a_byte_reg,
+            b_byte_reg,
+            c_byte_reg,
+            key,
+            iv,
+            Some(server_key.clone()),
+        );
+        ret.init();
+        ret
+    }
+}
+
+impl<T> KreyviumStreamByte<T>
+where
+    T: KreyviumByteInput<T> + Send,
+    for<'a> &'a T: KreyviumByteInput<T>,
+{
+    /// Internal generic constructor: wraps the already prepared register contents into byte
+    /// deques and stores the optional FHE server key. The initialization rounds are NOT run
+    /// here; the public constructors call `init` themselves.
+    fn new_from_registers(
+        a_register: [T; 12],
+        b_register: [T; 11],
+        c_register: [T; 14],
+        k_register: [T; 16],
+        iv_register: [T; 16],
+        sk: Option<ServerKey>,
+    ) -> Self {
+        let a_byte = StaticByteDeque::<12, T>::new(a_register);
+        let b_byte = StaticByteDeque::<11, T>::new(b_register);
+        let c_byte = StaticByteDeque::<14, T>::new(c_register);
+        let k_byte = StaticByteDeque::<16, T>::new(k_register);
+        let iv_byte = StaticByteDeque::<16, T>::new(iv_register);
+
+        Self {
+            a_byte,
+            b_byte,
+            c_byte,
+            k_byte,
+            iv_byte,
+            fhe_key: sk,
+        }
+    }
+
+    /// Runs the 1152 (= 18*64) warm-up steps required by the Kreyvium specification; their
+    /// output is discarded.
+    fn init(&mut self) {
+        (0..18).for_each(|_| {
+            self.next_64();
+        });
+    }
+
+    /// Computes 8 potential future steps of Kreyvium, b*8 terms in the future. This does not
+    /// update registers, but rather returns, along with the output, the three values that will
+    /// be used to update the registers, when the time is right. This function is meant to be
+    /// used in parallel.
+    ///
+    /// The returned array is `[o, a, b, c]`: the output byte, followed by the bytes to push
+    /// into the `a`, `b` and `c` registers respectively.
+    ///
+    /// Panics if `b * 8 + 7 >= 65`, i.e. if `b > 7`.
+    fn get_output_and_values(&self, b: usize) -> [T; 4] {
+        let n = b * 8 + 7;
+        assert!(n < 65);
+
+        // First stage: fetch every byte that is needed (key, iv, and five bytes from each of
+        // the a, b and c registers), through nested `rayon::join` calls.
+        let (((k, iv), (a1, a2, a3, a4, a5)), ((b1, b2, b3, b4, b5), (c1, c2, c3, c4, c5))) =
+            rayon::join(
+                || {
+                    rayon::join(
+                        || (self.k_byte.byte(127 - n), self.iv_byte.byte(127 - n)),
+                        || Self::get_bytes(&self.a_byte, [91 - n, 90 - n, 68 - n, 65 - n, 92 - n]),
+                    )
+                },
+                || {
+                    rayon::join(
+                        || Self::get_bytes(&self.b_byte, [82 - n, 81 - n, 77 - n, 68 - n, 83 - n]),
+                        || {
+                            Self::get_bytes(
+                                &self.c_byte,
+                                [109 - n, 108 - n, 86 - n, 65 - n, 110 - n],
+                            )
+                        },
+                    )
+                },
+            );
+
+        // Second stage: six independent expressions. `temp_c` already includes the key byte;
+        // `a1 & a2 ^ iv` parses as `(a1 & a2) ^ iv`, so `a_and` already includes the iv byte.
+        let (((temp_a, temp_b), (temp_c, a_and)), (b_and, c_and)) = rayon::join(
+            || {
+                rayon::join(
+                    || rayon::join(|| a4 ^ a5, || b4 ^ b5),
+                    || rayon::join(|| c4 ^ c5 ^ k, || a1 & a2 ^ iv),
+                )
+            },
+            || rayon::join(|| b1 & b2, || c1 & c2),
+        );
+
+        // The temporaries are used twice (once for the output, once for a register update) and
+        // the closures below take them by value, hence the clones.
+        let (temp_a_2, temp_b_2, temp_c_2) = (temp_a.clone(), temp_b.clone(), temp_c.clone());
+
+        // Third stage: the output byte is the XOR of the three temporaries; each new register
+        // byte combines one temporary, one AND term and one more register byte.
+        let ((o, a), (b, c)) = rayon::join(
+            || {
+                rayon::join(
+                    || (temp_a_2 ^ temp_b_2) ^ temp_c_2,
+                    || temp_c ^ ((c_and) ^ a3),
+                )
+            },
+            || rayon::join(|| temp_a ^ (a_and ^ b3), || temp_b ^ (b_and ^ c3)),
+        );
+
+        [o, a, b, c]
+    }
+
+    /// Runs `get_output_and_values` for the byte indices 0 to 7 in parallel and gathers the
+    /// results in a Vec, in increasing index order.
+    fn get_64_output_and_values(&self) -> Vec<[T; 4]> {
+        let per_byte = (0..8)
+            .into_par_iter()
+            .map(|byte_index| self.get_output_and_values(byte_index));
+        per_byte.collect()
+    }
+
+    /// Computes 64 turns of the stream, outputting the 64 bits (in 8 bytes) all at once in a
+    /// Vec (first value is oldest, last is newest).
+    ///
+    /// When a server key is stored, it is set on every rayon worker for the duration of the
+    /// parallel computation and unset right after.
+    pub fn next_64(&mut self) -> Vec<T> {
+        if let Some(sk) = &self.fhe_key {
+            rayon::broadcast(|_| set_server_key(sk.clone()));
+        }
+        let values = self.get_64_output_and_values();
+        if self.fhe_key.is_some() {
+            rayon::broadcast(|_| unset_server_key());
+        }
+
+        let mut bytes = Vec::<T>::with_capacity(8);
+        values.into_iter().for_each(|[o, a, b, c]| {
+            self.a_byte.push(a);
+            self.b_byte.push(b);
+            self.c_byte.push(c);
+            bytes.push(o);
+        });
+        self.k_byte.n_shifts(8);
+        self.iv_byte.n_shifts(8);
+
+        bytes
+    }
+
+    /// Reads 5 bytes from `reg` in parallel, one per entry of `offsets`, and returns them in
+    /// the same order as the offsets.
+    fn get_bytes<const N: usize>(
+        reg: &StaticByteDeque<N, T>,
+        offsets: [usize; 5],
+    ) -> (T, T, T, T, T) {
+        let fetched = offsets
+            .par_iter()
+            .map(|&offset| reg.byte(offset))
+            .collect::<Vec<_>>();
+        let mut in_order = fetched.into_iter();
+        (
+            in_order.next().unwrap(),
+            in_order.next().unwrap(),
+            in_order.next().unwrap(),
+            in_order.next().unwrap(),
+            in_order.next().unwrap(),
+        )
+    }
+}
+
+impl KreyviumStreamByte<FheUint8> {
+    /// Returns a reference to the FHE server key owned by this stream.
+    /// Panics if no server key is stored.
+    pub fn get_server_key(&self) -> &ServerKey {
+        let stored_key = self.fhe_key.as_ref();
+        stored_key.unwrap()
+    }
+}
--- a/apps/trivium/src/kreyvium/kreyvium_shortint.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_shortint.rs
@@ -0,0 +1,205 @@
+use crate::static_deque::StaticDeque;
+
+use tfhe::shortint::prelude::*;
+
+use rayon::prelude::*;
+
+/// KreyviumStreamShortint: a struct implementing the Kreyvium stream cipher, using a generic
+/// Ciphertext for the internal representation of bits (intended to represent a single bit). To be
+/// able to compute FHE operations, it also owns a ServerKey.
+pub struct KreyviumStreamShortint {
+    /// First register (93 entries); the constructor fills it with key bits.
+    a: StaticDeque<93, Ciphertext>,
+    /// Second register (84 entries); the constructor fills it with iv bits.
+    b: StaticDeque<84, Ciphertext>,
+    /// Third register (111 entries); the constructor fills it with a zero, 66 ones and iv bits.
+    c: StaticDeque<111, Ciphertext>,
+    /// The whole 128-bit key, stored reversed.
+    k: StaticDeque<128, Ciphertext>,
+    /// The whole 128-bit iv, stored reversed.
+    iv: StaticDeque<128, Ciphertext>,
+    /// Shortint server key used for every homomorphic operation of the cipher.
+    internal_server_key: ServerKey,
+    /// Key switching key; only stored and exposed through `get_casting_key` in this file.
+    transciphering_casting_key: KeySwitchingKey,
+    /// High-level server key; only stored and exposed through `get_hl_server_key` in this file.
+    hl_server_key: tfhe::ServerKey,
+}
+
+impl KreyviumStreamShortint {
+    /// Builds a KreyviumStreamShortint from the encrypted secret key, the clear initialization
+    /// vector, the shortint server key, the key switching key and the high-level server key.
+    /// The returned stream is already initialized (1152 steps have been run before returning).
+    pub fn new(
+        mut key: [Ciphertext; 128],
+        mut iv: [u64; 128],
+        sk: ServerKey,
+        ksk: KeySwitchingKey,
+        hl_sk: tfhe::ServerKey,
+    ) -> Self {
+        // Register a: the last 93 (encrypted) key bits.
+        let a_register: [Ciphertext; 93] = std::array::from_fn(|i| key[128 - 93 + i].clone());
+
+        // Register b: the last 84 iv bits, as trivial ciphertexts.
+        let b_register: [Ciphertext; 84] =
+            std::array::from_fn(|i| sk.create_trivial(iv[128 - 84 + i]));
+
+        // Register c: a zero at index 0, ones at indices 1 to 66, then the first 44 iv bits.
+        let c_register: [Ciphertext; 111] = std::array::from_fn(|i| {
+            let clear_bit = match i {
+                0 => 0,
+                1..=66 => 1,
+                _ => iv[i - (111 - 44)],
+            };
+            sk.create_trivial(clear_bit)
+        });
+
+        // The k and iv shift registers receive the full key and iv in reversed order.
+        key.reverse();
+        iv.reverse();
+        let iv = iv.map(|bit| sk.create_trivial(bit));
+
+        let mut stream = Self {
+            a: StaticDeque::<93, Ciphertext>::new(a_register),
+            b: StaticDeque::<84, Ciphertext>::new(b_register),
+            c: StaticDeque::<111, Ciphertext>::new(c_register),
+            k: StaticDeque::<128, Ciphertext>::new(key),
+            iv: StaticDeque::<128, Ciphertext>::new(iv),
+            internal_server_key: sk,
+            transciphering_casting_key: ksk,
+            hl_server_key: hl_sk,
+        };
+        stream.init();
+        stream
+    }
+
+    /// Runs the 1152 (= 18*64) warm-up steps required by the Kreyvium specification; their
+    /// output is discarded.
+    fn init(&mut self) {
+        (0..18).for_each(|_| {
+            self.next_64();
+        });
+    }
+
+    /// Computes one turn of the stream, updating registers and outputting the new bit.
+    ///
+    /// Besides pushing the new values into `a`, `b` and `c`, the `k` and `iv` registers are
+    /// shifted by one position. This is the single-step counterpart of the `n_shifts(64)`
+    /// calls made by `next_64`; without it, every following step would read the same key and
+    /// iv bits again.
+    pub fn next_ct(&mut self) -> Ciphertext {
+        let [o, a, b, c] = self.get_output_and_values(0);
+
+        self.a.push(a);
+        self.b.push(b);
+        self.c.push(c);
+        // Keep the key and iv registers aligned with the three main registers.
+        self.k.shift();
+        self.iv.shift();
+
+        o
+    }
+
+    /// Computes a potential future step of Kreyvium, n terms in the future. This does not update
+    /// registers, but rather returns with the output, the three values that will be used to
+    /// update the registers, when the time is right. This function is meant to be used in
+    /// parallel.
+    ///
+    /// The returned array is `[o, new_a, new_b, new_c]`: the output ciphertext, followed by the
+    /// values to push into the `a`, `b` and `c` registers respectively.
+    ///
+    /// NOTE(review): unlike the other Kreyvium implementations, `n` is not asserted to be
+    /// below 65 here; the callers in this file only pass values from 0 to 63.
+    fn get_output_and_values(&self, n: usize) -> [Ciphertext; 4] {
+        let (k, iv) = (&self.k[127 - n], &self.iv[127 - n]);
+
+        // For each register: entries 1 and 2 are summed into a temporary, entries 3 and 4 are
+        // ANDed together, and entry 5 is added into one of the new register values.
+        let (a1, a2, a3, a4, a5) = (
+            &self.a[65 - n],
+            &self.a[92 - n],
+            &self.a[91 - n],
+            &self.a[90 - n],
+            &self.a[68 - n],
+        );
+        let (b1, b2, b3, b4, b5) = (
+            &self.b[68 - n],
+            &self.b[83 - n],
+            &self.b[82 - n],
+            &self.b[81 - n],
+            &self.b[77 - n],
+        );
+        let (c1, c2, c3, c4, c5) = (
+            &self.c[65 - n],
+            &self.c[110 - n],
+            &self.c[109 - n],
+            &self.c[108 - n],
+            &self.c[86 - n],
+        );
+
+        // Sums are used in place of XORs here; presumably they are reduced to a single bit by
+        // the final `add_assign` / `message_extract_assign` / `bitxor` calls below (to be
+        // confirmed against the shortint API).
+        let temp_a = self.internal_server_key.unchecked_add(a1, a2);
+        let temp_b = self.internal_server_key.unchecked_add(b1, b2);
+        let mut temp_c = self.internal_server_key.unchecked_add(c1, c2);
+        self.internal_server_key
+            .unchecked_add_assign(&mut temp_c, k);
+
+        // The three new register values and the output are computed in parallel.
+        let ((new_a, new_b), (new_c, o)) = rayon::join(
+            || {
+                rayon::join(
+                    || {
+                        // new_a = (c3 & c4) + a5 + temp_c
+                        let mut new_a = self.internal_server_key.unchecked_bitand(c3, c4);
+                        self.internal_server_key
+                            .unchecked_add_assign(&mut new_a, a5);
+                        self.internal_server_key.add_assign(&mut new_a, &temp_c);
+                        new_a
+                    },
+                    || {
+                        // new_b = (a3 & a4) + b5 + temp_a + iv
+                        let mut new_b = self.internal_server_key.unchecked_bitand(a3, a4);
+                        self.internal_server_key
+                            .unchecked_add_assign(&mut new_b, b5);
+                        self.internal_server_key
+                            .unchecked_add_assign(&mut new_b, &temp_a);
+                        self.internal_server_key.add_assign(&mut new_b, iv);
+                        new_b
+                    },
+                )
+            },
+            || {
+                rayon::join(
+                    || {
+                        // new_c = (b3 & b4) + c5 + temp_b, followed by a message extraction
+                        let mut new_c = self.internal_server_key.unchecked_bitand(b3, b4);
+                        self.internal_server_key
+                            .unchecked_add_assign(&mut new_c, c5);
+                        self.internal_server_key
+                            .unchecked_add_assign(&mut new_c, &temp_b);
+                        self.internal_server_key.message_extract_assign(&mut new_c);
+                        new_c
+                    },
+                    || {
+                        // output = (temp_a + temp_b) xor temp_c
+                        self.internal_server_key.bitxor(
+                            &self.internal_server_key.unchecked_add(&temp_a, &temp_b),
+                            &temp_c,
+                        )
+                    },
+                )
+            },
+        );
+
+        [o, new_a, new_b, new_c]
+    }
+
+    /// Runs `get_output_and_values` for the steps 0 to 63 in parallel and gathers the results
+    /// in a Vec, in reversed order: step 63 comes first and step 0 comes last.
+    fn get_64_output_and_values(&self) -> Vec<[Ciphertext; 4]> {
+        let steps = (0..64)
+            .into_par_iter()
+            .map(|step| self.get_output_and_values(step));
+        steps.rev().collect()
+    }
+
+    /// Computes 64 turns of the stream, outputting the 64 bits all at once in a
+    /// Vec (first value is oldest, last is newest).
+    pub fn next_64(&mut self) -> Vec<Ciphertext> {
+        let values = self.get_64_output_and_values();
+
+        let mut ret = Vec::<Ciphertext>::with_capacity(64);
+        // `values` holds step 0 last, so it is consumed from the back.
+        for [o, a, b, c] in values.into_iter().rev() {
+            ret.push(o);
+            self.a.push(a);
+            self.b.push(b);
+            self.c.push(c);
+        }
+        self.k.n_shifts(64);
+        self.iv.n_shifts(64);
+        ret
+    }
+
+    /// Returns a reference to the shortint server key used for the cipher's own operations.
+    pub fn get_internal_server_key(&self) -> &ServerKey {
+        &self.internal_server_key
+    }
+
+    /// Returns a reference to the key switching key given at construction.
+    pub fn get_casting_key(&self) -> &KeySwitchingKey {
+        &self.transciphering_casting_key
+    }
+
+    /// Returns a reference to the high-level server key given at construction.
+    pub fn get_hl_server_key(&self) -> &tfhe::ServerKey {
+        &self.hl_server_key
+    }
+}
--- a/apps/trivium/src/kreyvium/mod.rs
+++ b/apps/trivium/src/kreyvium/mod.rs
@@ -0,0 +1,12 @@
+#[allow(clippy::module_inception)]
+mod kreyvium;
+pub use kreyvium::KreyviumStream;
+
+mod kreyvium_byte;
+pub use kreyvium_byte::KreyviumStreamByte;
+
+mod kreyvium_shortint;
+pub use kreyvium_shortint::KreyviumStreamShortint;
+
+#[cfg(test)]
+mod test;
--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -0,0 +1,374 @@
+use tfhe::prelude::*;
+use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
+
+use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
+
+// Values for these tests come from the github repo renaud1239/Kreyvium,
+// commit fd6828f68711276c25f55e605935028f5e843f43
+
+/// Renders a stream of bits as an uppercase hexadecimal string, two digits per group of
+/// 8 bits.
+///
+/// Inside each group the bits are least significant first: `a[8 * k]` is bit 0 of byte `k`.
+/// Each byte is then written in the usual way, high nibble first.
+///
+/// # Panics
+///
+/// Panics if `a.len()` is not a multiple of 8.
+fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
+    assert!(a.len() % 8 == 0);
+    let mut hexadecimal = String::with_capacity(a.len() / 4);
+    for bits in a.chunks(8) {
+        // Encoding is bytes in LSB order: walk the bits from last to first so that
+        // bits[0] ends up as the least significant bit of the byte.
+        let byte = bits
+            .iter()
+            .rev()
+            .fold(0u8, |acc, &bit| (acc << 1) | u8::from(bit));
+        hexadecimal.push_str(&format!("{:02X}", byte));
+    }
+    hexadecimal
+}
+
+/// Concatenates the two-digit uppercase hexadecimal form of every byte of `a`, in order.
+///
+/// Panics if `a.len()` is not a multiple of 8.
+fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
+    assert!(a.len() % 8 == 0);
+    a.into_iter()
+        .map(|byte| format!("{:02X?}", byte))
+        .collect::<String>()
+}
+
+/// Concatenates the 16-digit, zero-padded, uppercase hexadecimal form of every word of `a`,
+/// in order.
+fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
+    a.into_iter()
+        .map(|word| format!("{:016X?}", word))
+        .collect::<String>()
+}
+
+#[test]
+fn kreyvium_test_1() {
+    // All-zero key and all-zero IV
+    let expected = "26DCF1F4BC0F1922";
+
+    let mut stream = KreyviumStream::<bool>::new([false; 128], [false; 128]);
+    let bits: Vec<bool> = (0..64).map(|_| stream.next_bool()).collect();
+
+    assert_eq!(expected, get_hexadecimal_string_from_lsb_first_stream(bits));
+}
+
+#[test]
+fn kreyvium_test_2() {
+    // Only the first key bit is set, the IV stays all-zero
+    let mut key = [false; 128];
+    key[0] = true;
+    let expected = "4FD421D4DA3D2C8A";
+
+    let mut stream = KreyviumStream::<bool>::new(key, [false; 128]);
+    let bits: Vec<bool> = (0..64).map(|_| stream.next_bool()).collect();
+
+    assert_eq!(expected, get_hexadecimal_string_from_lsb_first_stream(bits));
+}
+
+#[test]
+fn kreyvium_test_3() {
+    // Only the first IV bit is set, the key stays all-zero
+    let mut iv = [false; 128];
+    iv[0] = true;
+    let expected = "C9217BA0D762ACA1";
+
+    let mut stream = KreyviumStream::<bool>::new([false; 128], iv);
+    let bits: Vec<bool> = (0..64).map(|_| stream.next_bool()).collect();
+
+    assert_eq!(expected, get_hexadecimal_string_from_lsb_first_stream(bits));
+}
+
+#[test]
+fn kreyvium_test_4() {
+    // Expands a hexadecimal string into bits, every byte contributing its bits LSB first
+    fn lsb_first_bits(hex: &str) -> [bool; 128] {
+        let mut bits = [false; 128];
+        for byte_idx in 0..hex.len() / 2 {
+            let byte = u8::from_str_radix(&hex[2 * byte_idx..2 * byte_idx + 2], 16).unwrap();
+            for bit_idx in 0..8 {
+                bits[8 * byte_idx + bit_idx] = (byte >> bit_idx) & 1 == 1;
+            }
+        }
+        bits
+    }
+
+    let key = lsb_first_bits("0053A6F94C9FF24598EB000000000000");
+    let iv = lsb_first_bits("0D74DB42A91077DE45AC000000000000");
+    let expected = "D1F0303482061111";
+
+    let mut stream = KreyviumStream::<bool>::new(key, iv);
+    let bits: Vec<bool> = (0..64).map(|_| stream.next_bool()).collect();
+
+    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(bits);
+    assert_eq!(hexadecimal, expected);
+}
+
+#[test]
+fn kreyvium_test_fhe_long() {
+    // Expands a hexadecimal string into bits, every byte contributing its bits LSB first
+    fn lsb_first_bits(hex: &str) -> [bool; 128] {
+        let mut bits = [false; 128];
+        for byte_idx in 0..hex.len() / 2 {
+            let byte = u8::from_str_radix(&hex[2 * byte_idx..2 * byte_idx + 2], 16).unwrap();
+            for bit_idx in 0..8 {
+                bits[8 * byte_idx + bit_idx] = (byte >> bit_idx) & 1 == 1;
+            }
+        }
+        bits
+    }
+
+    let (client_key, server_key) = generate_keys(ConfigBuilder::default().build());
+
+    let key = lsb_first_bits("0053A6F94C9FF24598EB000000000000");
+    let iv = lsb_first_bits("0D74DB42A91077DE45AC000000000000");
+    let expected = "D1F0303482061111";
+
+    // Only the key is encrypted, the IV is given in the clear
+    let cipher_key = key.map(|bit| FheBool::encrypt(bit, &client_key));
+    let mut stream = KreyviumStream::<FheBool>::new(cipher_key, iv, &server_key);
+
+    let mut clear_bits = Vec::<bool>::with_capacity(64);
+    while clear_bits.len() < 64 {
+        for cipher_bit in stream.next_64() {
+            let bit: bool = cipher_bit.decrypt(&client_key);
+            clear_bits.push(bit);
+        }
+    }
+
+    assert_eq!(
+        expected,
+        get_hexadecimal_string_from_lsb_first_stream(clear_bits)
+    );
+}
+
+use tfhe::shortint::prelude::*;
+
+#[test]
+fn kreyvium_test_shortint_long() {
+    // Expands a hexadecimal string into 0/1 values, every byte contributing its bits
+    // LSB first
+    fn lsb_first_bits(hex: &str) -> [u64; 128] {
+        let mut bits = [0; 128];
+        for byte_idx in 0..hex.len() / 2 {
+            let byte = u64::from_str_radix(&hex[2 * byte_idx..2 * byte_idx + 2], 16).unwrap();
+            for bit_idx in 0..8 {
+                bits[8 * byte_idx + bit_idx] = (byte >> bit_idx) & 1;
+            }
+        }
+        bits
+    }
+
+    // High-level keys, and the shortint keys they are built on
+    let (hl_client_key, hl_server_key) = generate_keys(ConfigBuilder::default().build());
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    // Keys used by the stream itself, plus the key switching key towards the
+    // high-level keys
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    let key = lsb_first_bits("0053A6F94C9FF24598EB000000000000");
+    let iv = lsb_first_bits("0D74DB42A91077DE45AC000000000000");
+    let expected = "D1F0303482061111".to_string();
+
+    let cipher_key = key.map(|bit| client_key.encrypt(bit));
+
+    // The message is an encryption of 0
+    let zero_message = FheUint64::try_encrypt(0u64, &hl_client_key).unwrap();
+
+    let mut stream = KreyviumStreamShortint::new(cipher_key, iv, server_key, ksk, hl_server_key);
+
+    let transciphered = stream.trans_encrypt_64(zero_message);
+    let clear: u64 = transciphered.decrypt(&hl_client_key);
+
+    assert_eq!(expected, get_hexagonal_string_from_u64(vec![clear]));
+}
+
+#[test]
+fn kreyvium_test_clear_byte() {
+    // Parses a 32-digit hexadecimal string into its 16 bytes
+    fn hex_to_bytes(hex: &str) -> [u8; 16] {
+        let mut bytes = [0u8; 16];
+        for byte_idx in 0..hex.len() / 2 {
+            bytes[byte_idx] =
+                u8::from_str_radix(&hex[2 * byte_idx..2 * byte_idx + 2], 16).unwrap();
+        }
+        bytes
+    }
+
+    let key_bytes = hex_to_bytes("0053A6F94C9FF24598EB000000000000");
+    let iv_bytes = hex_to_bytes("0D74DB42A91077DE45AC000000000000");
+    let expected = "D1F0303482061111".to_string();
+
+    let mut stream = KreyviumStreamByte::<u8>::new(key_bytes, iv_bytes);
+
+    let mut keystream = Vec::<u8>::with_capacity(8);
+    while keystream.len() < 8 {
+        for byte in stream.next_64() {
+            keystream.push(byte)
+        }
+    }
+
+    assert_eq!(expected, get_hexagonal_string_from_bytes(keystream));
+}
+
+#[test]
+fn kreyvium_test_byte_long() {
+    // Parses a 32-digit hexadecimal string into its 16 bytes
+    fn hex_to_bytes(hex: &str) -> [u8; 16] {
+        let mut bytes = [0u8; 16];
+        for byte_idx in 0..hex.len() / 2 {
+            bytes[byte_idx] =
+                u8::from_str_radix(&hex[2 * byte_idx..2 * byte_idx + 2], 16).unwrap();
+        }
+        bytes
+    }
+
+    let config = ConfigBuilder::default()
+        .enable_function_evaluation()
+        .build();
+    let (client_key, server_key) = generate_keys(config);
+
+    let key_bytes = hex_to_bytes("0053A6F94C9FF24598EB000000000000");
+    let iv_bytes = hex_to_bytes("0D74DB42A91077DE45AC000000000000");
+    let expected = "D1F0303482061111".to_string();
+
+    // Only the key is encrypted, the IV is given in the clear
+    let cipher_key = key_bytes.map(|byte| FheUint8::encrypt(byte, &client_key));
+    let mut stream = KreyviumStreamByte::<FheUint8>::new(cipher_key, iv_bytes, &server_key);
+
+    let mut keystream = Vec::<u8>::with_capacity(8);
+    while keystream.len() < 8 {
+        for cipher_byte in stream.next_64() {
+            let byte: u8 = cipher_byte.decrypt(&client_key);
+            keystream.push(byte)
+        }
+    }
+
+    assert_eq!(expected, get_hexagonal_string_from_bytes(keystream));
+}
+
+#[test]
+fn kreyvium_test_fhe_byte_transciphering_long() {
+    // Parses a 32-digit hexadecimal string into its 16 bytes
+    fn hex_to_bytes(hex: &str) -> [u8; 16] {
+        let mut bytes = [0u8; 16];
+        for byte_idx in 0..hex.len() / 2 {
+            bytes[byte_idx] =
+                u8::from_str_radix(&hex[2 * byte_idx..2 * byte_idx + 2], 16).unwrap();
+        }
+        bytes
+    }
+
+    let config = ConfigBuilder::default()
+        .enable_function_evaluation()
+        .build();
+    let (client_key, server_key) = generate_keys(config);
+
+    let key = hex_to_bytes("0053A6F94C9FF24598EB000000000000");
+    let iv = hex_to_bytes("0D74DB42A91077DE45AC000000000000");
+    let expected = "D1F0303482061111".to_string();
+
+    let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+
+    // The message is an encryption of 0
+    let zero_message = FheUint64::try_encrypt(0u64, &client_key).unwrap();
+
+    let mut stream = KreyviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+
+    let transciphered = stream.trans_encrypt_64(zero_message);
+    let clear: u64 = transciphered.decrypt(&client_key);
+
+    assert_eq!(expected, get_hexagonal_string_from_u64(vec![clear]));
+}
--- a/apps/trivium/src/lib.rs
+++ b/apps/trivium/src/lib.rs
@@ -0,0 +1,10 @@
+mod static_deque;
+
+mod kreyvium;
+pub use kreyvium::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint};
+
+mod trivium;
+pub use trivium::{TriviumStream, TriviumStreamByte, TriviumStreamShortint};
+
+mod trans_ciphering;
+pub use trans_ciphering::TransCiphering;
--- a/apps/trivium/src/static_deque/mod.rs
+++ b/apps/trivium/src/static_deque/mod.rs
@@ -0,0 +1,5 @@
+#[allow(clippy::module_inception)]
+mod static_deque;
+pub use static_deque::StaticDeque;
+mod static_byte_deque;
+pub use static_byte_deque::{StaticByteDeque, StaticByteDequeInput};
--- a/apps/trivium/src/static_deque/static_byte_deque.rs
+++ b/apps/trivium/src/static_deque/static_byte_deque.rs
@@ -0,0 +1,141 @@
+//! This module implements the StaticByteDeque struct: a deque of bytes. The idea
+//! is that this is a wrapper around StaticDeque, but StaticByteDeque has an additional
+//! functionality: it can construct the "intermediate" bytes, made of parts of other bytes.
+//! This is pretending to store bits, and allows accessing bits in chunks of 8 consecutive.
+
+use crate::static_deque::StaticDeque;
+
+use tfhe::FheUint8;
+
+/// Internal trait specifying which operations are needed by StaticByteDeque:
+/// cloning, shifting right and left by a `u8` amount, and bitwise OR with a value of the
+/// same type. Every operator must produce `OpOutput`.
+pub trait StaticByteDequeInput<OpOutput>:
+    Clone
+    + std::ops::Shr<u8, Output = OpOutput>
+    + std::ops::Shl<u8, Output = OpOutput>
+    + std::ops::BitOr<Output = OpOutput>
+{
+}
+// The trait has no items of its own. It is implemented for `u8` and `FheUint8`, both by
+// value and by reference, with the operators yielding the owned type in every case.
+impl StaticByteDequeInput<u8> for u8 {}
+impl StaticByteDequeInput<u8> for &u8 {}
+impl StaticByteDequeInput<FheUint8> for FheUint8 {}
+impl StaticByteDequeInput<FheUint8> for &FheUint8 {}
+
+/// A deque of `N` bytes, stored in a `StaticDeque`.
+/// Here T must represent a type covering a byte, like u8 or FheUint8.
+#[derive(Clone)]
+pub struct StaticByteDeque<const N: usize, T> {
+    // Underlying storage, one element per byte
+    deque: StaticDeque<N, T>,
+}
+
+impl<const N: usize, T> StaticByteDeque<N, T>
+where
+    T: StaticByteDequeInput<T>,
+    for<'a> &'a T: StaticByteDequeInput<T>,
+{
+    /// Builds the deque from a fully initialized array of bytes: the first element is the
+    /// oldest one, the last element the newest
+    pub fn new(_arr: [T; N]) -> Self {
+        let deque = StaticDeque::<N, T>::new(_arr);
+        Self { deque }
+    }
+
+    /// Pushes one byte element (covering 8 underlying bits) to the underlying deque
+    pub fn push(&mut self, val: T) {
+        self.deque.push(val)
+    }
+
+    /// Applies n shifts in a row to the underlying deque
+    pub fn n_shifts(&mut self, n: usize) {
+        self.deque.n_shifts(n);
+    }
+
+    /// Read access to the array backing the underlying deque
+    #[allow(dead_code)]
+    fn get_arr(&self) -> &[T; N] {
+        self.deque.get_arr()
+    }
+
+    /// Isolates bit `i % 8` of the byte at deque index `i / 8`: the result is that byte
+    /// with every other bit cleared, so it is zero exactly when the bit is not set
+    #[allow(dead_code)]
+    fn bit(&self, i: usize) -> T
+    where
+        for<'a> &'a T: std::ops::BitAnd<u8, Output = T>,
+    {
+        let mask: u8 = 1u8 << (i % 8);
+        let holder: &T = &self.deque[i / 8];
+        holder & mask
+    }
+
+    /// Returns the 8 bits starting at bit offset `i`. When `i` is a multiple of 8 this is a
+    /// clone of the stored byte at deque index `i / 8`; otherwise the byte is rebuilt from
+    /// that byte and the one at the next deque index.
+    pub fn byte(&self, i: usize) -> T {
+        let (byte_idx, bit_offset) = (i / 8, (i % 8) as u8);
+        let current: &T = &self.deque[byte_idx];
+
+        match bit_offset {
+            0 => current.clone(),
+            _ => {
+                let following: &T = &self.deque[byte_idx + 1];
+                (current << bit_offset) | (following >> (8 - bit_offset))
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::static_deque::StaticByteDeque;
+
+    #[test]
+    fn byte_deque_test() {
+        let mut deque = StaticByteDeque::<3, u8>::new([2, 64, 128]);
+        deque.push(4);
+
+        // From youngest to oldest the bytes are now 4, 128 and 64, so the only set bits
+        // are bit 2 of the first byte, bit 7 of the second and bit 6 of the third.
+        let set_bits = [2, 8 + 7, 16 + 6];
+        for i in 0..24 {
+            if set_bits.contains(&i) {
+                assert!(deque.bit(i) > 0);
+            } else {
+                assert!(deque.bit(i) == 0);
+            }
+        }
+
+        // Bytes read at every bit offset from 0 to 16
+        let expected_bytes: [u8; 17] = [
+            4, 9, 18, 36, 72, 144, 32, 64, 128, 0, 1, 2, 4, 8, 16, 32, 64,
+        ];
+        for (i, expected) in expected_bytes.iter().enumerate() {
+            assert_eq!(deque.byte(i), *expected);
+        }
+    }
+}
--- a/apps/trivium/src/static_deque/static_deque.rs
+++ b/apps/trivium/src/static_deque/static_deque.rs
@@ -0,0 +1,135 @@
+//! This module implements the StaticDeque struct: a deque utility whose size
+//! is known at compile time. Construction, push, and indexing are publicly
+//! available.
+
+use core::ops::{Index, IndexMut};
+
+/// StaticDeque: a struct implementing a deque whose size is known at compile time.
+/// It has 2 members: the static array containing the data (never empty), and a cursor
+/// equal to the index of the oldest element (and the next one to be overwritten).
+#[derive(Clone)]
+pub struct StaticDeque<const N: usize, T> {
+    arr: [T; N],
+    // Always kept in 0..N
+    cursor: usize,
+}
+
+impl<const N: usize, T> StaticDeque<N, T> {
+    /// Constructor always uses a fully initialized array, the first element of
+    /// which is oldest, the last is newest
+    pub fn new(_arr: [T; N]) -> Self {
+        Self {
+            arr: _arr,
+            cursor: 0,
+        }
+    }
+
+    /// Push a new element to the deque, overwriting the oldest at the same time.
+    pub fn push(&mut self, val: T) {
+        self.arr[self.cursor] = val;
+        self.shift();
+    }
+
+    /// Shift: equivalent to pushing the oldest element
+    pub fn shift(&mut self) {
+        self.n_shifts(1);
+    }
+
+    /// Computes n shifts in a row: the cursor advances by `n` modulo `N`.
+    ///
+    /// `n` is reduced modulo `N` before it is added to the cursor, so that no value of
+    /// `n` can overflow the addition.
+    pub fn n_shifts(&mut self, n: usize) {
+        let step = n % N;
+        // Number of slots left before the cursor wraps around to 0
+        let until_wrap = N - self.cursor;
+        self.cursor = if step >= until_wrap {
+            step - until_wrap
+        } else {
+            self.cursor + step
+        };
+    }
+
+    /// Getter for the internal memory, in storage order (not in age order)
+    #[allow(dead_code)]
+    pub fn get_arr(&self) -> &[T; N] {
+        &self.arr
+    }
+}
+
+/// Read-only indexing by age for the StaticDeque: 0 is the youngest element, N-1 is the
+/// oldest, and any index of N or more panics.
+impl<const N: usize, T> Index<usize> for StaticDeque<N, T> {
+    type Output = T;
+
+    /// 0 is youngest
+    fn index(&self, i: usize) -> &T {
+        assert!(i < N, "Index {:?} too high for size {:?}", i, N);
+        // The cursor is the slot of the oldest element, so the youngest one sits right
+        // before it, wrapping around the array.
+        let slot = (N + self.cursor - i - 1) % N;
+        &self.arr[slot]
+    }
+}
+/// Mutable indexing by age for the StaticDeque: 0 is the youngest element, N-1 is the
+/// oldest, and any index of N or more panics.
+impl<const N: usize, T> IndexMut<usize> for StaticDeque<N, T> {
+    /// 0 is youngest
+    fn index_mut(&mut self, i: usize) -> &mut T {
+        assert!(i < N, "Index {:?} too high for size {:?}", i, N);
+        // The cursor is the slot of the oldest element, so the youngest one sits right
+        // before it, wrapping around the array.
+        let slot = (N + self.cursor - i - 1) % N;
+        &mut self.arr[slot]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::static_deque::StaticDeque;
+
+    #[test]
+    fn test_static_deque() {
+        let mut deque = StaticDeque::new([1, 2, 3, 4, 5, 6]);
+
+        // Every push overwrites the oldest slot, wrapping around the array
+        (7..11).for_each(|value| deque.push(value));
+        assert_eq!(*deque.get_arr(), [7, 8, 9, 10, 5, 6]);
+
+        (11..15).for_each(|value| deque.push(value));
+        assert_eq!(*deque.get_arr(), [13, 14, 9, 10, 11, 12]);
+
+        // Indexing goes from youngest to oldest
+        for (age, expected) in [14, 13, 12, 11, 10, 9].iter().enumerate() {
+            assert_eq!(deque[age], *expected);
+        }
+    }
+
+    #[test]
+    fn test_static_deque_indexmut() {
+        let mut deque = StaticDeque::new([1, 2, 3, 4, 5, 6]);
+
+        (7..11).for_each(|value| deque.push(value));
+        assert_eq!(*deque.get_arr(), [7, 8, 9, 10, 5, 6]);
+
+        (11..15).for_each(|value| deque.push(value));
+        assert_eq!(*deque.get_arr(), [13, 14, 9, 10, 11, 12]);
+
+        // Overwrite the second youngest element only
+        deque[1] = 100;
+
+        for (age, expected) in [14, 100, 12, 11, 10, 9].iter().enumerate() {
+            assert_eq!(deque[age], *expected);
+        }
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_static_deque_index_fail() {
+        // Valid indices are 0..6, so 6 must panic
+        let deque = StaticDeque::new([1, 2, 3, 4, 5, 6]);
+        let _ = deque[6];
+    }
+}
--- a/apps/trivium/src/trans_ciphering/mod.rs
+++ b/apps/trivium/src/trans_ciphering/mod.rs
@@ -0,0 +1,119 @@
+//! This module contains extensions of some TriviumStream or KreyviumStream objects,
+//! when trans ciphering is available to them.
+
+use crate::{KreyviumStreamByte, KreyviumStreamShortint, TriviumStreamByte, TriviumStreamShortint};
+use tfhe::shortint::Ciphertext;
+
+use tfhe::prelude::*;
+use tfhe::{set_server_key, unset_server_key, FheUint64, FheUint8, ServerKey};
+
+use rayon::prelude::*;
+
+/// Trait specifying the interface for trans ciphering a FheUint64 object. Since it is meant
+/// to be used with stream ciphers, encryption and decryption are by default the same.
+pub trait TransCiphering {
+    /// Trans ciphers `cipher`. Takes `&mut self`: the implementations in this module
+    /// advance their stream by calling `next_64`.
+    fn trans_encrypt_64(&mut self, cipher: FheUint64) -> FheUint64;
+    /// Default implementation: forwards to `trans_encrypt_64`.
+    fn trans_decrypt_64(&mut self, cipher: FheUint64) -> FheUint64 {
+        self.trans_encrypt_64(cipher)
+    }
+}
+
+/// XORs `cipher` with the 64-bit word assembled from 8 keystream bytes.
+///
+/// Byte `i` of `stream` is placed at bit offset `8 * (7 - i)`, so `stream[0]` ends up in
+/// the most significant byte of the word. `fhe_server_key` is installed on the calling
+/// thread and on every rayon thread for the duration of the computation, and removed
+/// before returning.
+///
+/// # Panics
+///
+/// Panics if `stream` does not contain exactly 8 bytes.
+fn transcipher_from_fheu8_stream(
+    stream: Vec<FheUint8>,
+    cipher: FheUint64,
+    fhe_server_key: &ServerKey,
+) -> FheUint64 {
+    assert_eq!(stream.len(), 8);
+
+    set_server_key(fhe_server_key.clone());
+    rayon::broadcast(|_| set_server_key(fhe_server_key.clone()));
+
+    // Assemble the keystream word first. The shifted bytes do not overlap, so OR-ing
+    // them simply concatenates them.
+    let keystream: FheUint64 = stream
+        .into_par_iter()
+        .enumerate()
+        .map(|(i, x)| FheUint64::cast_from(x) << (8 * (7 - i) as u8))
+        .reduce_with(|a, b| a | b)
+        .unwrap();
+
+    // The cipher must be XORed exactly once. XORing it into every shifted byte before
+    // the OR would force to 1 every bit that is set in the cipher.
+    let ret = &cipher ^ &keystream;
+
+    unset_server_key();
+    rayon::broadcast(|_| unset_server_key());
+
+    ret
+}
+
+/// XORs `cipher` with the 64-bit word assembled from 64 keystream bits, each held in its
+/// own shortint ciphertext.
+///
+/// The bits are packed two by two: with `internal_server_key`, every pair becomes
+/// `first + 2 * second`, which `casting_key` then casts. The 32 resulting blocks are
+/// converted into a `FheUint64` and XORed with `cipher` while `hl_server_key` is installed.
+///
+/// # Panics
+///
+/// Panics if `stream` does not contain exactly 64 ciphertexts.
+fn transcipher_from_1_1_stream(
+    stream: Vec<Ciphertext>,
+    cipher: FheUint64,
+    hl_server_key: &ServerKey,
+    internal_server_key: &tfhe::shortint::ServerKey,
+    casting_key: &tfhe::shortint::KeySwitchingKey,
+) -> FheUint64 {
+    assert_eq!(stream.len(), 64);
+
+    // Block number `block` is taken from stream byte `7 - block / 4`, pair `block % 4`
+    let packed_block = |block: usize| {
+        let first_bit = 8 * (7 - block / 4) + 2 * (block % 4);
+        let low = &stream[first_bit];
+        let high = &stream[first_bit + 1];
+
+        let doubled_high = internal_server_key.unchecked_scalar_mul(high, 2);
+        casting_key.cast(&internal_server_key.unchecked_add(low, &doubled_high))
+    };
+    let blocks = (0..32)
+        .into_par_iter()
+        .map(packed_block)
+        .collect::<Vec<_>>();
+
+    set_server_key(hl_server_key.clone());
+    let keystream = FheUint64::try_from(blocks).unwrap();
+    let ret = &cipher ^ &keystream;
+    unset_server_key();
+    ret
+}
+
+impl TransCiphering for TriviumStreamByte<FheUint8> {
+    /// `TriviumStreamByte<FheUint8>`: a full step (`next_64`) provides the keystream
+    /// bytes, which `transcipher_from_fheu8_stream` combines with the input cipher
+    fn trans_encrypt_64(&mut self, cipher: FheUint64) -> FheUint64 {
+        let keystream_bytes = self.next_64();
+        let server_key = self.get_server_key();
+        transcipher_from_fheu8_stream(keystream_bytes, cipher, server_key)
+    }
+}
+
+impl TransCiphering for KreyviumStreamByte<FheUint8> {
+    /// `KreyviumStreamByte<FheUint8>`: a full step (`next_64`) provides the keystream
+    /// bytes, which `transcipher_from_fheu8_stream` combines with the input cipher
+    fn trans_encrypt_64(&mut self, cipher: FheUint64) -> FheUint64 {
+        let keystream_bytes = self.next_64();
+        let server_key = self.get_server_key();
+        transcipher_from_fheu8_stream(keystream_bytes, cipher, server_key)
+    }
+}
+
+impl TransCiphering for TriviumStreamShortint {
+    /// TriviumStreamShortint: a full step (`next_64`) provides the keystream shortints,
+    /// which `transcipher_from_1_1_stream` pairs 2 by 2, packs in a full word and XORs
+    /// with the input cipher
+    fn trans_encrypt_64(&mut self, cipher: FheUint64) -> FheUint64 {
+        let keystream_bits = self.next_64();
+        transcipher_from_1_1_stream(
+            keystream_bits,
+            cipher,
+            self.get_hl_server_key(),
+            self.get_internal_server_key(),
+            self.get_casting_key(),
+        )
+    }
+}
+
+impl TransCiphering for KreyviumStreamShortint {
+    /// KreyviumStreamShortint: a full step (`next_64`) provides the keystream shortints,
+    /// which `transcipher_from_1_1_stream` pairs 2 by 2, packs in a full word and XORs
+    /// with the input cipher
+    fn trans_encrypt_64(&mut self, cipher: FheUint64) -> FheUint64 {
+        let keystream_bits = self.next_64();
+        transcipher_from_1_1_stream(
+            keystream_bits,
+            cipher,
+            self.get_hl_server_key(),
+            self.get_internal_server_key(),
+            self.get_casting_key(),
+        )
+    }
+}
--- a/apps/trivium/src/trivium/mod.rs
+++ b/apps/trivium/src/trivium/mod.rs
@@ -0,0 +1,11 @@
+mod trivium_bool;
+pub use trivium_bool::TriviumStream;
+
+mod trivium_byte;
+pub use trivium_byte::TriviumStreamByte;
+
+mod trivium_shortint;
+pub use trivium_shortint::TriviumStreamShortint;
+
+#[cfg(test)]
+mod test;
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -0,0 +1,406 @@
+use tfhe::prelude::*;
+use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
+
+use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
+
+// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
+// file testvectors/trivium-80.80.test-vectors
+
+/// Renders a bit stream as an uppercase hexadecimal string.
+///
+/// The stream is read in groups of 8 bits, each group being one byte given least significant
+/// bit first. Every byte produces two hexadecimal digits, high nibble first, so the output has
+/// `a.len() / 4` characters.
+///
+/// # Panics
+///
+/// Panics if `a.len()` is not a multiple of 8.
+fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
+    /// Converts 4 bits (least significant bit first) into a single uppercase hex digit.
+    fn nibble_to_hex(bits: &[bool]) -> char {
+        // Walk from the most significant bit down so a plain shift-and-or rebuilds the value.
+        let value = bits
+            .iter()
+            .rev()
+            .fold(0u32, |acc, &bit| (acc << 1) | u32::from(bit));
+        // `value` is below 16 for a 4-bit slice, so `from_digit` always succeeds.
+        char::from_digit(value, 16).unwrap().to_ascii_uppercase()
+    }
+
+    assert!(a.len() % 8 == 0);
+    let mut hexadecimal = String::with_capacity(a.len() / 4);
+    for byte in a.chunks(8) {
+        // Encoding is bytes in LSB order: bits 4..8 are the high nibble, bits 0..4 the low one.
+        hexadecimal.push(nibble_to_hex(&byte[4..8]));
+        hexadecimal.push(nibble_to_hex(&byte[0..4]));
+    }
+    hexadecimal
+}
+
+/// Renders bytes as an uppercase hexadecimal string, two zero-padded digits per byte, in the
+/// order the bytes are given.
+///
+/// # Panics
+///
+/// Panics if `a.len()` is not a multiple of 8.
+fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
+    assert!(a.len() % 8 == 0);
+    a.into_iter()
+        .map(|byte| format!("{:02X?}", byte))
+        .collect::<String>()
+}
+
+/// Renders 64-bit words as an uppercase hexadecimal string, sixteen zero-padded digits per
+/// word, in the order the words are given.
+fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
+    a.into_iter()
+        .map(|word| format!("{:016X?}", word))
+        .collect::<String>()
+}
+
+/// Clear-text Trivium with an all-zero key and an all-zero IV, checked against four 64-byte
+/// windows of the reference keystream.
+#[test]
+fn trivium_test_1() {
+    let key = [false; 80];
+    let iv = [false; 80];
+    let output_0_63    = "FBE0BF265859051B517A2E4E239FC97F563203161907CF2DE7A8790FA1B2E9CDF75292030268B7382B4C1A759AA2599A285549986E74805903801A4CB5A5D4F2".to_string();
+    let output_192_255 = "0F1BE95091B8EA857B062AD52BADF47784AC6D9B2E3F85A9D79995043302F0FDF8B76E5BC8B7B4F0AA46CD20DDA04FDD197BC5E1635496828F2DBFB23F6BD5D0".to_string();
+    let output_256_319 = "80F9075437BAC73F696D0ABE3972F5FCE2192E5FCC13C0CB77D0ABA09126838D31A2D38A2087C46304C8A63B54109F679B0B1BC71E72A58D6DD3E0A3FF890D4A".to_string();
+    let output_448_511 = "68450EB0910A98EF1853E0FC1BED8AB6BB08DF5F167D34008C2A85284D4B886DD56883EE92BF18E69121670B4C81A5689C9B0538373D22EB923A28A2DB44C0EB".to_string();
+
+    let mut trivium = TriviumStream::<bool>::new(key, iv);
+
+    // 512 bytes of keystream, generated one bit at a time.
+    let keystream_bits: Vec<bool> = (0..512 * 8).map(|_| trivium.next_bool()).collect();
+
+    // Each keystream byte is two hex characters, hence the `* 2` on every byte offset.
+    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(keystream_bits);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+    assert_eq!(output_192_255, hexadecimal[192 * 2..256 * 2]);
+    assert_eq!(output_256_319, hexadecimal[256 * 2..320 * 2]);
+    assert_eq!(output_448_511, hexadecimal[448 * 2..512 * 2]);
+}
+
+/// Clear-text Trivium with a key that has only bit 7 set and an all-zero IV, checked against
+/// four 64-byte windows of the reference keystream.
+#[test]
+fn trivium_test_2() {
+    let iv = [false; 80];
+    let mut key = [false; 80];
+    key[7] = true;
+
+    let output_0_63    = "38EB86FF730D7A9CAF8DF13A4420540DBB7B651464C87501552041C249F29A64D2FBF515610921EBE06C8F92CECF7F8098FF20CCCC6A62B97BE8EF7454FC80F9".to_string();
+    let output_192_255 = "EAF2625D411F61E41F6BAEEDDD5FE202600BD472F6C9CD1E9134A745D900EF6C023E4486538F09930CFD37157C0EB57C3EF6C954C42E707D52B743AD83CFF297".to_string();
+    let output_256_319 = "9A203CF7B2F3F09C43D188AA13A5A2021EE998C42F777E9B67C3FA221A0AA1B041AA9E86BC2F5C52AFF11F7D9EE480CB1187B20EB46D582743A52D7CD080A24A".to_string();
+    let output_448_511 = "EBF14772061C210843C18CEA2D2A275AE02FCB18E5D7942455FF77524E8A4CA51E369A847D1AEEFB9002FCD02342983CEAFA9D487CC2032B10192CD416310FA4".to_string();
+
+    let mut trivium = TriviumStream::<bool>::new(key, iv);
+
+    // 512 bytes of keystream, generated one bit at a time.
+    let keystream_bits: Vec<bool> = (0..512 * 8).map(|_| trivium.next_bool()).collect();
+
+    // Each keystream byte is two hex characters, hence the `* 2` on every byte offset.
+    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(keystream_bits);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+    assert_eq!(output_192_255, hexadecimal[192 * 2..256 * 2]);
+    assert_eq!(output_256_319, hexadecimal[256 * 2..320 * 2]);
+    assert_eq!(output_448_511, hexadecimal[448 * 2..512 * 2]);
+}
+
+/// Clear-text Trivium with an all-zero key and an IV that has only bit 7 set, checked against
+/// four 64-byte windows of the reference keystream.
+#[test]
+fn trivium_test_3() {
+    let key = [false; 80];
+    let mut iv = [false; 80];
+    iv[7] = true;
+
+    let output_0_63    = "F8901736640549E3BA7D42EA2D07B9F49233C18D773008BD755585B1A8CBAB86C1E9A9B91F1AD33483FD6EE3696D659C9374260456A36AAE11F033A519CBD5D7".to_string();
+    let output_192_255 = "87423582AF64475C3A9C092E32A53C5FE07D35B4C9CA288A89A43DEF3913EA9237CA43342F3F8E83AD3A5C38D463516F94E3724455656A36279E3E924D442F06".to_string();
+    let output_256_319 = "D94389A90E6F3BF2BB4C8B057339AAD8AA2FEA238C29FCAC0D1FF1CB2535A07058BA995DD44CFC54CCEC54A5405B944C532D74E50EA370CDF1BA1CBAE93FC0B5".to_string();
+    let output_448_511 = "4844151714E56A3A2BBFBA426A1D60F9A4F265210A91EC29259AE2035234091C49FFB1893FA102D425C57C39EB4916F6D148DC83EBF7DE51EEB9ABFE045FB282".to_string();
+
+    let mut trivium = TriviumStream::<bool>::new(key, iv);
+
+    // 512 bytes of keystream, generated one bit at a time.
+    let keystream_bits: Vec<bool> = (0..512 * 8).map(|_| trivium.next_bool()).collect();
+
+    // Each keystream byte is two hex characters, hence the `* 2` on every byte offset.
+    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(keystream_bits);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+    assert_eq!(output_192_255, hexadecimal[192 * 2..256 * 2]);
+    assert_eq!(output_256_319, hexadecimal[256 * 2..320 * 2]);
+    assert_eq!(output_448_511, hexadecimal[448 * 2..512 * 2]);
+}
+
+/// Clear-text Trivium with a non-trivial key and IV, checked against four 64-byte windows of
+/// the reference keystream, the last one ending at byte 131071.
+#[test]
+fn trivium_test_4() {
+    /// Expands a 20-character hex string into 80 bits, each byte least significant bit first.
+    fn hex_to_lsb_first_bits(hex: &str) -> [bool; 80] {
+        let mut bits = [false; 80];
+        for (byte_index, start) in (0..hex.len()).step_by(2).enumerate() {
+            let byte = u8::from_str_radix(&hex[start..start + 2], 16).unwrap();
+            for bit_index in 0..8 {
+                bits[8 * byte_index + bit_index] = (byte >> bit_index) & 1 == 1;
+            }
+        }
+        bits
+    }
+
+    let key = hex_to_lsb_first_bits("0053A6F94C9FF24598EB");
+    let iv = hex_to_lsb_first_bits("0D74DB42A91077DE45AC");
+
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+    let output_65472_65535 = "C04C24A6938C8AF8A491D5E481271E0E601338F01067A86A795CA493AA4FF265619B8D448B706B7C88EE8395FC79E5B51AB40245BBF7773AE67DF86FCFB71F30".to_string();
+    let output_65536_65599 = "011A0D7EC32FA102C66C164CFCB189AED9F6982E8C7370A6A37414781192CEB155C534C1C8C9E53FDEADF2D3D0577DAD3A8EB2F6E5265F1E831C86844670BC69".to_string();
+    let output_131008_131071 = "48107374A9CE3AAF78221AE77789247CF6896A249ED75DCE0CF2D30EB9D889A0C61C9F480E5C07381DED9FAB2AD54333E82C89BA92E6E47FD828F1A66A8656E0".to_string();
+
+    let mut trivium = TriviumStream::<bool>::new(key, iv);
+
+    // 131072 bytes of keystream, generated one bit at a time.
+    let keystream_bits: Vec<bool> = (0..131072 * 8).map(|_| trivium.next_bool()).collect();
+
+    // Each keystream byte is two hex characters, hence the `* 2` on every byte offset.
+    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(keystream_bits);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+    assert_eq!(output_65472_65535, hexadecimal[65472 * 2..65536 * 2]);
+    assert_eq!(output_65536_65599, hexadecimal[65536 * 2..65600 * 2]);
+    assert_eq!(output_131008_131071, hexadecimal[131008 * 2..131072 * 2]);
+}
+
+/// Clear-text byte-oriented Trivium (`TriviumStreamByte<u8>`) with a non-trivial key and IV,
+/// checked against four 64-byte windows of the reference keystream.
+#[test]
+fn trivium_test_clear_byte() {
+    /// Parses a 20-character hex string into its 10 bytes, in string order.
+    fn hex_to_bytes(hex: &str) -> [u8; 10] {
+        let mut bytes = [0u8; 10];
+        for (index, start) in (0..hex.len()).step_by(2).enumerate() {
+            bytes[index] = u8::from_str_radix(&hex[start..start + 2], 16).unwrap();
+        }
+        bytes
+    }
+
+    let key = hex_to_bytes("0053A6F94C9FF24598EB");
+    let iv = hex_to_bytes("0D74DB42A91077DE45AC");
+
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+    let output_65472_65535 = "C04C24A6938C8AF8A491D5E481271E0E601338F01067A86A795CA493AA4FF265619B8D448B706B7C88EE8395FC79E5B51AB40245BBF7773AE67DF86FCFB71F30".to_string();
+    let output_65536_65599 = "011A0D7EC32FA102C66C164CFCB189AED9F6982E8C7370A6A37414781192CEB155C534C1C8C9E53FDEADF2D3D0577DAD3A8EB2F6E5265F1E831C86844670BC69".to_string();
+    let output_131008_131071 = "48107374A9CE3AAF78221AE77789247CF6896A249ED75DCE0CF2D30EB9D889A0C61C9F480E5C07381DED9FAB2AD54333E82C89BA92E6E47FD828F1A66A8656E0".to_string();
+
+    let mut trivium = TriviumStreamByte::<u8>::new(key, iv);
+
+    // Keep pulling full steps until 131072 keystream bytes are available.
+    let mut keystream_bytes = Vec::<u8>::with_capacity(131072);
+    while keystream_bytes.len() < 131072 {
+        keystream_bytes.extend(trivium.next_64());
+    }
+
+    // Each keystream byte is two hex characters, hence the `* 2` on every byte offset.
+    let hexadecimal = get_hexagonal_string_from_bytes(keystream_bytes);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+    assert_eq!(output_65472_65535, hexadecimal[65472 * 2..65536 * 2]);
+    assert_eq!(output_65536_65599, hexadecimal[65536 * 2..65600 * 2]);
+    assert_eq!(output_131008_131071, hexadecimal[131008 * 2..131072 * 2]);
+}
+
+/// Homomorphic Trivium over `FheBool`: the key is encrypted, the IV stays clear, and the first
+/// 64 decrypted keystream bytes are checked against the reference keystream.
+#[test]
+fn trivium_test_fhe_long() {
+    /// Expands a 20-character hex string into 80 bits, each byte least significant bit first.
+    fn hex_to_lsb_first_bits(hex: &str) -> [bool; 80] {
+        let mut bits = [false; 80];
+        for (byte_index, start) in (0..hex.len()).step_by(2).enumerate() {
+            let byte = u8::from_str_radix(&hex[start..start + 2], 16).unwrap();
+            for bit_index in 0..8 {
+                bits[8 * byte_index + bit_index] = (byte >> bit_index) & 1 == 1;
+            }
+        }
+        bits
+    }
+
+    let config = ConfigBuilder::default().build();
+    let (client_key, server_key) = generate_keys(config);
+
+    let key = hex_to_lsb_first_bits("0053A6F94C9FF24598EB");
+    let iv = hex_to_lsb_first_bits("0D74DB42A91077DE45AC");
+
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+
+    let cipher_key = key.map(|bit| FheBool::encrypt(bit, &client_key));
+
+    let mut trivium = TriviumStream::<FheBool>::new(cipher_key, iv, &server_key);
+
+    // Keep pulling full steps until 64 bytes' worth of decrypted keystream bits are available.
+    let mut decrypted_bits = Vec::<bool>::with_capacity(64 * 8);
+    while decrypted_bits.len() < 64 * 8 {
+        for encrypted_bit in trivium.next_64() {
+            let bit: bool = encrypted_bit.decrypt(&client_key);
+            decrypted_bits.push(bit);
+        }
+    }
+
+    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(decrypted_bits);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+}
+
+/// Homomorphic byte-oriented Trivium over `FheUint8`: the key is encrypted, the IV stays
+/// clear, and the first 64 decrypted keystream bytes are checked against the reference
+/// keystream.
+#[test]
+fn trivium_test_fhe_byte_long() {
+    /// Parses a 20-character hex string into its 10 bytes, in string order.
+    fn hex_to_bytes(hex: &str) -> [u8; 10] {
+        let mut bytes = [0u8; 10];
+        for (index, start) in (0..hex.len()).step_by(2).enumerate() {
+            bytes[index] = u8::from_str_radix(&hex[start..start + 2], 16).unwrap();
+        }
+        bytes
+    }
+
+    let config = ConfigBuilder::default().build();
+    let (client_key, server_key) = generate_keys(config);
+
+    let key = hex_to_bytes("0053A6F94C9FF24598EB");
+    let iv = hex_to_bytes("0D74DB42A91077DE45AC");
+
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+
+    let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+
+    let mut trivium = TriviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+
+    // Keep pulling full steps until 64 decrypted keystream bytes are available.
+    let mut decrypted_bytes = Vec::<u8>::with_capacity(64);
+    while decrypted_bytes.len() < 64 {
+        for encrypted_byte in trivium.next_64() {
+            let byte: u8 = encrypted_byte.decrypt(&client_key);
+            decrypted_bytes.push(byte);
+        }
+    }
+
+    let hexadecimal = get_hexagonal_string_from_bytes(decrypted_bytes);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+}
+
+/// Transciphering with the byte-oriented homomorphic Trivium: eight encrypted zero words are
+/// transciphered, so their decryptions must spell out the first 64 bytes of the reference
+/// keystream.
+#[test]
+fn trivium_test_fhe_byte_transciphering_long() {
+    /// Parses a 20-character hex string into its 10 bytes, in string order.
+    fn hex_to_bytes(hex: &str) -> [u8; 10] {
+        let mut bytes = [0u8; 10];
+        for (index, start) in (0..hex.len()).step_by(2).enumerate() {
+            bytes[index] = u8::from_str_radix(&hex[start..start + 2], 16).unwrap();
+        }
+        bytes
+    }
+
+    let config = ConfigBuilder::default().build();
+    let (client_key, server_key) = generate_keys(config);
+
+    let key = hex_to_bytes("0053A6F94C9FF24598EB");
+    let iv = hex_to_bytes("0D74DB42A91077DE45AC");
+
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+
+    let cipher_key = key.map(|byte| FheUint8::encrypt(byte, &client_key));
+
+    // All message words encrypt zero; nine are prepared although only eight are consumed below.
+    let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &client_key).unwrap(); 9];
+
+    let mut trivium = TriviumStreamByte::<FheUint8>::new(cipher_key, iv, &server_key);
+
+    let mut decrypted_words = Vec::<u64>::with_capacity(8);
+    for _ in 0..8 {
+        let encrypted_word = ciphered_message.pop().unwrap();
+        let trans_ciphered_message = trivium.trans_encrypt_64(encrypted_word);
+        let word: u64 = trans_ciphered_message.decrypt(&client_key);
+        decrypted_words.push(word);
+    }
+
+    let hexadecimal = get_hexagonal_string_from_u64(decrypted_words);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+}
+
+use tfhe::shortint::prelude::*;
+
+/// Transciphering with the shortint-based homomorphic Trivium: eight encrypted zero words are
+/// transciphered, so their decryptions must spell out the first 64 bytes of the reference
+/// keystream.
+#[test]
+fn trivium_test_shortint_long() {
+    /// Expands a 20-character hex string into 80 bits stored as `u64` values (0 or 1), each
+    /// byte least significant bit first.
+    fn hex_to_lsb_first_bits(hex: &str) -> [u64; 80] {
+        let mut bits = [0; 80];
+        for (byte_index, start) in (0..hex.len()).step_by(2).enumerate() {
+            let byte = u64::from_str_radix(&hex[start..start + 2], 16).unwrap();
+            for bit_index in 0..8 {
+                bits[8 * byte_index + bit_index] = (byte >> bit_index) % 2;
+            }
+        }
+        bits
+    }
+
+    // High-level keys, plus the shortint keys extracted from them.
+    let config = ConfigBuilder::default().build();
+    let (hl_client_key, hl_server_key) = generate_keys(config);
+    let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
+    let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
+
+    // Shortint keys in the 1_1 parameter set, used for the cipher's internal state.
+    let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
+
+    // Key switching key from the 1_1 keys to the keys underlying the high-level API.
+    let ksk = KeySwitchingKey::new(
+        (&client_key, &server_key),
+        (&underlying_ck, &underlying_sk),
+        PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
+    );
+
+    let key = hex_to_lsb_first_bits("0053A6F94C9FF24598EB");
+    let iv = hex_to_lsb_first_bits("0D74DB42A91077DE45AC");
+    let output_0_63    = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
+
+    let cipher_key = key.map(|bit| client_key.encrypt(bit));
+
+    // All message words encrypt zero; nine are prepared although only eight are consumed below.
+    let mut ciphered_message = vec![FheUint64::try_encrypt(0u64, &hl_client_key).unwrap(); 9];
+
+    let mut trivium = TriviumStreamShortint::new(cipher_key, iv, server_key, ksk, hl_server_key);
+
+    let mut decrypted_words = Vec::<u64>::with_capacity(8);
+    for _ in 0..8 {
+        let encrypted_word = ciphered_message.pop().unwrap();
+        let trans_ciphered_message = trivium.trans_encrypt_64(encrypted_word);
+        let word: u64 = trans_ciphered_message.decrypt(&hl_client_key);
+        decrypted_words.push(word);
+    }
+
+    let hexadecimal = get_hexagonal_string_from_u64(decrypted_words);
+    assert_eq!(output_0_63, hexadecimal[0..64 * 2]);
+}
--- a/apps/trivium/src/trivium/trivium_bool.rs
+++ b/apps/trivium/src/trivium/trivium_bool.rs
@@ -0,0 +1,225 @@
+//! This module implements the Trivium stream cipher, using booleans or FheBool
+//! for the representation of the inner bits.
+
+use crate::static_deque::StaticDeque;
+
+use tfhe::prelude::*;
+use tfhe::{set_server_key, unset_server_key, FheBool, ServerKey};
+
+use rayon::prelude::*;
+
+/// Internal trait specifying which operations are necessary for TriviumStream generic type
+///
+/// `OpOutput` is the type produced by the XOR, AND and NOT operators. The trait declares no
+/// methods of its own: it only bundles the supertrait bounds below so that they can be
+/// required with a single bound.
+pub trait TriviumBoolInput<OpOutput>:
+    Sized
+    + Clone
+    + std::ops::BitXor<Output = OpOutput>
+    + std::ops::BitAnd<Output = OpOutput>
+    + std::ops::Not<Output = OpOutput>
+{
+}
+// Implemented for the owned types and for references to them; in every case the operators
+// produce the owned type.
+impl TriviumBoolInput<bool> for bool {}
+impl TriviumBoolInput<bool> for &bool {}
+impl TriviumBoolInput<FheBool> for FheBool {}
+impl TriviumBoolInput<FheBool> for &FheBool {}
+
+/// TriviumStream: a struct implementing the Trivium stream cipher, using T for the internal
+/// representation of bits (bool or FheBool). To be able to compute FHE operations, it also owns
+/// an Option for a ServerKey.
+pub struct TriviumStream<T> {
+    // 93-bit register; the constructors load the secret key into its last 80 positions.
+    a: StaticDeque<93, T>,
+    // 84-bit register; the constructors load the input vector into its last 80 positions.
+    b: StaticDeque<84, T>,
+    // 111-bit register; the constructors set its first three positions to one.
+    c: StaticDeque<111, T>,
+    // `Some` for the FheBool constructor, `None` for the clear-text constructor.
+    fhe_key: Option<ServerKey>,
+}
+
+impl TriviumStream<bool> {
+    /// Builds a clear-text Trivium stream from the 80-bit secret key and the 80-bit input
+    /// vector.
+    ///
+    /// The returned stream is ready to use: the 1152 initialization steps have already been
+    /// run by the time this function returns.
+    pub fn new(key: [bool; 80], iv: [bool; 80]) -> TriviumStream<bool> {
+        // Register a: zeros, then the secret key in the last 80 positions.
+        let mut a_register = [false; 93];
+        a_register[93 - 80..].copy_from_slice(&key);
+
+        // Register b: zeros, then the input vector in the last 80 positions.
+        let mut b_register = [false; 84];
+        b_register[84 - 80..].copy_from_slice(&iv);
+
+        // Register c: three ones in the first positions, zeros everywhere else.
+        let mut c_register = [false; 111];
+        c_register[..3].fill(true);
+
+        Self::new_from_registers(a_register, b_register, c_register, None)
+    }
+}
+
+impl TriviumStream<FheBool> {
+    /// Builds a homomorphic Trivium stream from the encrypted 80-bit secret key, the clear
+    /// 80-bit input vector and the FHE server key.
+    ///
+    /// The server key is installed while the registers are filled with trivial encryptions and
+    /// removed again afterwards; a clone of it is stored in the stream. The returned stream is
+    /// ready to use: the 1152 initialization steps have already been run.
+    pub fn new(key: [FheBool; 80], iv: [bool; 80], sk: &ServerKey) -> TriviumStream<FheBool> {
+        set_server_key(sk.clone());
+
+        // Every register starts out as trivial encryptions of zero.
+        let mut a_register = [false; 93].map(FheBool::encrypt_trivial);
+        let mut b_register = [false; 84].map(FheBool::encrypt_trivial);
+        let mut c_register = [false; 111].map(FheBool::encrypt_trivial);
+
+        // Register a: the encrypted secret key goes in the last 80 positions.
+        a_register[93 - 80..].clone_from_slice(&key);
+
+        // Register b: the input vector, trivially encrypted, goes in the last 80 positions.
+        for (slot, &iv_bit) in b_register[84 - 80..].iter_mut().zip(iv.iter()) {
+            *slot = FheBool::encrypt_trivial(iv_bit);
+        }
+
+        // Register c: three ones in the first positions.
+        for slot in c_register[..3].iter_mut() {
+            *slot = FheBool::try_encrypt_trivial(true).unwrap();
+        }
+
+        unset_server_key();
+        Self::new_from_registers(a_register, b_register, c_register, Some(sk.clone()))
+    }
+}
+
+impl<T> TriviumStream<T>
+where
+    T: TriviumBoolInput<T> + std::marker::Send + std::marker::Sync,
+    for<'a> &'a T: TriviumBoolInput<T>,
+{
+    /// Generic constructor behind the public `new` functions: wraps the three prepared
+    /// registers, stores the optional FHE server key, and runs the initialization phase before
+    /// returning the stream.
+    fn new_from_registers(
+        a_register: [T; 93],
+        b_register: [T; 84],
+        c_register: [T; 111],
+        key: Option<ServerKey>,
+    ) -> Self {
+        let mut stream = Self {
+            a: StaticDeque::<93, T>::new(a_register),
+            b: StaticDeque::<84, T>::new(b_register),
+            c: StaticDeque::<111, T>::new(c_register),
+            fhe_key: key,
+        };
+        stream.init();
+        stream
+    }
+
+    /// Runs the 1152 (= 18 * 64) steps that the Trivium specification requires before the
+    /// proper stream starts; their output is discarded.
+    fn init(&mut self) {
+        for _ in 0..18 {
+            drop(self.next_64());
+        }
+    }
+
+    /// Advances the stream by a single step and returns the bit it produced.
+    pub fn next_bool(&mut self) -> T {
+        // NOTE(review): unlike `next_64`, the server key is set here but not unset afterwards.
+        if let Some(sk) = &self.fhe_key {
+            set_server_key(sk.clone());
+        }
+
+        let [output, next_a, next_b, next_c] = self.get_output_and_values(0);
+
+        self.a.push(next_a);
+        self.b.push(next_b);
+        self.c.push(next_c);
+
+        output
+    }
+
+    /// Computes the step that lies `n` steps in the future without touching the registers.
+    ///
+    /// Returns `[output, a, b, c]`: the output bit of that step and the three values that will
+    /// have to be pushed on registers a, b and c once that step is actually reached. Because
+    /// it only reads the registers, it can be called for several values of `n` in parallel.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `n >= 65`.
+    fn get_output_and_values(&self, n: usize) -> [T; 4] {
+        assert!(n < 65);
+
+        let (reg_a, reg_b, reg_c) = (&self.a, &self.b, &self.c);
+
+        // First layer: the three XOR taps and the three AND gates, all independent.
+        let (((tap_a, tap_b), (tap_c, and_a)), (and_b, and_c)) = rayon::join(
+            || {
+                rayon::join(
+                    || {
+                        rayon::join(
+                            || &reg_a[65 - n] ^ &reg_a[92 - n],
+                            || &reg_b[68 - n] ^ &reg_b[83 - n],
+                        )
+                    },
+                    || {
+                        rayon::join(
+                            || &reg_c[65 - n] ^ &reg_c[110 - n],
+                            || &reg_a[91 - n] & &reg_a[90 - n],
+                        )
+                    },
+                )
+            },
+            || {
+                rayon::join(
+                    || &reg_b[82 - n] & &reg_b[81 - n],
+                    || &reg_c[109 - n] & &reg_c[108 - n],
+                )
+            },
+        );
+
+        // Second layer: the output bit and the three register inputs, again independent.
+        let ((output, next_a), (next_b, next_c)) = rayon::join(
+            || {
+                rayon::join(
+                    || &(&tap_a ^ &tap_b) ^ &tap_c,
+                    || &tap_c ^ &(&and_c ^ &reg_a[68 - n]),
+                )
+            },
+            || {
+                rayon::join(
+                    || &tap_a ^ &(&and_a ^ &reg_b[77 - n]),
+                    || &tap_b ^ &(&and_b ^ &reg_c[86 - n]),
+                )
+            },
+        );
+
+        [output, next_a, next_b, next_c]
+    }
+
+    /// Calls `get_output_and_values` in parallel for the next 64 steps and collects the
+    /// results in step order (index 0 is the nearest step).
+    fn get_64_output_and_values(&self) -> Vec<[T; 4]> {
+        (0..64)
+            .into_par_iter()
+            .map(|step| self.get_output_and_values(step))
+            .collect()
+    }
+
+    /// Computes 64 turns of the stream, outputting the 64 bits all at once in a
+    /// Vec (first value is oldest, last is newest)
+    pub fn next_64(&mut self) -> Vec<T> {
+        // The server key has to be installed on every rayon worker for the parallel phase.
+        if let Some(sk) = &self.fhe_key {
+            rayon::broadcast(|_| set_server_key(sk.clone()));
+        }
+        let steps = self.get_64_output_and_values();
+        if self.fhe_key.is_some() {
+            rayon::broadcast(|_| unset_server_key());
+        }
+
+        // Apply the precomputed steps to the registers, oldest first.
+        let mut outputs = Vec::<T>::with_capacity(64);
+        for [output, next_a, next_b, next_c] in steps {
+            outputs.push(output);
+            self.a.push(next_a);
+            self.b.push(next_b);
+            self.c.push(next_c);
+        }
+        outputs
+    }
+}
--- a/apps/trivium/src/trivium/trivium_byte.rs
+++ b/apps/trivium/src/trivium/trivium_byte.rs
@@ -0,0 +1,241 @@
+//! This module implements the Trivium stream cipher, using u8 or FheUint8
+//! for the representation of the inner bits.
+
+use crate::static_deque::{StaticByteDeque, StaticByteDequeInput};
+
+use tfhe::prelude::*;
+use tfhe::{set_server_key, unset_server_key, FheUint8, ServerKey};
+
+use rayon::prelude::*;
+
+/// Internal trait specifying which operations are necessary for TriviumStreamByte generic type
+///
+/// `OpOutput` is the type produced by the XOR, AND, shift and addition operators. The trait
+/// declares no methods of its own: it only bundles the supertrait bounds below (including
+/// `StaticByteDequeInput`, `Send` and `Sync`) so that they can be required with a single bound.
+pub trait TriviumByteInput<OpOutput>:
+    Sized
+    + Clone
+    + Send
+    + Sync
+    + StaticByteDequeInput<OpOutput>
+    + std::ops::BitXor<Output = OpOutput>
+    + std::ops::BitAnd<Output = OpOutput>
+    + std::ops::Shr<u8, Output = OpOutput>
+    + std::ops::Shl<u8, Output = OpOutput>
+    + std::ops::Add<Output = OpOutput>
+{
+}
+// Implemented for the owned types and for references to them; in every case the operators
+// produce the owned type.
+impl TriviumByteInput<u8> for u8 {}
+impl TriviumByteInput<u8> for &u8 {}
+impl TriviumByteInput<FheUint8> for FheUint8 {}
+impl TriviumByteInput<FheUint8> for &FheUint8 {}
+
+/// TriviumStreamByte: a struct implementing the Trivium stream cipher, using T for the internal
+/// representation of bits (u8 or FheUint8). To be able to compute FHE operations, it also owns
+/// an Option for a ServerKey.
+/// Since the original Trivium registers' sizes are not a multiple of 8, these registers (which
+/// store byte-like objects) have a size that is the eighth of the closest multiple of 8 above the
+/// originals' sizes.
+pub struct TriviumStreamByte<T> {
+    // 12 bytes (96 bits) for the 93-bit register a; the constructors load the secret key into
+    // its last 10 bytes.
+    a_byte: StaticByteDeque<12, T>,
+    // 11 bytes (88 bits) for the 84-bit register b; the constructors load the input vector into
+    // its last 10 bytes.
+    b_byte: StaticByteDeque<11, T>,
+    // 14 bytes (112 bits) for the 111-bit register c; the constructors set its first byte to 14.
+    c_byte: StaticByteDeque<14, T>,
+    // `Some` for the FheUint8 constructor, `None` for the clear-text constructor.
+    fhe_key: Option<ServerKey>,
+}
+
+impl TriviumStreamByte<u8> {
+    /// Builds a clear-text byte-oriented Trivium stream from the 10-byte secret key and the
+    /// 10-byte input vector.
+    ///
+    /// The returned stream is ready to use: the 1152 initialization steps have already been
+    /// run by the time this function returns.
+    pub fn new(key: [u8; 10], iv: [u8; 10]) -> TriviumStreamByte<u8> {
+        // Register a: zeros, then the secret key in the last 10 bytes.
+        let mut a_byte_reg = [0u8; 12];
+        a_byte_reg[12 - 10..].copy_from_slice(&key);
+
+        // Register b: zeros, then the input vector in the last 10 bytes.
+        let mut b_byte_reg = [0u8; 11];
+        b_byte_reg[11 - 10..].copy_from_slice(&iv);
+
+        // Register c: the first byte is 14, aka 00001110. This represents the 3 ones at the
+        // beginning of the c register, with additional zeros to make the register's size a
+        // multiple of 8.
+        let mut c_byte_reg = [0u8; 14];
+        c_byte_reg[0] = 0b0000_1110;
+
+        let mut stream = Self::new_from_registers(a_byte_reg, b_byte_reg, c_byte_reg, None);
+        stream.init();
+        stream
+    }
+}
+
+impl TriviumStreamByte<FheUint8> {
+    /// Builds a homomorphic byte-oriented Trivium stream (`TriviumStreamByte<FheUint8>`) from
+    /// the encrypted 10-byte secret key, the clear 10-byte input vector and the FHE server key.
+    ///
+    /// The server key is installed while the registers are filled with trivial encryptions and
+    /// removed again afterwards; a clone of it is stored in the stream. The returned stream is
+    /// ready to use: the 1152 initialization steps have already been run.
+    pub fn new(
+        key: [FheUint8; 10],
+        iv: [u8; 10],
+        server_key: &ServerKey,
+    ) -> TriviumStreamByte<FheUint8> {
+        set_server_key(server_key.clone());
+
+        // Every register starts out as trivial encryptions of zero.
+        let mut a_byte_reg = [0u8; 12].map(FheUint8::encrypt_trivial);
+        let mut b_byte_reg = [0u8; 11].map(FheUint8::encrypt_trivial);
+        let mut c_byte_reg = [0u8; 14].map(FheUint8::encrypt_trivial);
+
+        // Register a: the encrypted secret key goes in the last 10 bytes.
+        a_byte_reg[12 - 10..].clone_from_slice(&key);
+
+        // Register b: the input vector, trivially encrypted, goes in the last 10 bytes.
+        for (slot, &iv_byte) in b_byte_reg[11 - 10..].iter_mut().zip(iv.iter()) {
+            *slot = FheUint8::encrypt_trivial(iv_byte);
+        }
+
+        // Register c: the first byte is 14, aka 00001110. This represents the 3 ones at the
+        // beginning of the c register, with additional zeros to make the register's size a
+        // multiple of 8.
+        c_byte_reg[0] = FheUint8::encrypt_trivial(14u8);
+
+        unset_server_key();
+        let mut stream = Self::new_from_registers(
+            a_byte_reg,
+            b_byte_reg,
+            c_byte_reg,
+            Some(server_key.clone()),
+        );
+        stream.init();
+        stream
+    }
+}
+
+impl<T> TriviumStreamByte<T>
+where
+    T: TriviumByteInput<T> + Send,
+    for<'a> &'a T: TriviumByteInput<T>,
+{
+    /// Generic constructor behind the public `new` functions: wraps the three prepared byte
+    /// registers and stores the optional FHE server key. It does not run the initialization
+    /// phase; the public constructors call `init` themselves.
+    fn new_from_registers(
+        a_register: [T; 12],
+        b_register: [T; 11],
+        c_register: [T; 14],
+        sk: Option<ServerKey>,
+    ) -> Self {
+        let a_byte = StaticByteDeque::<12, T>::new(a_register);
+        let b_byte = StaticByteDeque::<11, T>::new(b_register);
+        let c_byte = StaticByteDeque::<14, T>::new(c_register);
+        Self {
+            a_byte,
+            b_byte,
+            c_byte,
+            fhe_key: sk,
+        }
+    }
+
+    /// Runs the 1152 (= 18 * 64) steps that the Trivium specification requires before the
+    /// proper stream starts; their output is discarded.
+    fn init(&mut self) {
+        for _ in 0..18 {
+            drop(self.next_64());
+        }
+    }
+
+    /// Computes 8 future steps of Trivium at once, those lying `b * 8` steps in the future,
+    /// without touching the registers.
+    ///
+    /// Returns `[output, a, b, c]`: the output byte of those steps and the three bytes that
+    /// will have to be pushed on registers a, b and c once those steps are actually reached.
+    /// Because it only reads the registers, it can be called for several values of `b` in
+    /// parallel.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `b * 8 + 7 >= 65`.
+    fn get_output_and_values(&self, b: usize) -> [T; 4] {
+        let n = b * 8 + 7;
+        assert!(n < 65);
+
+        // Five bytes are rebuilt from each register: the two AND operands, the bit fed to
+        // another register, and the two XOR taps (in that order).
+        let (
+            (a_and_lhs, a_and_rhs, a_feed, a_tap_lo, a_tap_hi),
+            (
+                (b_and_lhs, b_and_rhs, b_feed, b_tap_lo, b_tap_hi),
+                (c_and_lhs, c_and_rhs, c_feed, c_tap_lo, c_tap_hi),
+            ),
+        ) = rayon::join(
+            || Self::get_bytes(&self.a_byte, [91 - n, 90 - n, 68 - n, 65 - n, 92 - n]),
+            || {
+                rayon::join(
+                    || Self::get_bytes(&self.b_byte, [82 - n, 81 - n, 77 - n, 68 - n, 83 - n]),
+                    || Self::get_bytes(&self.c_byte, [109 - n, 108 - n, 86 - n, 65 - n, 110 - n]),
+                )
+            },
+        );
+
+        // First layer: the three XOR taps and the three AND gates, all independent.
+        let (((tap_a, tap_b), (tap_c, and_a)), (and_b, and_c)) = rayon::join(
+            || {
+                rayon::join(
+                    || rayon::join(|| a_tap_lo ^ a_tap_hi, || b_tap_lo ^ b_tap_hi),
+                    || rayon::join(|| c_tap_lo ^ c_tap_hi, || a_and_lhs & a_and_rhs),
+                )
+            },
+            || rayon::join(|| b_and_lhs & b_and_rhs, || c_and_lhs & c_and_rhs),
+        );
+
+        // The taps are needed twice (output and register inputs), so keep a copy of each.
+        let (tap_a_copy, tap_b_copy, tap_c_copy) = (tap_a.clone(), tap_b.clone(), tap_c.clone());
+
+        // Second layer: the output byte and the three register inputs, again independent.
+        let ((output, next_a), (next_b, next_c)) = rayon::join(
+            || {
+                rayon::join(
+                    || (tap_a_copy ^ tap_b_copy) ^ tap_c_copy,
+                    || tap_c ^ ((and_c) ^ a_feed),
+                )
+            },
+            || rayon::join(|| tap_a ^ (and_a ^ b_feed), || tap_b ^ (and_b ^ c_feed)),
+        );
+
+        [output, next_a, next_b, next_c]
+    }
+
+    /// Calls `get_output_and_values` in parallel for the 8 bytes of the next 64 steps and
+    /// collects the results in order.
+    fn get_64_output_and_values(&self) -> Vec<[T; 4]> {
+        (0..8)
+            .into_par_iter()
+            .map(|byte_index| self.get_output_and_values(byte_index))
+            .collect()
+    }
+
+    /// Computes 64 turns of the stream, outputting the 64 bits (in 8 bytes) all at once in a
+    /// Vec (first value is oldest, last is newest)
+    pub fn next_64(&mut self) -> Vec<T> {
+        // The server key has to be installed on every rayon worker for the parallel phase.
+        if let Some(sk) = &self.fhe_key {
+            rayon::broadcast(|_| set_server_key(sk.clone()));
+        }
+        let steps = self.get_64_output_and_values();
+        if self.fhe_key.is_some() {
+            rayon::broadcast(|_| unset_server_key());
+        }
+
+        // Apply the precomputed bytes to the registers, oldest first.
+        let mut output_bytes = Vec::<T>::with_capacity(8);
+        for [output, next_a, next_b, next_c] in steps {
+            self.a_byte.push(next_a);
+            self.b_byte.push(next_b);
+            self.c_byte.push(next_c);
+            output_bytes.push(output);
+        }
+
+        output_bytes
+    }
+
+    /// Rebuilds, in parallel, the 5 bytes of `reg` found at the given offsets, and returns them
+    /// in the same order as `offsets`.
+    fn get_bytes<const N: usize>(
+        reg: &StaticByteDeque<N, T>,
+        offsets: [usize; 5],
+    ) -> (T, T, T, T, T) {
+        let mut bytes = offsets
+            .par_iter()
+            .map(|&offset| reg.byte(offset))
+            .collect::<Vec<_>>()
+            .into_iter();
+        (
+            bytes.next().unwrap(),
+            bytes.next().unwrap(),
+            bytes.next().unwrap(),
+            bytes.next().unwrap(),
+            bytes.next().unwrap(),
+        )
+    }
+}
+
+impl TriviumStreamByte<FheUint8> {
+    /// Returns a reference to the FHE server key stored in this stream.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no server key is stored (`fhe_key` is `None`).
+    pub fn get_server_key(&self) -> &ServerKey {
+        self.fhe_key.as_ref().unwrap()
+    }
+}
--- a/apps/trivium/src/trivium/trivium_shortint.rs
+++ b/apps/trivium/src/trivium/trivium_shortint.rs
@@ -0,0 +1,189 @@
+use crate::static_deque::StaticDeque;
+
+use tfhe::shortint::prelude::*;
+
+use rayon::prelude::*;
+
+/// TriviumStreamShortint: a struct implementing the Trivium stream cipher, using a shortint
+/// Ciphertext for the internal representation of bits (intended to represent a single bit). To be
+/// able to compute FHE operations, it also owns the shortint ServerKey used for the register
+/// updates, together with a KeySwitchingKey and a high-level `tfhe::ServerKey` that are stored
+/// and exposed through getters.
+pub struct TriviumStreamShortint {
+    a: StaticDeque<93, Ciphertext>,
+    b: StaticDeque<84, Ciphertext>,
+    c: StaticDeque<111, Ciphertext>,
+    internal_server_key: ServerKey,
+    transciphering_casting_key: KeySwitchingKey,
+    hl_server_key: tfhe::ServerKey,
+}
+
+impl TriviumStreamShortint {
+    /// Constructor for TriviumStreamShortint. Arguments are the 80 ciphertexts of the secret
+    /// key, the 80 clear values of the input vector, the shortint ServerKey used to evaluate
+    /// the cipher, and a KeySwitchingKey plus a high-level ServerKey which are stored as-is and
+    /// exposed through the getters. Outputs a TriviumStream object already initialized (1152
+    /// steps have been run before returning)
+    pub fn new(
+        key: [Ciphertext; 80],
+        iv: [u64; 80],
+        sk: ServerKey,
+        ksk: KeySwitchingKey,
+        hl_sk: tfhe::ServerKey,
+    ) -> Self {
+        // Initialization of Trivium registers: a has the secret key, b the input vector,
+        // and c a few ones. Every cell starts as a trivial encryption of 0.
+        let mut a_register: [Ciphertext; 93] = [0; 93].map(|x| sk.create_trivial(x));
+        let mut b_register: [Ciphertext; 84] = [0; 84].map(|x| sk.create_trivial(x));
+        let mut c_register: [Ciphertext; 111] = [0; 111].map(|x| sk.create_trivial(x));
+
+        // The key and the input vector fill the last 80 cells of their registers. `key` is
+        // owned, so its ciphertexts are moved into place instead of being cloned.
+        for (i, (key_bit, iv_bit)) in IntoIterator::into_iter(key).zip(iv).enumerate() {
+            a_register[93 - 80 + i] = key_bit;
+            b_register[84 - 80 + i] = sk.create_trivial(iv_bit);
+        }
+
+        c_register[0] = sk.create_trivial(1);
+        c_register[1] = sk.create_trivial(1);
+        c_register[2] = sk.create_trivial(1);
+
+        let mut ret = Self {
+            a: StaticDeque::<93, Ciphertext>::new(a_register),
+            b: StaticDeque::<84, Ciphertext>::new(b_register),
+            c: StaticDeque::<111, Ciphertext>::new(c_register),
+            internal_server_key: sk,
+            transciphering_casting_key: ksk,
+            hl_server_key: hl_sk,
+        };
+        // Run the warm-up steps before handing the stream to the caller.
+        ret.init();
+        ret
+    }
+
+    /// Warm-up phase: the specification of Trivium asks for 1152 (= 18*64) steps whose output
+    /// is unused, to mix up the registers before the proper stream starts
+    fn init(&mut self) {
+        (0..18).for_each(|_| {
+            // The produced bits are deliberately discarded.
+            drop(self.next_64());
+        });
+    }
+
+    /// Runs a single turn of the stream: the three registers are updated and the freshly
+    /// produced bit is returned.
+    pub fn next_ct(&mut self) -> Ciphertext {
+        let [output, next_a, next_b, next_c] = self.get_output_and_values(0);
+
+        self.a.push(next_a);
+        self.b.push(next_b);
+        self.c.push(next_c);
+
+        output
+    }
+
+    /// Computes the step of Trivium lying n terms in the future without touching the
+    /// registers. Returns the output bit followed by the three values that will have to be
+    /// pushed into registers a, b and c when the time is right. This function is meant to be
+    /// used in parallel.
+    fn get_output_and_values(&self, n: usize) -> [Ciphertext; 4] {
+        let sk = &self.internal_server_key;
+
+        // Taps of each register, all shifted by `n`.
+        let (a1, a2, a3, a4, a5) = (
+            &self.a[65 - n],
+            &self.a[92 - n],
+            &self.a[91 - n],
+            &self.a[90 - n],
+            &self.a[68 - n],
+        );
+        let (b1, b2, b3, b4, b5) = (
+            &self.b[68 - n],
+            &self.b[83 - n],
+            &self.b[82 - n],
+            &self.b[81 - n],
+            &self.b[77 - n],
+        );
+        let (c1, c2, c3, c4, c5) = (
+            &self.c[65 - n],
+            &self.c[110 - n],
+            &self.c[109 - n],
+            &self.c[108 - n],
+            &self.c[86 - n],
+        );
+
+        let temp_a = sk.unchecked_add(a1, a2);
+        let temp_b = sk.unchecked_add(b1, b2);
+        let temp_c = sk.unchecked_add(c1, c2);
+
+        // Shared recipe of the three register updates: (x AND y) + tap + sum, followed by
+        // `message_extract_assign`.
+        let feedback = |x: &Ciphertext, y: &Ciphertext, tap: &Ciphertext, sum: &Ciphertext| {
+            let mut bit = sk.unchecked_bitand(x, y);
+            sk.unchecked_add_assign(&mut bit, tap);
+            sk.unchecked_add_assign(&mut bit, sum);
+            sk.message_extract_assign(&mut bit);
+            bit
+        };
+
+        // The four results are independent from each other, hence the nested joins.
+        let ((new_a, new_b), (new_c, o)) = rayon::join(
+            || {
+                rayon::join(
+                    || feedback(c3, c4, a5, &temp_c),
+                    || feedback(a3, a4, b5, &temp_a),
+                )
+            },
+            || {
+                rayon::join(
+                    || feedback(b3, b4, c5, &temp_b),
+                    || sk.bitxor(&sk.unchecked_add(&temp_a, &temp_b), &temp_c),
+                )
+            },
+        );
+
+        [o, new_a, new_b, new_c]
+    }
+
+    /// Evaluates `get_output_and_values` for the 64 offsets `0..64` in parallel. The returned
+    /// Vec is in reverse offset order: offset 63 comes first and offset 0 last.
+    fn get_64_output_and_values(&self) -> Vec<[Ciphertext; 4]> {
+        let offsets = 0..64;
+        offsets
+            .into_par_iter()
+            .rev()
+            .map(|offset| self.get_output_and_values(offset))
+            .collect()
+    }
+
+    /// Advances the stream by 64 turns and returns the 64 produced bits all at once in a
+    /// Vec (first value is oldest, last is newest)
+    pub fn next_64(&mut self) -> Vec<Ciphertext> {
+        let values = self.get_64_output_and_values();
+
+        let mut outputs = Vec::<Ciphertext>::with_capacity(64);
+        // The batch is consumed from its last element back to its first one.
+        for [o, a, b, c] in values.into_iter().rev() {
+            outputs.push(o);
+            self.a.push(a);
+            self.b.push(b);
+            self.c.push(c);
+        }
+        outputs
+    }
+
+    /// Returns a reference to the shortint ServerKey stored in `internal_server_key`.
+    pub fn get_internal_server_key(&self) -> &ServerKey {
+        &self.internal_server_key
+    }
+
+    /// Returns a reference to the KeySwitchingKey stored in `transciphering_casting_key`.
+    pub fn get_casting_key(&self) -> &KeySwitchingKey {
+        &self.transciphering_casting_key
+    }
+
+    /// Returns a reference to the high-level `tfhe::ServerKey` stored in `hl_server_key`.
+    pub fn get_hl_server_key(&self) -> &tfhe::ServerKey {
+        &self.hl_server_key
+    }
+}
--- a/backends/tfhe-cuda-backend/Cargo.toml
+++ b/backends/tfhe-cuda-backend/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "tfhe-cuda-backend"
+version = "0.1.3"
+edition = "2021"
+authors = ["Zama team"]
+license = "BSD-3-Clause-Clear"
+description = "Cuda implementation of TFHE-rs primitives."
+homepage = "https://www.zama.ai/"
+documentation = "https://docs.zama.ai/tfhe-rs"
+repository = "https://github.com/zama-ai/tfhe-rs"
+readme = "README.md"
+keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
+
+[build-dependencies]
+cmake = { version = "0.1" }
+
+[dependencies]
+thiserror = "1.0"
--- a/backends/tfhe-cuda-backend/LICENSE
+++ b/backends/tfhe-cuda-backend/LICENSE
@@ -0,0 +1,28 @@
+BSD 3-Clause Clear License
+
+Copyright © 2024 ZAMA.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this
+list of conditions and the following disclaimer in the documentation and/or other
+materials provided with the distribution.
+
+3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
+or promote products derived from this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
+THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/backends/tfhe-cuda-backend/README.md
+++ b/backends/tfhe-cuda-backend/README.md
@@ -0,0 +1,53 @@
+# TFHE Cuda backend
+
+## Introduction
+
+The `tfhe-cuda-backend` holds the code for GPU acceleration of Zama's variant of TFHE.
+It implements CUDA/C++ functions to perform homomorphic operations on LWE ciphertexts.
+
+It provides functions to allocate memory on the GPU, to copy data back 
+and forth between the CPU and the GPU, to create and destroy Cuda streams, etc.:
+- `cuda_create_stream`, `cuda_destroy_stream`
+- `cuda_malloc`, `cuda_check_valid_malloc`
+- `cuda_memcpy_async_to_cpu`, `cuda_memcpy_async_to_gpu`
+- `cuda_get_number_of_gpus`
+- `cuda_synchronize_device`
+
+The cryptographic operations it provides are:
+- an amortized implementation of the TFHE programmable bootstrap: `cuda_bootstrap_amortized_lwe_ciphertext_vector_32` and `cuda_bootstrap_amortized_lwe_ciphertext_vector_64`
+- a low latency implementation of the TFHE programmable bootstrap: `cuda_bootstrap_low_latency_lwe_ciphertext_vector_32` and `cuda_bootstrap_low_latency_lwe_ciphertext_vector_64`
+- the keyswitch: `cuda_keyswitch_lwe_ciphertext_vector_32` and `cuda_keyswitch_lwe_ciphertext_vector_64`
+- the larger precision programmable bootstrap (wop PBS, which supports up to 16 bits of message while the classical PBS only supports up to 8 bits of message) and its sub-components: `cuda_wop_pbs_64`, `cuda_extract_bits_64`, `cuda_circuit_bootstrap_64`, `cuda_cmux_tree_64`, `cuda_blind_rotation_sample_extraction_64`
+- acceleration for leveled operations: `cuda_negate_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_plaintext_vector_64`, `cuda_mult_lwe_ciphertext_vector_cleartext_vector`.
+
+## Dependencies
+
+**Disclaimer**: Compilation on Windows/Mac is not supported yet. Only Nvidia GPUs are supported. 
+
+- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation
+- [nvcc](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) >= 10.0
+- [gcc](https://gcc.gnu.org/) >= 8.0 - check this [page](https://gist.github.com/ax3l/9489132) for more details about nvcc/gcc compatible versions
+- [cmake](https://cmake.org/) >= 3.24
+
+## Build
+
+The Cuda project held in `tfhe-cuda-backend` can be compiled independently from TFHE-rs in the
+following way:
+```
+git clone git@github.com:zama-ai/tfhe-rs
+cd tfhe-rs/backends/tfhe-cuda-backend/cuda
+mkdir build
+cd build
+cmake ..
+make
+```
+The compute capability is detected automatically (with the first GPU information) and set accordingly.
+If your machine does not have an available Nvidia GPU, the compilation will work if you have the nvcc compiler installed. The generated executable will target a 7.0 compute capability (sm_70).
+
+## Links
+
+- [TFHE](https://eprint.iacr.org/2018/421.pdf)
+
+## License
+
+This software is distributed under the BSD-3-Clause-Clear license. If you have any questions,
+please contact us at `hello@zama.ai`.
--- a/backends/tfhe-cuda-backend/build.rs
+++ b/backends/tfhe-cuda-backend/build.rs
@@ -0,0 +1,28 @@
+use std::env;
+use std::process::Command;
+
+/// Build script entry point: on Linux it builds the `cuda` CMake project and prints the cargo
+/// link directives; on any other OS it panics.
+fn main() {
+    println!("Build tfhe-cuda-backend");
+    if env::consts::OS != "linux" {
+        panic!(
+            "Error: platform not supported, tfhe-cuda-backend not built (only Linux is supported)"
+        );
+    }
+
+    // A distribution other than Ubuntu only triggers a warning, the build still goes on.
+    let script_stdout = Command::new("./get_os_name.sh").output().unwrap().stdout;
+    if String::from_utf8(script_stdout).unwrap() != "Ubuntu\n" {
+        println!(
+            "cargo:warning=This Linux distribution is not officially supported. \
+            Only Ubuntu is supported by tfhe-cuda-backend at this time. Build may fail\n"
+        );
+    }
+
+    let dest = cmake::build("cuda");
+    println!("cargo:rustc-link-search=native={}", dest.display());
+    // The remaining directives are fixed strings, printed in this exact order.
+    let fixed_directives = [
+        "cargo:rustc-link-lib=static=tfhe_cuda_backend",
+        "cargo:rustc-link-search=native=/usr/local/cuda/lib64",
+        "cargo:rustc-link-lib=gomp",
+        "cargo:rustc-link-lib=cudart",
+        "cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu/",
+        "cargo:rustc-link-lib=stdc++",
+    ];
+    for directive in &fixed_directives {
+        println!("{}", directive);
+    }
+}
--- a/backends/tfhe-cuda-backend/cuda/.cmake-format-config.py
+++ b/backends/tfhe-cuda-backend/cuda/.cmake-format-config.py
@@ -0,0 +1,10 @@
+# -----------------------------
+# Options affecting formatting.
+# -----------------------------
+with section("format"):
+
+  # How wide to allow formatted cmake files
+  line_width = 120
+  
+  # How many spaces to tab for indent
+  tab_size = 2
--- a/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
@@ -0,0 +1,90 @@
+cmake_minimum_required(VERSION 3.24 FATAL_ERROR)
+project(tfhe_cuda_backend LANGUAGES CXX)
+
+# See if the minimum CUDA version is available. If not, configuration stops with a fatal error.
+set(MINIMUM_SUPPORTED_CUDA_VERSION 10.0)
+include(CheckLanguage)
+# See if CUDA is available
+check_language(CUDA)
+# If so, enable CUDA to check the version.
+if(CMAKE_CUDA_COMPILER)
+  enable_language(CUDA)
+endif()
+# If CUDA is not available, or the minimum version is too low do not build
+if(NOT CMAKE_CUDA_COMPILER)
+  message(FATAL_ERROR "Cuda compiler not found.")
+endif()
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION})
+  message(FATAL_ERROR "CUDA ${MINIMUM_SUPPORTED_CUDA_VERSION} or greater is required for compilation.")
+endif()
+# Get CUDA compute capability
+set(OUTPUTFILE ${CMAKE_CURRENT_SOURCE_DIR}/cuda_script) # No suffix required
+set(CUDAFILE ${CMAKE_CURRENT_SOURCE_DIR}/check_cuda.cu)
+execute_process(COMMAND nvcc -lcuda ${CUDAFILE} -o ${OUTPUTFILE})
+execute_process(
+  COMMAND ${OUTPUTFILE}
+  RESULT_VARIABLE CUDA_RETURN_CODE
+  OUTPUT_VARIABLE ARCH)
+file(REMOVE ${OUTPUTFILE})
+
+if(${CUDA_RETURN_CODE} EQUAL 0)
+  set(CUDA_SUCCESS "TRUE")
+else()
+  set(CUDA_SUCCESS "FALSE")
+endif()
+
+if(${CUDA_SUCCESS})
+  message(STATUS "CUDA Architecture: ${ARCH}")
+  message(STATUS "CUDA Version: ${CUDA_VERSION_STRING}")
+  message(STATUS "CUDA Path: ${CUDA_TOOLKIT_ROOT_DIR}")
+  message(STATUS "CUDA Libraries: ${CUDA_LIBRARIES}")
+  message(STATUS "CUDA Performance Primitives: ${CUDA_npp_LIBRARY}")
+else()
+  message(WARNING ${ARCH})
+endif()
+
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+# Add OpenMP support
+find_package(OpenMP REQUIRED)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}  -g")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${OpenMP_CXX_FLAGS}")
+if(${CUDA_SUCCESS})
+  set(CMAKE_CUDA_ARCHITECTURES native)
+else()
+  set(CMAKE_CUDA_ARCHITECTURES 70)
+endif()
+
+# in production, should use -arch=sm_70 --ptxas-options=-v to see register spills -lineinfo for better debugging
+set(CMAKE_CUDA_FLAGS
+    "${CMAKE_CUDA_FLAGS} -ccbin ${CMAKE_CXX_COMPILER} -O3 \
+  -std=c++17 --no-exceptions  --expt-relaxed-constexpr -rdc=true \
+  --use_fast_math -Xcompiler -fPIC")
+
+set(INCLUDE_DIR include)
+
+add_subdirectory(src)
+target_include_directories(tfhe_cuda_backend PRIVATE ${INCLUDE_DIR})
+
+# This is required for rust cargo build
+install(TARGETS tfhe_cuda_backend DESTINATION .)
+install(TARGETS tfhe_cuda_backend DESTINATION lib)
+
+# Define a function to add a lint target.
+find_file(CPPLINT NAMES cpplint cpplint.exe)
+if(CPPLINT)
+  # Add a custom target to lint all child projects. Dependencies are specified in child projects.
+  add_custom_target(all_lint)
+  # Don't trigger this target on ALL_BUILD or Visual Studio 'Rebuild Solution'
+  set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_ALL TRUE)
+  # set_target_properties(all_lint PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE)
+endif()
+
+enable_testing()
--- a/backends/tfhe-cuda-backend/cuda/CPPLINT.cfg
+++ b/backends/tfhe-cuda-backend/cuda/CPPLINT.cfg
@@ -0,0 +1,3 @@
+set noparent 
+linelength=240
+filter=-legal/copyright,-readability/todo,-runtime/references,-build/c++17
--- a/backends/tfhe-cuda-backend/cuda/check_cuda.cu
+++ b/backends/tfhe-cuda-backend/cuda/check_cuda.cu
@@ -0,0 +1,22 @@
+#include <stdio.h>
+
+// Queries GPU 0 and prints the nvcc architecture flag matching its compute capability.
+// Exit status: 0 on success, the CUDA error code when the device query fails, 1 when the
+// device is below the minimum compute capability.
+int main(int argc, char **argv) {
+  cudaDeviceProp dP;
+  float min_cc = 3.0f;
+
+  int rc = cudaGetDeviceProperties(&dP, 0);
+  if (rc != cudaSuccess) {
+    cudaError_t error = cudaGetLastError();
+    printf("CUDA error: %s", cudaGetErrorString(error));
+    return rc; /* Failure */
+  }
+  // Divide by 10.0f: an integer division would always discard the minor revision.
+  if ((dP.major + (dP.minor / 10.0f)) < min_cc) {
+    printf("Min Compute Capability of %2.1f required:  %d.%d found\n Not "
+           "Building CUDA Code",
+           min_cc, dP.major, dP.minor);
+    return 1; /* Failure */
+  } else {
+    printf("-arch=sm_%d%d", dP.major, dP.minor);
+    return 0; /* Success */
+  }
+}
--- a/backends/tfhe-cuda-backend/cuda/format_tfhe_cuda_backend.sh
+++ b/backends/tfhe-cuda-backend/cuda/format_tfhe_cuda_backend.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Format every C++/CUDA source and header under include/ and src/ in place with clang-format.
+# File names are passed NUL-separated so that paths containing whitespace stay in one piece.
+find ./{include,src} -iregex '^.*\.\(cpp\|cu\|h\|cuh\)$' -print0 | xargs -0 clang-format-15 -i -style='file'
+# Format the top-level CMakeLists.txt.
+cmake-format -i CMakeLists.txt -c .cmake-format-config.py
+
+# Format the nested CMakeLists.txt files; -exec hands each path over as a single argument
+# instead of splicing it into a shell command line.
+find ./{include,src} -type f -name "CMakeLists.txt" -exec cmake-format -i {} -c .cmake-format-config.py \;
--- a/backends/tfhe-cuda-backend/cuda/include/bootstrap.h
+++ b/backends/tfhe-cuda-backend/cuda/include/bootstrap.h
@@ -0,0 +1,118 @@
+#ifndef CUDA_BOOTSTRAP_H
+#define CUDA_BOOTSTRAP_H
+
+#include "device.h"
+#include <cstdint>
+
+enum PBS_TYPE { MULTI_BIT = 0, LOW_LAT = 1, AMORTIZED = 2 };
+
+extern "C" {
+void cuda_fourier_polynomial_mul(void *input1, void *input2, void *output,
+                                 cuda_stream_t *stream,
+                                 uint32_t polynomial_size,
+                                 uint32_t total_polynomials);
+
+void cuda_convert_lwe_bootstrap_key_32(void *dest, void *src,
+                                       cuda_stream_t *stream,
+                                       uint32_t input_lwe_dim,
+                                       uint32_t glwe_dim, uint32_t level_count,
+                                       uint32_t polynomial_size);
+
+void cuda_convert_lwe_bootstrap_key_64(void *dest, void *src,
+                                       cuda_stream_t *stream,
+                                       uint32_t input_lwe_dim,
+                                       uint32_t glwe_dim, uint32_t level_count,
+                                       uint32_t polynomial_size);
+
+void scratch_cuda_bootstrap_amortized_32(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
+    uint32_t max_shared_memory, bool allocate_gpu_memory);
+
+void scratch_cuda_bootstrap_amortized_64(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
+    uint32_t max_shared_memory, bool allocate_gpu_memory);
+
+void cuda_bootstrap_amortized_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cuda_bootstrap_amortized_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cleanup_cuda_bootstrap_amortized(cuda_stream_t *stream,
+                                      int8_t **pbs_buffer);
+
+void scratch_cuda_bootstrap_low_latency_32(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t level_count,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
+    bool allocate_gpu_memory);
+
+void scratch_cuda_bootstrap_low_latency_64(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t level_count,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
+    bool allocate_gpu_memory);
+
+void cuda_bootstrap_low_latency_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cuda_bootstrap_low_latency_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t base_log, uint32_t level_count, uint32_t num_samples,
+    uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
+
+void cleanup_cuda_bootstrap_low_latency(cuda_stream_t *stream,
+                                        int8_t **pbs_buffer);
+
+uint64_t get_buffer_size_bootstrap_amortized_64(
+    uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
+
+uint64_t get_buffer_size_bootstrap_low_latency_64(
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
+    uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
+}
+
+#ifdef __CUDACC__
+__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
+                                         int glwe_dimension,
+                                         uint32_t level_count);
+
+template <typename T>
+__device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level,
+                                     uint32_t polynomial_size,
+                                     int glwe_dimension, uint32_t level_count);
+
+template <typename T>
+__device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level,
+                                     uint32_t polynomial_size,
+                                     int glwe_dimension, uint32_t level_count);
+
+template <typename T>
+__device__ T *get_multi_bit_ith_lwe_gth_group_kth_block(
+    T *ptr, int g, int i, int k, int level, uint32_t grouping_factor,
+    uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count);
+
+#endif
+
+#endif // CUDA_BOOTSTRAP_H
--- a/backends/tfhe-cuda-backend/cuda/include/bootstrap_multibit.h
+++ b/backends/tfhe-cuda-backend/cuda/include/bootstrap_multibit.h
@@ -0,0 +1,46 @@
+#ifndef CUDA_MULTI_BIT_H
+#define CUDA_MULTI_BIT_H
+
+#include <cstdint>
+
+extern "C" {
+void cuda_convert_lwe_multi_bit_bootstrap_key_64(
+    void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
+    uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size,
+    uint32_t grouping_factor);
+
+void cuda_multi_bit_pbs_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
+    void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
+    uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
+    uint32_t max_shared_memory, uint32_t chunk_size = 0);
+
+void scratch_cuda_multi_bit_pbs_64(
+    cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension,
+    uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
+    uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
+    uint32_t max_shared_memory, bool allocate_gpu_memory,
+    uint32_t chunk_size = 0);
+
+void cleanup_cuda_multi_bit_pbs(cuda_stream_t *stream, int8_t **pbs_buffer);
+}
+#ifdef __CUDACC__
+__host__ uint32_t get_lwe_chunk_size(uint32_t lwe_dimension,
+                                     uint32_t level_count,
+                                     uint32_t glwe_dimension,
+                                     uint32_t num_samples);
+
+__host__ uint32_t get_average_lwe_chunk_size(uint32_t lwe_dimension,
+                                             uint32_t level_count,
+                                             uint32_t glwe_dimension,
+                                             uint32_t ct_count);
+
+__host__ uint64_t get_max_buffer_size_multibit_bootstrap(
+    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
+    uint32_t level_count, uint32_t max_input_lwe_ciphertext_count);
+#endif
+
+#endif // CUDA_MULTI_BIT_H
--- a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
+++ b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h
@@ -0,0 +1,18 @@
+#ifndef CUDA_CIPHERTEXT_H
+#define CUDA_CIPHERTEXT_H
+
+#include <cstdint>
+
+extern "C" {
+// NOTE(review): judging by the name only, this copies `number_of_cts` 64-bit LWE ciphertexts
+// of dimension `lwe_dimension` from `src` (host) to `dest` (device) using the stream behind
+// `v_stream` on GPU `gpu_index` -- confirm against the definition.
+void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
+                                                  void *v_stream,
+                                                  uint32_t gpu_index,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension);
+// NOTE(review): presumably the device-to-host counterpart of the function above -- confirm
+// against the definition.
+void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
+                                                  void *v_stream,
+                                                  uint32_t gpu_index,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension);
+}
+#endif
--- a/backends/tfhe-cuda-backend/cuda/include/device.h
+++ b/backends/tfhe-cuda-backend/cuda/include/device.h
@@ -0,0 +1,88 @@
+#ifndef DEVICE_H
+#define DEVICE_H
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cuda_runtime.h>
+
+#define synchronize_threads_in_block() __syncthreads()
+
+extern "C" {
+
+// Pairs a CUDA stream handle with the index of the GPU it is associated with.
+struct cuda_stream_t {
+  cudaStream_t stream;
+  uint32_t gpu_index;
+
+  // Stores gpu_index and creates the stream. No cudaSetDevice call is made here, so the
+  // stream is created on whichever device is current for the calling thread; the return
+  // code of cudaStreamCreate is not checked.
+  cuda_stream_t(uint32_t gpu_index) {
+    this->gpu_index = gpu_index;
+
+    cudaStreamCreate(&stream);
+  }
+
+  // Makes gpu_index the current device, then destroys the stream. Return codes are not
+  // checked.
+  void release() {
+    cudaSetDevice(gpu_index);
+    cudaStreamDestroy(stream);
+  }
+
+  // Blocks the calling host thread until the work queued on the stream has completed.
+  void synchronize() { cudaStreamSynchronize(stream); }
+};
+
+cuda_stream_t *cuda_create_stream(uint32_t gpu_index);
+
+int cuda_destroy_stream(cuda_stream_t *stream);
+
+void *cuda_malloc(uint64_t size, uint32_t gpu_index);
+
+void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream);
+
+int cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
+
+int cuda_check_support_cooperative_groups();
+
+int cuda_memcpy_to_cpu(void *dest, const void *src, uint64_t size);
+
+int cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
+                             cuda_stream_t *stream);
+
+int cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
+                                 cuda_stream_t *stream);
+
+int cuda_memcpy_to_gpu(void *dest, void *src, uint64_t size);
+
+int cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
+                             cuda_stream_t *stream);
+
+int cuda_memset_async(void *dest, uint64_t val, uint64_t size,
+                      cuda_stream_t *stream);
+
+int cuda_get_number_of_gpus();
+
+int cuda_synchronize_device(uint32_t gpu_index);
+
+int cuda_drop(void *ptr, uint32_t gpu_index);
+
+int cuda_drop_async(void *ptr, cuda_stream_t *stream);
+
+int cuda_get_max_shared_memory(uint32_t gpu_index);
+
+int cuda_synchronize_stream(cuda_stream_t *stream);
+
+// Wraps an expression yielding a cudaError_t and forwards it to cuda_error together with the
+// file and line of the call site.
+// NOTE(review): the macro expands to a bare { ... } block rather than do { ... } while (0),
+// so `if (c) check_cuda_error(x); else ...` does not compile.
+#define check_cuda_error(ans)                                                  \
+  { cuda_error((ans), __FILE__, __LINE__); }
+// When code is not cudaSuccess, prints the CUDA error string followed by file and line to
+// stderr; if abort is true (the default) the process then exits with code as its status.
+inline void cuda_error(cudaError_t code, const char *file, int line,
+                       bool abort = true) {
+  if (code != cudaSuccess) {
+    fprintf(stderr, "Cuda error: %s %s %d\n", cudaGetErrorString(code), file,
+            line);
+    if (abort)
+      exit(code);
+  }
+}
+}
+
+template <typename Torus>
+void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
+                          Torus n);
+#endif
--- a/backends/tfhe-cuda-backend/cuda/include/helper_debug.cuh
+++ b/backends/tfhe-cuda-backend/cuda/include/helper_debug.cuh
@@ -0,0 +1,100 @@
+#include "cuComplex.h"
+#include "thrust/complex.h"
+#include <iostream>
+#include <string>
+#include <type_traits>
+
+#define PRINT_VARS
+#ifdef PRINT_VARS
+#define PRINT_DEBUG_5(var, begin, end, step, cond)                             \
+  _print_debug(var, #var, begin, end, step, cond, "", false)
+#define PRINT_DEBUG_6(var, begin, end, step, cond, text)                       \
+  _print_debug(var, #var, begin, end, step, cond, text, true)
+#define CAT(A, B) A##B
+#define PRINT_SELECT(NAME, NUM) CAT(NAME##_, NUM)
+#define GET_COUNT(_1, _2, _3, _4, _5, _6, COUNT, ...) COUNT
+#define VA_SIZE(...) GET_COUNT(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
+#define PRINT_DEBUG(...)                                                       \
+  PRINT_SELECT(PRINT_DEBUG, VA_SIZE(__VA_ARGS__))(__VA_ARGS__)
+#else
+#define PRINT_DEBUG(...)
+#endif
+
+// Debug helper for arrays of unsigned integers: when `cond` holds, prints
+// `text` (if `has_text`) followed by var[i] for i = start, start + step, ...
+// while i < end. Both barriers are executed whatever the value of `cond`, so
+// the call has to be reached by every thread of the block.
+template <typename T>
+__device__ typename std::enable_if<std::is_unsigned<T>::value, void>::type
+_print_debug(T *var, const char *var_name, int start, int end, int step,
+             bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      // Widen the value to 64 bits so uint64_t elements are printed in full;
+      // the index is an int and is therefore printed with %d.
+      printf("%s[%d]: %llu\n", var_name, i, (unsigned long long)var[i]);
+    }
+  }
+  __syncthreads();
+}
+
+// Debug helper for arrays of signed integers: when `cond` holds, prints
+// `text` (if `has_text`) followed by var[i] for i = start, start + step, ...
+// while i < end. Both barriers are executed whatever the value of `cond`, so
+// the call has to be reached by every thread of the block.
+//
+// std::is_signed is also true for floating-point types, so the overload is
+// restricted to integral types to stay distinct from the floating-point one.
+template <typename T>
+__device__ typename std::enable_if<
+    std::is_integral<T>::value && std::is_signed<T>::value, void>::type
+_print_debug(T *var, const char *var_name, int start, int end, int step,
+             bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      // Widen the value to 64 bits so int64_t elements are printed in full.
+      printf("%s[%d]: %lld\n", var_name, i, (long long)var[i]);
+    }
+  }
+  __syncthreads();
+}
+
+// Debug helper for arrays of floating-point values: when `cond` holds, prints
+// `text` (if `has_text`) followed by var[i], with 15 decimals, for
+// i = start, start + step, ... while i < end. Both barriers are executed
+// whatever the value of `cond`, so the call has to be reached by every thread
+// of the block.
+template <typename T>
+__device__ typename std::enable_if<std::is_floating_point<T>::value, void>::type
+_print_debug(T *var, const char *var_name, int start, int end, int step,
+             bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      // The index is an int, hence %d rather than %u.
+      printf("%s[%d]: %.15f\n", var_name, i, var[i]);
+    }
+  }
+  __syncthreads();
+}
+
+// Debug helper for arrays of thrust::complex<double>: when `cond` holds,
+// prints `text` (if `has_text`) followed by the real and imaginary parts of
+// var[i] for i = start, start + step, ... while i < end. Both barriers are
+// executed whatever the value of `cond`, so the call has to be reached by
+// every thread of the block.
+template <typename T>
+__device__
+    typename std::enable_if<std::is_same<T, thrust::complex<double>>::value,
+                            void>::type
+    _print_debug(T *var, const char *var_name, int start, int end, int step,
+                 bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      // The index is an int, hence %d rather than %u.
+      printf("%s[%d]: %.15f , %.15f\n", var_name, i, var[i].real(),
+             var[i].imag());
+    }
+  }
+  __syncthreads();
+}
+
+// Debug helper for arrays of cuDoubleComplex: when `cond` holds, prints
+// `text` (if `has_text`) followed by the x and y components of var[i] for
+// i = start, start + step, ... while i < end. Both barriers are executed
+// whatever the value of `cond`, so the call has to be reached by every thread
+// of the block.
+template <typename T>
+__device__
+    typename std::enable_if<std::is_same<T, cuDoubleComplex>::value, void>::type
+    _print_debug(T *var, const char *var_name, int start, int end, int step,
+                 bool cond, const char *text, bool has_text) {
+  __syncthreads();
+  if (cond) {
+    if (has_text)
+      printf("%s\n", text);
+    for (int i = start; i < end; i += step) {
+      // The index is an int, hence %d rather than %u.
+      printf("%s[%d]: %.15f , %.15f\n", var_name, i, var[i].x, var[i].y);
+    }
+  }
+  __syncthreads();
+}
--- a/backends/tfhe-cuda-backend/cuda/include/integer.h
+++ b/backends/tfhe-cuda-backend/cuda/include/integer.h
--- a/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
+++ b/backends/tfhe-cuda-backend/cuda/include/keyswitch.h
@@ -0,0 +1,21 @@
+#ifndef CNCRT_KS_H_
+#define CNCRT_KS_H_
+
+#include <cstdint>
+
+extern "C" {
+
+void cuda_keyswitch_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples);
+
+void cuda_keyswitch_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples);
+}
+
+#endif // CNCRT_KS_H_
--- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
+++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h
@@ -0,0 +1,50 @@
+#ifndef CUDA_LINALG_H_
+#define CUDA_LINALG_H_
+
+#include "bootstrap.h"
+#include <cstdint>
+#include <device.h>
+
+extern "C" {
+
+void cuda_negate_lwe_ciphertext_vector_32(cuda_stream_t *stream,
+                                          void *lwe_array_out,
+                                          void *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
+void cuda_negate_lwe_ciphertext_vector_64(cuda_stream_t *stream,
+                                          void *lwe_array_out,
+                                          void *lwe_array_in,
+                                          uint32_t input_lwe_dimension,
+                                          uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_32(cuda_stream_t *stream,
+                                       void *lwe_array_out,
+                                       void *lwe_array_in_1,
+                                       void *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_64(cuda_stream_t *stream,
+                                       void *lwe_array_out,
+                                       void *lwe_array_in_1,
+                                       void *lwe_array_in_2,
+                                       uint32_t input_lwe_dimension,
+                                       uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *plaintext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *plaintext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *cleartext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    void *cleartext_array_in, uint32_t input_lwe_dimension,
+    uint32_t input_lwe_ciphertext_count);
+}
+
+#endif // CUDA_LINALG_H_
--- a/backends/tfhe-cuda-backend/cuda/src/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/src/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Headers taken from the include directory.
+# NOTE(review): the file(GLOB_RECURSE SOURCES ...) call further down stores its
+# result in this same SOURCES variable, so the list below is replaced before
+# add_library() reads it. Confirm whether the headers were meant to be kept
+# (and whether every file listed here still exists) before changing this.
+set(SOURCES
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bit_extraction.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bitwise_ops.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/bootstrap_multibit.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/ciphertext.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/circuit_bootstrap.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/device.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/integer.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/keyswitch.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/linear_algebra.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/shifts.h
+    ${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/vertical_packing.h)
+# Collect every .cu file below this directory into SOURCES.
+file(GLOB_RECURSE SOURCES "*.cu")
+# Static library built from the collected sources.
+add_library(tfhe_cuda_backend STATIC ${SOURCES})
+# Enable separable compilation and device-symbol resolution for this target.
+set_target_properties(tfhe_cuda_backend PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+# The CUDA runtime and OpenMP are propagated to consumers (PUBLIC).
+target_link_libraries(tfhe_cuda_backend PUBLIC cudart OpenMP::OpenMP_CXX)
+# This directory is an include root for the library's own sources only.
+target_include_directories(tfhe_cuda_backend PRIVATE .)
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu
@@ -0,0 +1 @@
+#include "ciphertext.cuh"
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh
@@ -0,0 +1,44 @@
+#ifndef CUDA_CIPHERTEXT_CUH
+#define CUDA_CIPHERTEXT_CUH
+
+#include "ciphertext.h"
+#include "device.h"
+#include <cstdint>
+
+/// Copies `number_of_cts` LWE ciphertexts, each made of `lwe_dimension + 1`
+/// elements of type T, from `src` to `dest` through cuda_memcpy_async_to_gpu
+/// on `stream`, after selecting the GPU the stream belongs to.
+template <typename T>
+void cuda_convert_lwe_ciphertext_vector_to_gpu(T *dest, T *src,
+                                               cuda_stream_t *stream,
+                                               uint32_t number_of_cts,
+                                               uint32_t lwe_dimension) {
+  cudaSetDevice(stream->gpu_index);
+  // Widen before multiplying: the product of the two 32-bit operands could
+  // otherwise wrap around before being turned into a 64-bit byte count.
+  uint64_t size = static_cast<uint64_t>(number_of_cts) *
+                  (static_cast<uint64_t>(lwe_dimension) + 1) * sizeof(T);
+  cuda_memcpy_async_to_gpu(dest, src, size, stream);
+}
+
+/// Untyped entry point for 64-bit ciphertexts: views both buffers as uint64_t
+/// arrays and forwards to cuda_convert_lwe_ciphertext_vector_to_gpu.
+void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
+                                                  cuda_stream_t *stream,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension) {
+  auto *typed_dest = static_cast<uint64_t *>(dest);
+  auto *typed_src = static_cast<uint64_t *>(src);
+  cuda_convert_lwe_ciphertext_vector_to_gpu<uint64_t>(
+      typed_dest, typed_src, stream, number_of_cts, lwe_dimension);
+}
+
+/// Copies `number_of_cts` LWE ciphertexts, each made of `lwe_dimension + 1`
+/// elements of type T, from `src` to `dest` through cuda_memcpy_async_to_cpu
+/// on `stream`, after selecting the GPU the stream belongs to.
+template <typename T>
+void cuda_convert_lwe_ciphertext_vector_to_cpu(T *dest, T *src,
+                                               cuda_stream_t *stream,
+                                               uint32_t number_of_cts,
+                                               uint32_t lwe_dimension) {
+  cudaSetDevice(stream->gpu_index);
+  // Widen before multiplying: the product of the two 32-bit operands could
+  // otherwise wrap around before being turned into a 64-bit byte count.
+  uint64_t size = static_cast<uint64_t>(number_of_cts) *
+                  (static_cast<uint64_t>(lwe_dimension) + 1) * sizeof(T);
+  cuda_memcpy_async_to_cpu(dest, src, size, stream);
+}
+
+/// Untyped entry point for 64-bit ciphertexts: views both buffers as uint64_t
+/// arrays and forwards to cuda_convert_lwe_ciphertext_vector_to_cpu.
+void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
+                                                  cuda_stream_t *stream,
+                                                  uint32_t number_of_cts,
+                                                  uint32_t lwe_dimension) {
+  auto *typed_dest = static_cast<uint64_t *>(dest);
+  auto *typed_src = static_cast<uint64_t *>(src);
+  cuda_convert_lwe_ciphertext_vector_to_cpu<uint64_t>(
+      typed_dest, typed_src, stream, number_of_cts, lwe_dimension);
+}
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/gadget.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/gadget.cuh
@@ -0,0 +1,162 @@
+#ifndef CNCRT_CRYPTO_CUH
+#define CNCRT_CRYPTO_CUH
+
+#include "device.h"
+#include <cstdint>
+
+/**
+ * GadgetMatrix implements the iterator design pattern to decompose a set of
+ * num_poly consecutive polynomials with degree params::degree. A total of
+ * level_count levels is expected and each call to decompose_and_compress_next()
+ * writes to the result the next level. It is also possible to advance an
+ * arbitrary amount of levels by using decompose_and_compress_level().
+ *
+ * This class always decomposes the entire set of num_poly polynomials.
+ * By default, it works on a single polynomial.
+ */
+#pragma once
+template <typename T, class params> class GadgetMatrix {
+private:
+  // Number of decomposition levels.
+  uint32_t level_count;
+  // Log of the base used in the decomposition.
+  uint32_t base_log;
+  // NOTE(review): mask, halfbg and offset are never read or written anywhere
+  // in this class.
+  uint32_t mask;
+  uint32_t halfbg;
+  // Number of consecutive polynomials stored in `state`.
+  uint32_t num_poly;
+  T offset;
+  // Initialised to level_count and decremented each time a level is
+  // decomposed for polynomial 0.
+  int current_level;
+  // Mask with the base_log low bits set, used to extract one digit.
+  T mask_mod_b;
+  // Caller-provided coefficients; modified in place by the constructor and by
+  // every decomposition step.
+  T *state;
+
+public:
+  // Stores the parameters, then has every thread right-shift
+  // num_poly * params::opt entries of `state` (starting at threadIdx.x, with a
+  // stride of params::degree / params::opt) by
+  // sizeof(T) * 8 - base_log * level_count bits, so only the bits covered by
+  // the decomposition remain. Ends with a block-wide synchronization.
+  __device__ GadgetMatrix(uint32_t base_log, uint32_t level_count, T *state,
+                          uint32_t num_poly = 1)
+      : base_log(base_log), level_count(level_count), num_poly(num_poly),
+        state(state) {
+
+    mask_mod_b = (1ll << base_log) - 1ll;
+    current_level = level_count;
+    int tid = threadIdx.x;
+    for (int i = 0; i < num_poly * params::opt; i++) {
+      state[tid] >>= (sizeof(T) * 8 - base_log * level_count);
+      tid += params::degree / params::opt;
+    }
+    synchronize_threads_in_block();
+  }
+
+  // Decomposes all polynomials at once
+  // Polynomial j is written to result + j * params::degree / 2.
+  __device__ void decompose_and_compress_next(double2 *result) {
+    for (int j = 0; j < num_poly; j++) {
+      auto result_slice = result + j * params::degree / 2;
+      decompose_and_compress_next_polynomial(result_slice, j);
+    }
+  }
+
+  // Decomposes a single polynomial
+  // Extracts the next digit of polynomial j from `state`. Each thread handles
+  // params::opt / 2 pairs of coefficients (tid and tid + params::degree / 2)
+  // and stores them as the x and y components of result[tid]. Ends with a
+  // block-wide synchronization.
+  __device__ void decompose_and_compress_next_polynomial(double2 *result,
+                                                         int j) {
+    // The level counter only moves once per level, on the first polynomial.
+    if (j == 0)
+      current_level -= 1;
+
+    int tid = threadIdx.x;
+    auto state_slice = state + j * params::degree;
+    for (int i = 0; i < params::opt / 2; i++) {
+      // Low base_log bits of each state value form the candidate digit.
+      T res_re = state_slice[tid] & mask_mod_b;
+      T res_im = state_slice[tid + params::degree / 2] & mask_mod_b;
+      state_slice[tid] >>= base_log;
+      state_slice[tid + params::degree / 2] >>= base_log;
+      // carry ends up as 0 or 1 (bit base_log - 1 of the masked expression).
+      T carry_re = ((res_re - 1ll) | state_slice[tid]) & res_re;
+      T carry_im =
+          ((res_im - 1ll) | state_slice[tid + params::degree / 2]) & res_im;
+      carry_re >>= (base_log - 1);
+      carry_im >>= (base_log - 1);
+      // Propagate the carry to the remaining state and subtract
+      // carry * 2^base_log from the digit.
+      state_slice[tid] += carry_re;
+      state_slice[tid + params::degree / 2] += carry_im;
+      res_re -= carry_re << base_log;
+      res_im -= carry_im << base_log;
+
+      // The digit is converted to int32_t before being stored as a double.
+      result[tid].x = (int32_t)res_re;
+      result[tid].y = (int32_t)res_im;
+
+      tid += params::degree / params::opt;
+    }
+    synchronize_threads_in_block();
+  }
+
+  // Decomposes a single polynomial
+  // Same computation as decompose_and_compress_next_polynomial, except that
+  // the digits are written to result[i], i.e. indexed by the per-thread loop
+  // counter instead of by the coefficient position.
+  __device__ void
+  decompose_and_compress_next_polynomial_elements(double2 *result, int j) {
+    if (j == 0)
+      current_level -= 1;
+
+    int tid = threadIdx.x;
+    auto state_slice = state + j * params::degree;
+    for (int i = 0; i < params::opt / 2; i++) {
+      T res_re = state_slice[tid] & mask_mod_b;
+      T res_im = state_slice[tid + params::degree / 2] & mask_mod_b;
+      state_slice[tid] >>= base_log;
+      state_slice[tid + params::degree / 2] >>= base_log;
+      T carry_re = ((res_re - 1ll) | state_slice[tid]) & res_re;
+      T carry_im =
+          ((res_im - 1ll) | state_slice[tid + params::degree / 2]) & res_im;
+      carry_re >>= (base_log - 1);
+      carry_im >>= (base_log - 1);
+      state_slice[tid] += carry_re;
+      state_slice[tid + params::degree / 2] += carry_im;
+      res_re -= carry_re << base_log;
+      res_im -= carry_im << base_log;
+
+      result[i].x = (int32_t)res_re;
+      result[i].y = (int32_t)res_im;
+
+      tid += params::degree / params::opt;
+    }
+    synchronize_threads_in_block();
+  }
+
+  // Runs decompose_and_compress_next() level_count - level times; `result`
+  // holds the output of the last of those calls.
+  __device__ void decompose_and_compress_level(double2 *result, int level) {
+    for (int i = 0; i < level_count - level; i++)
+      decompose_and_compress_next(result);
+  }
+};
+
+// Decomposes single torus elements: the constructor precomputes the digit
+// mask, half of the base, and an offset that is added to an element before a
+// digit is extracted from it.
+template <typename T> class GadgetMatrixSingle {
+private:
+  uint32_t level_count;
+  uint32_t base_log;
+  uint32_t mask;
+  uint32_t halfbg;
+  T offset;
+
+public:
+  // Precomputes mask = 2^base_log - 1, halfbg = 2^base_log / 2 and
+  // offset = halfbg * sum over the levels of 2^(bits(T) - (level + 1) *
+  // base_log).
+  __device__ GadgetMatrixSingle(uint32_t base_log, uint32_t level_count)
+      : base_log(base_log), level_count(level_count) {
+    const uint32_t base = 1 << base_log;
+    halfbg = base / 2;
+    mask = base - 1;
+
+    T level_weights = 0;
+    uint32_t level = 0;
+    while (level < this->level_count) {
+      level_weights += 1ULL << (sizeof(T) * 8 - (level + 1) * this->base_log);
+      ++level;
+    }
+    offset = level_weights * halfbg;
+  }
+
+  // Returns the digit of `element` at `level`: the element plus the
+  // precomputed offset is shifted so that the digit lands in the low bits,
+  // masked, and halfbg is subtracted from it.
+  __device__ T decompose_one_level_single(T element, uint32_t level) {
+    const uint32_t digit_shift = (sizeof(T) * 8 - (level + 1) * base_log);
+    T shifted_element = element + offset;
+    T digit = (shifted_element >> digit_shift) & mask;
+    return (T)(digit - halfbg);
+  }
+};
+
+// Extracts one digit from `state` and updates `state` in place: the low
+// base_log bits selected by `mask_mod_b` form the digit, the state is shifted
+// right by base_log, and a 0/1 carry is added to the state while
+// carry * 2^base_log is subtracted from the returned digit.
+template <typename Torus>
+__device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) {
+  Torus digit = state & mask_mod_b;
+  state >>= base_log;
+
+  Torus carry = ((digit - 1ll) | state) & digit;
+  carry >>= base_log - 1;
+
+  state += carry;
+  digit -= carry << base_log;
+  return digit;
+}
+
+#endif // CNCRT_CRYPTO_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ggsw.cuh
@@ -0,0 +1,74 @@
+#ifndef CNCRT_GGSW_CUH
+#define CNCRT_GGSW_CUH
+
+#include "device.h"
+#include "fft/bnsmfft.cuh"
+#include "polynomial/parameters.cuh"
+
+// Converts one polynomial per block to the Fourier domain.
+//
+// Each block reads its coefficients from `src` (read as the signed type ST
+// and divided by the maximum value of T), packs coefficient tid as the x part
+// and coefficient tid + params::degree / 2 as the y part of a double2, runs
+// NSMFFT_direct on those params::degree / 2 values and writes them to `dest`.
+//
+// Working memory: with SMD == FULLSM the dynamic shared memory is used and
+// must hold params::degree / 2 double2 values; otherwise block b uses the
+// params::degree / 2 double2 values starting at element
+// b * params::degree / 2 of `device_mem`.
+// NOTE(review): this requires `device_mem` to provide
+// gridDim.x * (params::degree / 2) * sizeof(double2) bytes - confirm against
+// the callers' allocation.
+template <typename T, typename ST, class params, sharedMemDegree SMD>
+__global__ void device_batch_fft_ggsw_vector(double2 *dest, T *src,
+                                             int8_t *device_mem) {
+
+  extern __shared__ int8_t sharedmem[];
+  double2 *selected_memory;
+
+  if constexpr (SMD == FULLSM) {
+    selected_memory = (double2 *)sharedmem;
+  } else {
+    // Point into the scratch buffer (the original cast the byte *value* read
+    // from device_mem to a pointer) and give every block a disjoint slice of
+    // params::degree / 2 complex values.
+    selected_memory =
+        (double2 *)device_mem + blockIdx.x * (params::degree / 2);
+  }
+
+  // Compression
+  int offset = blockIdx.x * blockDim.x;
+
+  int tid = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    ST x = src[(tid) + params::opt * offset];
+    ST y = src[(tid + params::degree / 2) + params::opt * offset];
+    selected_memory[tid].x = x / (double)std::numeric_limits<T>::max();
+    selected_memory[tid].y = y / (double)std::numeric_limits<T>::max();
+    tid += params::degree / params::opt;
+  }
+  synchronize_threads_in_block();
+
+  // Switch to the FFT space
+  NSMFFT_direct<HalfDegree<params>>(selected_memory);
+  synchronize_threads_in_block();
+
+  // Write the output to global memory
+  tid = threadIdx.x;
+#pragma unroll
+  for (int j = 0; j < params::opt / 2; j++) {
+    dest[tid + (params::opt >> 1) * offset] = selected_memory[tid];
+    tid += params::degree / params::opt;
+  }
+}
+
+/**
+ * Applies the FFT transform on a sequence of GGSW ciphertexts already in the
+ * global memory.
+ *
+ * Launches r * (glwe_dim + 1)^2 * level_count blocks of
+ * polynomial_size / params::opt threads on `stream`. When
+ * sizeof(double) * polynomial_size bytes fit in `max_shared_memory` the
+ * kernel works in dynamic shared memory, otherwise it uses `d_mem` as
+ * scratch space.
+ *
+ * The GPU is selected from `stream`; the `gpu_index` argument is not used.
+ */
+template <typename T, typename ST, class params>
+void batch_fft_ggsw_vector(cuda_stream_t *stream, double2 *dest, T *src,
+                           int8_t *d_mem, uint32_t r, uint32_t glwe_dim,
+                           uint32_t polynomial_size, uint32_t level_count,
+                           uint32_t gpu_index, uint32_t max_shared_memory) {
+  cudaSetDevice(stream->gpu_index);
+
+  // Kept unsigned so the comparison with max_shared_memory does not mix
+  // signed and unsigned operands.
+  uint32_t shared_memory_size = sizeof(double) * polynomial_size;
+
+  int gridSize = r * (glwe_dim + 1) * (glwe_dim + 1) * level_count;
+  int blockSize = polynomial_size / params::opt;
+
+  if (max_shared_memory < shared_memory_size) {
+    device_batch_fft_ggsw_vector<T, ST, params, NOSM>
+        <<<gridSize, blockSize, 0, stream->stream>>>(dest, src, d_mem);
+  } else {
+    // Dynamic shared memory above the default per-block limit has to be
+    // enabled explicitly for the kernel, otherwise the launch is rejected.
+    check_cuda_error(cudaFuncSetAttribute(
+        device_batch_fft_ggsw_vector<T, ST, params, FULLSM>,
+        cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
+    device_batch_fft_ggsw_vector<T, ST, params, FULLSM>
+        <<<gridSize, blockSize, shared_memory_size, stream->stream>>>(dest, src,
+                                                                      d_mem);
+  }
+  check_cuda_error(cudaGetLastError());
+}
+
+#endif // CNCRT_GGSW_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cu
@@ -0,0 +1,48 @@
+#include "keyswitch.cuh"
+#include "keyswitch.h"
+#include <cstdint>
+
+/* Keyswitch on a batch of LWE ciphertexts with 32-bit elements.
+ * Views every untyped buffer as an array of uint32_t and forwards to the
+ * templated cuda_keyswitch_lwe_ciphertext_vector. The arguments have the same
+ * meaning as for the 64-bit entry point.
+ */
+void cuda_keyswitch_lwe_ciphertext_vector_32(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples) {
+  auto *out = static_cast<uint32_t *>(lwe_array_out);
+  auto *out_indexes = static_cast<uint32_t *>(lwe_output_indexes);
+  auto *in = static_cast<uint32_t *>(lwe_array_in);
+  auto *in_indexes = static_cast<uint32_t *>(lwe_input_indexes);
+  auto *key = static_cast<uint32_t *>(ksk);
+
+  cuda_keyswitch_lwe_ciphertext_vector(
+      stream, out, out_indexes, in, in_indexes, key, lwe_dimension_in,
+      lwe_dimension_out, base_log, level_count, num_samples);
+}
+
+/* Keyswitch on a batch of LWE ciphertexts with 64-bit elements.
+ *
+ *  - stream: stream (and associated GPU) the work is submitted to
+ *  - lwe_array_out: output batch of num_samples keyswitched ciphertexts c =
+ * (a0,..an-1,b) where n is the output LWE dimension (lwe_dimension_out)
+ *  - lwe_output_indexes / lwe_input_indexes: one index per sample, selecting
+ * which ciphertext of the output / input batch the sample uses
+ *  - lwe_array_in: input batch of num_samples LWE ciphertexts, containing
+ * lwe_dimension_in mask values + 1 body value
+ *  - ksk: the keyswitch key to be used in the operation
+ *  - base_log: the log of the base used in the decomposition (should be the
+ * one used to create the ksk)
+ *  - level_count: number of decomposition levels
+ *
+ * Views every untyped buffer as an array of uint64_t and forwards to the
+ * templated cuda_keyswitch_lwe_ciphertext_vector, which launches the device
+ * kernel with num_samples blocks of threads.
+ */
+void cuda_keyswitch_lwe_ciphertext_vector_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
+    void *lwe_array_in, void *lwe_input_indexes, void *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples) {
+  auto *out = static_cast<uint64_t *>(lwe_array_out);
+  auto *out_indexes = static_cast<uint64_t *>(lwe_output_indexes);
+  auto *in = static_cast<uint64_t *>(lwe_array_in);
+  auto *in_indexes = static_cast<uint64_t *>(lwe_input_indexes);
+  auto *key = static_cast<uint64_t *>(ksk);
+
+  cuda_keyswitch_lwe_ciphertext_vector(
+      stream, out, out_indexes, in, in_indexes, key, lwe_dimension_in,
+      lwe_dimension_out, base_log, level_count, num_samples);
+}
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/keyswitch.cuh
@@ -0,0 +1,144 @@
+#ifndef CNCRT_KS_CUH
+#define CNCRT_KS_CUH
+
+#include "device.h"
+#include "gadget.cuh"
+#include "polynomial/polynomial_math.cuh"
+#include "torus.cuh"
+#include <thread>
+#include <vector>
+
+// Returns the address, inside `ksk`, of the run of lwe_dimension_out + 1
+// elements associated with input coefficient `i` and decomposition level
+// `level`. Runs are laid out level after level for each input coefficient.
+template <typename Torus>
+__device__ Torus *get_ith_block(Torus *ksk, int i, int level,
+                                uint32_t lwe_dimension_out,
+                                uint32_t level_count) {
+  const uint32_t block_len = lwe_dimension_out + 1;
+  int pos = (i * level_count + level) * block_len;
+  return ksk + pos;
+}
+
+/*
+ * keyswitch kernel
+ * Each thread handles a piece of the following equation:
+ * $$GLWE_s2(\Delta.m+e) = (0,0,..,0,b) - \sum_{i=0,k-1} <Dec(a_i),
+ * (GLWE_s2(s1_i q/beta),..,GLWE(s1_i q/beta^l)>$$ where k is the dimension of
+ * the GLWE ciphertext. If the polynomial dimension in GLWE is > 1, this
+ * equation is solved for each polynomial coefficient. where Dec denotes the
+ * decomposition with base beta and l levels and the inner product is done
+ * between the decomposition of a_i and l GLWE encryptions of s1_i q/\beta^j,
+ * with j in [1,l] We obtain a GLWE encryption of Delta.m (with Delta the
+ * scaling factor) under key s2 instead of s1, with an increased noise
+ *
+ */
+// One block per sample. The block accumulates the output ciphertext in
+// dynamic shared memory (lwe_dimension_out + 1 elements of Torus) and copies
+// it to global memory at the end. Threads with an index below `cutoff` own
+// `lwe_upper` output coefficients, the others `lwe_lower`; the coefficients
+// owned by a thread are tid, tid + blockDim.x, tid + 2 * blockDim.x, ...
+template <typename Torus>
+__global__ void
+keyswitch(Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lwe_array_in,
+          Torus *lwe_input_indexes, Torus *ksk, uint32_t lwe_dimension_in,
+          uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
+          int lwe_lower, int lwe_upper, int cutoff) {
+  extern __shared__ int8_t sharedmem[];
+  Torus *accumulator = (Torus *)sharedmem;
+
+  const int thread_id = threadIdx.x;
+
+  // Input and output ciphertexts handled by this block.
+  auto input_ct = get_chunk(lwe_array_in, lwe_input_indexes[blockIdx.x],
+                            lwe_dimension_in + 1);
+  auto output_ct = get_chunk(lwe_array_out, lwe_output_indexes[blockIdx.x],
+                             lwe_dimension_out + 1);
+
+  // Constructed as in the original code; not referenced below.
+  auto gadget = GadgetMatrixSingle<Torus>(base_log, level_count);
+
+  const int parts_for_thread = (thread_id < cutoff) ? lwe_upper : lwe_lower;
+  __syncthreads();
+
+  // Clear the coefficients owned by this thread.
+  for (int part = 0; part < parts_for_thread; part++) {
+    const int idx = thread_id + part * blockDim.x;
+    accumulator[idx] = 0;
+  }
+  __syncthreads();
+
+  // The body of the output starts as the body of the input.
+  if (thread_id == 0)
+    accumulator[lwe_dimension_out] = input_ct[lwe_dimension_in];
+
+  for (int i = 0; i < lwe_dimension_in; i++) {
+
+    __syncthreads();
+
+    // Keep only the bits covered by the decomposition of the i-th mask value.
+    Torus rounded =
+        round_to_closest_multiple(input_ct[i], base_log, level_count);
+    Torus state = rounded >> (sizeof(Torus) * 8 - base_log * level_count);
+    Torus mask_mod_b = (1ll << base_log) - 1ll;
+
+    for (int level = 0; level < level_count; level++) {
+      auto key_row =
+          get_ith_block(ksk, i, level, lwe_dimension_out, level_count);
+      Torus digit = decompose_one<Torus>(state, mask_mod_b, base_log);
+      for (int part = 0; part < parts_for_thread; part++) {
+        const int idx = thread_id + part * blockDim.x;
+        accumulator[idx] -= (Torus)key_row[idx] * digit;
+      }
+    }
+  }
+
+  // Each thread writes back the coefficients it owns.
+  for (int part = 0; part < parts_for_thread; part++) {
+    const int idx = thread_id + part * blockDim.x;
+    output_ct[idx] = accumulator[idx];
+  }
+}
+
+/// Host-side launcher of the keyswitch kernel; `lwe_array_in` is assumed to
+/// already be on the GPU. Zeroes the first num_samples output ciphertexts,
+/// then launches num_samples blocks of 128 threads with
+/// sizeof(Torus) * (lwe_dimension_out + 1) bytes of dynamic shared memory.
+template <typename Torus>
+__host__ void cuda_keyswitch_lwe_ciphertext_vector(
+    cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
+    Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *ksk,
+    uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
+    uint32_t level_count, uint32_t num_samples) {
+
+  cudaSetDevice(stream->gpu_index);
+  constexpr int ideal_threads = 128;
+
+  // Share the lwe_dimension_out + 1 output coefficients between the threads:
+  // the first `cutoff` threads take `lwe_upper` coefficients each, the
+  // remaining ones `lwe_lower`.
+  const int lwe_dim = lwe_dimension_out + 1;
+  const int leftover = lwe_dim % ideal_threads;
+  const int lwe_lower = lwe_dim / ideal_threads;
+  const int lwe_upper = (leftover == 0) ? lwe_lower : lwe_lower + 1;
+  const int cutoff = leftover;
+
+  const int total_output_elements = (lwe_dimension_out + 1) * num_samples;
+  const int shared_mem = sizeof(Torus) * (lwe_dimension_out + 1);
+
+  cuda_memset_async(lwe_array_out, 0, sizeof(Torus) * total_output_elements,
+                    stream);
+  check_cuda_error(cudaGetLastError());
+
+  dim3 grid(num_samples, 1, 1);
+  dim3 threads(ideal_threads, 1, 1);
+
+  //    cudaFuncSetAttribute(keyswitch<Torus>,
+  //                         cudaFuncAttributeMaxDynamicSharedMemorySize,
+  //                         shared_mem);
+
+  keyswitch<<<grid, threads, shared_mem, stream->stream>>>(
+      lwe_array_out, lwe_output_indexes, lwe_array_in, lwe_input_indexes, ksk,
+      lwe_dimension_in, lwe_dimension_out, base_log, level_count, lwe_lower,
+      lwe_upper, cutoff);
+  check_cuda_error(cudaGetLastError());
+}
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh
@@ -0,0 +1,74 @@
+#ifndef CNCRT_TORUS_CUH
+#define CNCRT_TORUS_CUH
+
+#include "types/int128.cuh"
+#include <limits>
+
+// Generic conversion of a double to a torus element: plain conversion to T.
+template <typename T>
+__device__ inline void typecast_double_to_torus(double x, T &r) {
+  const T converted = T(x);
+  r = converted;
+}
+
+// 32-bit torus: conversion through the __double2uint_rn intrinsic.
+template <>
+__device__ inline void typecast_double_to_torus<uint32_t>(double x,
+                                                          uint32_t &r) {
+  const uint32_t converted = __double2uint_rn(x);
+  r = converted;
+}
+
+// 64-bit torus: the value is converted to a 128-bit integer and its low
+// 64 bits are kept.
+template <>
+__device__ inline void typecast_double_to_torus<uint64_t>(double x,
+                                                          uint64_t &r) {
+  // The ull intrinsic does not behave in the same way on all architectures and
+  // on some platforms this causes the cmux tree test to fail
+  // Hence the intrinsic is not used here
+  const uint128 wide = make_uint128_from_float(x);
+  const uint64_t low_half = wide.lo_;
+  r = low_half;
+}
+
+// Rounds `x` to the closest multiple of 2^shift, where
+// shift = bits(T) - level_count * base_log, i.e. keeps only the
+// level_count * base_log most significant bits after rounding on the first
+// discarded bit. The result wraps around modulo 2^bits(T).
+template <typename T>
+__device__ inline T round_to_closest_multiple(T x, uint32_t base_log,
+                                              uint32_t level_count) {
+  T shift = sizeof(T) * 8 - level_count * base_log;
+  // When the decomposition covers every bit of x there is nothing to round;
+  // without this guard `shift - 1` would wrap around and the shifts below
+  // would be undefined.
+  if (shift == 0)
+    return x;
+  // Built from T(1) so that the shift never overflows a signed constant.
+  T mask = T(1) << (shift - 1);
+  T b = (x & mask) >> (shift - 1);
+  T res = x >> shift;
+  res += b;
+  res <<= shift;
+  return res;
+}
+
+// Writes to `output` the rounded value of
+// element / (max(T) + 1) * log_shift, computed in double precision.
+template <typename T>
+__device__ __forceinline__ void rescale_torus_element(T element, T &output,
+                                                      uint32_t log_shift) {
+  const double modulus = double(std::numeric_limits<T>::max()) + 1.0;
+  const double scaled = (double)element / modulus * (double)log_shift;
+  output = round(scaled);
+}
+
+// Returns the rounded value of element / (max(T) + 1) * log_shift, computed
+// in double precision.
+template <typename T>
+__device__ __forceinline__ T rescale_torus_element(T element,
+                                                   uint32_t log_shift) {
+  const double modulus = double(std::numeric_limits<T>::max()) + 1.0;
+  const double scaled = (double)element / modulus * (double)log_shift;
+  return round(scaled);
+}
+
+// 32-bit specialization: same computation as the generic version, with the
+// integer-to-double conversions done through the __uint2double_rn intrinsic.
+template <>
+__device__ __forceinline__ void
+rescale_torus_element<uint32_t>(uint32_t element, uint32_t &output,
+                                uint32_t log_shift) {
+  const double modulus =
+      __uint2double_rn(std::numeric_limits<uint32_t>::max()) + 1.0;
+  const double scaled =
+      __uint2double_rn(element) / modulus * __uint2double_rn(log_shift);
+  output = round(scaled);
+}
+
+// 64-bit specialization: same computation as the generic version, with the
+// conversions done through the __ull2double_rn / __uint2double_rn intrinsics.
+template <>
+__device__ __forceinline__ void
+rescale_torus_element<uint64_t>(uint64_t element, uint64_t &output,
+                                uint32_t log_shift) {
+  const double modulus =
+      __ull2double_rn(std::numeric_limits<uint64_t>::max()) + 1.0;
+  const double scaled =
+      __ull2double_rn(element) / modulus * __uint2double_rn(log_shift);
+  output = round(scaled);
+}
+#endif // CNCRT_TORUS_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/device.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/device.cu
@@ -0,0 +1,350 @@
+#include "device.h"
+#include <cstdint>
+#include <cuda_runtime.h>
+
+/// Selects the GPU `gpu_index` and returns a newly allocated cuda_stream_t
+/// built for that index.
+/// Unsafe: the index is not validated here, the caller must check first that
+/// the GPU exists
+cuda_stream_t *cuda_create_stream(uint32_t gpu_index) {
+  cudaSetDevice(gpu_index);
+  return new cuda_stream_t(gpu_index);
+}
+
+/// Destroys a CUDA stream by calling release() on the given cuda_stream_t.
+/// Unsafe: the pointer is dereferenced without any validation, the caller
+/// must check first that the stream (and its GPU) exists.
+/// NOTE(review): the cuda_stream_t object itself is not deleted here, confirm
+/// that release() or the caller takes care of it.
+/// Always returns 0.
+int cuda_destroy_stream(cuda_stream_t *stream) {
+  stream->release();
+  return 0;
+}
+
+/// Allocates `size` bytes of device memory on GPU `gpu_index` and returns the
+/// device pointer.
+/// Unsafe function that will try to allocate even if gpu_index is invalid
+/// or if there's not enough memory: any CUDA failure is reported through
+/// check_cuda_error. A safe wrapper around it must call
+/// cuda_check_valid_malloc() first
+void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
+  check_cuda_error(cudaSetDevice(gpu_index));
+  // Start from a well-defined value instead of an indeterminate pointer
+  void *ptr = nullptr;
+  // Check the status returned by this very call rather than the last error
+  // recorded by the runtime, as done in cuda_malloc_async()
+  check_cuda_error(cudaMalloc((void **)&ptr, size));
+
+  return ptr;
+}
+
+/// Allocates `size` bytes on the GPU the stream belongs to and returns the
+/// device pointer. With CUDA runtime >= 11.2 and a device that reports
+/// memory pool support, the allocation is enqueued on the stream with
+/// cudaMallocAsync; in every other case a regular cudaMalloc is used.
+void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream) {
+  cudaSetDevice(stream->gpu_index);
+  void *d_buffer;
+
+#ifndef CUDART_VERSION
+#error CUDART_VERSION Undefined!
+#elif (CUDART_VERSION >= 11020)
+  int has_memory_pools;
+  check_cuda_error(cudaDeviceGetAttribute(
+      &has_memory_pools, cudaDevAttrMemoryPoolsSupported, stream->gpu_index));
+
+  if (!has_memory_pools) {
+    // no stream-ordered allocator on this device
+    check_cuda_error(cudaMalloc((void **)&d_buffer, size));
+  } else {
+    check_cuda_error(
+        cudaMallocAsync((void **)&d_buffer, size, stream->stream));
+  }
+#else
+  check_cuda_error(cudaMalloc((void **)&d_buffer, size));
+#endif
+  return d_buffer;
+}
+
+/// Checks that an allocation of `size` bytes on GPU `gpu_index` is valid,
+/// by comparing `size` with the free memory reported by cudaMemGetInfo
+/// 0: valid
+/// -1: invalid, not enough memory in device
+/// -2: invalid, gpu index doesn't exist
+/// A failure of the CUDA calls themselves is reported through
+/// check_cuda_error.
+int cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
+
+  if (gpu_index >= cuda_get_number_of_gpus()) {
+    // error code: invalid gpu_index
+    return -2;
+  }
+  check_cuda_error(cudaSetDevice(gpu_index));
+  size_t total_mem = 0, free_mem = 0;
+  // Without this check a failed query would leave free_mem meaningless and
+  // the comparison below would be done on garbage
+  check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
+  if (size > free_mem) {
+    // error code: not enough memory
+    return -1;
+  }
+  return 0;
+}
+
+/// Reads the cudaDevAttrCooperativeLaunch attribute of device 0.
+/// Returns
+///  -> 1 if Cooperative Groups is supported.
+///  -> 0 otherwise
+int cuda_check_support_cooperative_groups() {
+  int cooperative_launch = 0;
+  cudaDeviceGetAttribute(&cooperative_launch, cudaDevAttrCooperativeLaunch, 0);
+
+  return (cooperative_launch > 0) ? 1 : 0;
+}
+
+/// Enqueues on `stream` a copy of `size` bytes from the host buffer `src` to
+/// the device buffer `dest`
+/// 0: success
+/// -1: error, invalid device pointer
+/// -2: error, gpu index doesn't exist
+/// -3: error, zero copy size
+/// NOTE(review): `dest` is rejected only when it is neither located on the
+/// stream's GPU nor of device memory type, confirm this is the intended check
+int cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
+                             cuda_stream_t *stream) {
+  if (size == 0)
+    return -3; // zero copy size
+
+  if (stream->gpu_index >= cuda_get_number_of_gpus())
+    return -2; // invalid gpu_index
+
+  cudaPointerAttributes dest_attr;
+  cudaPointerGetAttributes(&dest_attr, dest);
+  const bool on_stream_gpu = dest_attr.device == stream->gpu_index;
+  const bool is_device_memory = dest_attr.type == cudaMemoryTypeDevice;
+  if (!(on_stream_gpu || is_device_memory))
+    return -1; // invalid device pointer
+
+  cudaSetDevice(stream->gpu_index);
+  check_cuda_error(
+      cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream->stream));
+  return 0;
+}
+
+/// Copies `size` bytes from the host buffer `src` to the device buffer
+/// `dest` with a blocking cudaMemcpy
+/// 0: success
+/// -1: error, invalid device pointer
+/// -3: error, zero copy size
+/// (no gpu index is involved here, so -2 is never returned)
+int cuda_memcpy_to_gpu(void *dest, void *src, uint64_t size) {
+  if (size == 0)
+    return -3; // zero copy size
+
+  cudaPointerAttributes dest_attr;
+  cudaPointerGetAttributes(&dest_attr, dest);
+  const bool is_device_memory = dest_attr.type == cudaMemoryTypeDevice;
+  if (!is_device_memory)
+    return -1; // invalid device pointer
+
+  check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyHostToDevice));
+  return 0;
+}
+
+/// Copies `size` bytes from the device buffer `src` to the host buffer
+/// `dest` with a blocking cudaMemcpy
+/// 0: success
+/// -1: error, invalid device pointer
+/// -3: error, zero copy size
+/// (no gpu index is involved here, so -2 is never returned)
+int cuda_memcpy_to_cpu(void *dest, void *src, uint64_t size) {
+  if (size == 0)
+    return -3; // zero copy size
+
+  cudaPointerAttributes src_attr;
+  cudaPointerGetAttributes(&src_attr, src);
+  const bool is_device_memory = src_attr.type == cudaMemoryTypeDevice;
+  if (!is_device_memory)
+    return -1; // invalid device pointer
+
+  check_cuda_error(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToHost));
+  return 0;
+}
+
+/// Enqueues on `stream` a device to device copy of `size` bytes from `src`
+/// to `dest`
+/// 0: success
+/// -1: error, invalid device pointer (this code is also returned when src
+///     and dest are reported on different devices)
+/// -2: error, gpu index doesn't exist
+/// -3: error, zero copy size
+/// NOTE(review): each pointer is rejected only when it is neither located on
+/// the stream's GPU nor of device memory type, confirm this is intended
+int cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
+                                 cuda_stream_t *stream) {
+  if (size == 0)
+    return -3; // zero copy size
+
+  if (stream->gpu_index >= cuda_get_number_of_gpus())
+    return -2; // invalid gpu_index
+
+  // destination pointer
+  cudaPointerAttributes dest_info;
+  cudaPointerGetAttributes(&dest_info, dest);
+  if (!(dest_info.device == stream->gpu_index ||
+        dest_info.type == cudaMemoryTypeDevice))
+    return -1; // invalid device pointer
+
+  // source pointer
+  cudaPointerAttributes src_info;
+  cudaPointerGetAttributes(&src_info, src);
+  if (!(src_info.device == stream->gpu_index ||
+        src_info.type == cudaMemoryTypeDevice))
+    return -1; // invalid device pointer
+
+  if (src_info.device != dest_info.device)
+    return -1; // different devices
+
+  cudaSetDevice(stream->gpu_index);
+  check_cuda_error(cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice,
+                                   stream->stream));
+  return 0;
+}
+
+/// Synchronizes device: blocks until the GPU `gpu_index` has completed all
+/// the work previously submitted to it
+/// 0: success
+/// -2: error, gpu index doesn't exist
+/// Errors reported by the CUDA runtime are handled by check_cuda_error.
+int cuda_synchronize_device(uint32_t gpu_index) {
+  if (gpu_index >= cuda_get_number_of_gpus()) {
+    // error code: invalid gpu_index
+    return -2;
+  }
+  check_cuda_error(cudaSetDevice(gpu_index));
+  // Errors of asynchronous work (e.g. a faulting kernel) are reported by the
+  // synchronization call: dropping its status would hide them
+  check_cuda_error(cudaDeviceSynchronize());
+  return 0;
+}
+
+/// Enqueues on `stream` a cudaMemsetAsync of `size` bytes of the device
+/// buffer `dest` with the value `val`
+/// 0: success
+/// -1: error, invalid device pointer
+/// -2: error, gpu index doesn't exist
+/// -3: error, zero size
+/// NOTE(review): cudaMemsetAsync takes an int value and sets memory byte by
+/// byte, so presumably only the low byte of `val` matters, confirm
+int cuda_memset_async(void *dest, uint64_t val, uint64_t size,
+                      cuda_stream_t *stream) {
+  if (size == 0)
+    return -3; // zero copy size
+
+  if (stream->gpu_index >= cuda_get_number_of_gpus())
+    return -2; // invalid gpu_index
+
+  cudaPointerAttributes dest_attr;
+  cudaPointerGetAttributes(&dest_attr, dest);
+  const bool on_stream_gpu = dest_attr.device == stream->gpu_index;
+  const bool is_device_memory = dest_attr.type == cudaMemoryTypeDevice;
+  if (!(on_stream_gpu || is_device_memory))
+    return -1; // invalid device pointer
+
+  cudaSetDevice(stream->gpu_index);
+  check_cuda_error(cudaMemsetAsync(dest, val, size, stream->stream));
+  return 0;
+}
+
+/// Kernel writing `value` into the first `n` elements of `array`.
+/// Each thread handles the element at its flat global index (1D grid of 1D
+/// blocks); threads whose index is not below n do nothing, so the grid is
+/// allowed to overshoot n.
+template <typename Torus>
+__global__ void cuda_set_value_kernel(Torus *array, Torus value, Torus n) {
+  // Use a 64-bit unsigned index: an int could overflow on large grids and
+  // would go through a signed/unsigned conversion when compared with n
+  size_t index = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < n)
+    array[index] = value;
+}
+
+/// Enqueues on `*stream` a kernel setting the first `n` elements of the
+/// device array `d_array` to `value`. Does nothing when n is 0.
+/// Launch configuration errors are reported through check_cuda_error.
+template <typename Torus>
+void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
+                          Torus n) {
+  // A grid with zero blocks is an invalid launch configuration
+  if (n == 0)
+    return;
+
+  const Torus block_size = 256;
+  // Ceil-division written so that it cannot overflow when n is close to the
+  // maximum value of Torus
+  const Torus num_blocks = n / block_size + (n % block_size != 0 ? 1 : 0);
+
+  // Launch the kernel
+  cuda_set_value_kernel<<<static_cast<unsigned int>(num_blocks),
+                          static_cast<unsigned int>(block_size), 0, *stream>>>(
+      d_array, value, n);
+  // A kernel launch does not return a status: fetch launch errors explicitly
+  check_cuda_error(cudaGetLastError());
+}
+
+/// Explicitly instantiate cuda_set_value_async for 32 and 64 bits
+template void cuda_set_value_async(cudaStream_t *stream, uint64_t *d_array,
+                                   uint64_t value, uint64_t n);
+template void cuda_set_value_async(cudaStream_t *stream, uint32_t *d_array,
+                                   uint32_t value, uint32_t n);
+
+/// Enqueues on `stream` a copy of `size` bytes from the device buffer `src`
+/// to the host buffer `dest`
+/// 0: success
+/// -1: error, invalid device pointer
+/// -2: error, gpu index doesn't exist
+/// -3: error, zero copy size
+/// NOTE(review): `src` is rejected only when it is neither located on the
+/// stream's GPU nor of device memory type, confirm this is the intended check
+int cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
+                             cuda_stream_t *stream) {
+  if (size == 0)
+    return -3; // zero copy size
+
+  if (stream->gpu_index >= cuda_get_number_of_gpus())
+    return -2; // invalid gpu_index
+
+  cudaPointerAttributes src_attr;
+  cudaPointerGetAttributes(&src_attr, src);
+  const bool on_stream_gpu = src_attr.device == stream->gpu_index;
+  const bool is_device_memory = src_attr.type == cudaMemoryTypeDevice;
+  if (!(on_stream_gpu || is_device_memory))
+    return -1; // invalid device pointer
+
+  cudaSetDevice(stream->gpu_index);
+  check_cuda_error(
+      cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream->stream));
+  return 0;
+}
+
+/// Return number of GPUs available, as reported by cudaGetDeviceCount.
+/// Returns 0 when the count cannot be queried, so that every gpu index is
+/// then considered invalid by the callers comparing against this value.
+int cuda_get_number_of_gpus() {
+  // Well-defined value even if the runtime does not write the count
+  int num_gpus = 0;
+  if (cudaGetDeviceCount(&num_gpus) != cudaSuccess) {
+    return 0;
+  }
+  return num_gpus;
+}
+
+/// Frees the device array `ptr` with cudaFree, after selecting `gpu_index`
+/// 0: success
+/// -2: error, gpu index doesn't exist
+int cuda_drop(void *ptr, uint32_t gpu_index) {
+  const bool gpu_exists = gpu_index < cuda_get_number_of_gpus();
+  if (!gpu_exists)
+    return -2; // invalid gpu_index
+
+  cudaSetDevice(gpu_index);
+  check_cuda_error(cudaFree(ptr));
+  return 0;
+}
+
+/// Frees the device array `ptr`. With CUDA runtime >= 11.2 and a device that
+/// reports memory pool support, the release is enqueued on the stream with
+/// cudaFreeAsync; in every other case a regular cudaFree is used.
+/// Always returns 0.
+int cuda_drop_async(void *ptr, cuda_stream_t *stream) {
+
+  cudaSetDevice(stream->gpu_index);
+#ifndef CUDART_VERSION
+#error CUDART_VERSION Undefined!
+#elif (CUDART_VERSION >= 11020)
+  int has_memory_pools;
+  check_cuda_error(cudaDeviceGetAttribute(
+      &has_memory_pools, cudaDevAttrMemoryPoolsSupported, stream->gpu_index));
+
+  if (!has_memory_pools) {
+    // no stream-ordered allocator on this device
+    check_cuda_error(cudaFree(ptr));
+  } else {
+    check_cuda_error(cudaFreeAsync(ptr, stream->stream));
+  }
+#else
+  check_cuda_error(cudaFree(ptr));
+#endif
+  return 0;
+}
+
+/// Get the maximum size for the shared memory of GPU `gpu_index`:
+/// sharedMemPerMultiprocessor when the device major compute capability is
+/// at least 6, sharedMemPerBlock otherwise.
+/// Returns -2 if the gpu index doesn't exist.
+int cuda_get_max_shared_memory(uint32_t gpu_index) {
+  if (gpu_index >= cuda_get_number_of_gpus())
+    return -2; // invalid gpu_index
+
+  cudaSetDevice(gpu_index);
+  cudaDeviceProp device_properties;
+  cudaGetDeviceProperties(&device_properties, gpu_index);
+
+  if (device_properties.major >= 6)
+    return device_properties.sharedMemPerMultiprocessor;
+  return device_properties.sharedMemPerBlock;
+}
+
+/// Synchronizes a stream by calling synchronize() on the given
+/// cuda_stream_t. The pointer is dereferenced without validation.
+/// Always returns 0.
+int cuda_synchronize_stream(cuda_stream_t *stream) {
+  stream->synchronize();
+  return 0;
+}
--- a/backends/tfhe-cuda-backend/cuda/src/fft/bnsmfft.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/bnsmfft.cuh
@@ -0,0 +1,725 @@
+#ifndef GPU_BOOTSTRAP_FFT_CUH
+#define GPU_BOOTSTRAP_FFT_CUH
+
+#include "polynomial/functions.cuh"
+#include "polynomial/parameters.cuh"
+#include "twiddles.cuh"
+#include "types/complex/operations.cuh"
+
+/*
+ * Direct negacyclic FFT:
+ *   - before the FFT the N real coefficients are stored into a
+ *     N/2 sized complex with the even coefficients in the real part
+ *     and the odd coefficients in the imaginary part. This is referred to
+ *     as the half-size FFT
+ *   - when calling NSMFFT_direct for the forward negacyclic FFT of PBS,
+ *     opt is divided by 2 because the butterfly pattern is always applied
+ *     between pairs of coefficients
+ *   - instead of twisting each coefficient A_j before the FFT by
+ *     multiplying by the w^j roots of unity (aka twiddles, w=exp(-i pi /N)),
+ *     the FFT is modified, and for each level k of the FFT the twiddle:
+ *     w_j,k = exp(-i pi j/2^k)
+ *     is replaced with:
+ *     \zeta_j,k = exp(-i pi (2j-1)/2^k)
+ */
+
+/*
+ * One butterfly level (2 <= level <= 13) of the direct negacyclic FFT.
+ * params::degree / 2^level is the number of coefficients in each half of a
+ * chunk of this level. The twiddles of levels up to 12 are read from
+ * 'negtwiddles', starting at index 2^(level - 1); the twiddles of level 13
+ * are read from 'negtwiddles13', starting at index 0.
+ * Each thread processes params::opt / 2 pairs of coefficients and the level
+ * ends with a block-wide barrier, so the function must be called by all the
+ * threads of the block.
+ */
+template <class params, size_t level>
+__device__ __forceinline__ void NSMFFT_direct_butterfly_level(double2 *A) {
+  static_assert(level >= 2 && level <= 13, "unsupported butterfly level");
+  constexpr size_t half_chunk = params::degree / (size_t(1) << level);
+  constexpr size_t twiddle_offset = size_t(1) << (level - 1);
+
+  size_t tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    size_t twid_id = tid / half_chunk;
+    size_t i1 = 2 * half_chunk * twid_id + (tid & (half_chunk - 1));
+    size_t i2 = i1 + half_chunk;
+
+    double2 w;
+    if constexpr (level == 13) {
+      w = negtwiddles13[twid_id];
+    } else {
+      w = negtwiddles[twid_id + twiddle_offset];
+    }
+    double2 u = A[i1];
+    double2 v = A[i2] * w;
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+}
+
+/*
+ * Direct negacyclic FFT of the params::degree complex values stored in A,
+ * computed in place by the threads of one block. Every level ends with a
+ * block-wide barrier, so all the threads of the block must call it.
+ */
+template <class params> __device__ void NSMFFT_direct(double2 *A) {
+
+  /* We don't make bit reverse here, since twiddles are already reversed
+   *  Each thread is always in charge of "opt/2" pairs of coefficients,
+   *  which is why we always loop through N/2 by N/opt strides
+   *  The pragma unroll instruction tells the compiler to unroll the
+   *  full loop, which should increase performance
+   */
+
+  size_t tid = threadIdx.x;
+  size_t i1, i2;
+  double2 u, v;
+  // level 1
+  // there is only one twiddle on level 1 and its real and imaginary parts
+  // are equal (1/sqrt(2)), so it is written as a literal instead of being
+  // fetched from the twiddle table
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    i1 = tid;
+    i2 = tid + params::degree / 2;
+
+    u = A[i1];
+    v = A[i2] * (double2){0.707106781186547461715008466854,
+                          0.707106781186547461715008466854};
+
+    A[i1] += v;
+    A[i2] = u - v;
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // levels 2 to 7
+  // from level 2 there is more than one twiddle per level and none of them
+  // has equal real and imaginary parts, so they are read from the table.
+  // The minimum supported actual polynomial size is 256, i.e. a compressed
+  // size of 128, so these levels are always needed
+  NSMFFT_direct_butterfly_level<params, 2>(A);
+  NSMFFT_direct_butterfly_level<params, 3>(A);
+  NSMFFT_direct_butterfly_level<params, 4>(A);
+  NSMFFT_direct_butterfly_level<params, 5>(A);
+  NSMFFT_direct_butterfly_level<params, 6>(A);
+  NSMFFT_direct_butterfly_level<params, 7>(A);
+
+  // from level 8, a level is only needed when the polynomial is big enough,
+  // hence the compile-time checks on params::degree
+  if constexpr (params::degree >= 256) {
+    NSMFFT_direct_butterfly_level<params, 8>(A);
+  }
+
+  if constexpr (params::degree >= 512) {
+    NSMFFT_direct_butterfly_level<params, 9>(A);
+  }
+
+  if constexpr (params::degree >= 1024) {
+    NSMFFT_direct_butterfly_level<params, 10>(A);
+  }
+
+  if constexpr (params::degree >= 2048) {
+    NSMFFT_direct_butterfly_level<params, 11>(A);
+  }
+
+  if constexpr (params::degree >= 4096) {
+    NSMFFT_direct_butterfly_level<params, 12>(A);
+  }
+
+  // compressed size = 8192 is actual polynomial size = 16384.
+  // from this size, twiddles can't fit in constant memory,
+  // so from here, butterfly operation accesses device memory.
+  if constexpr (params::degree >= 8192) {
+    NSMFFT_direct_butterfly_level<params, 13>(A);
+  }
+}
+
+/*
+ * One butterfly level (1 <= level <= 13) of the inverse negacyclic FFT.
+ * params::degree / 2^level is the number of coefficients in each half of a
+ * chunk of this level. The twiddles of levels up to 12 are read from
+ * 'negtwiddles', starting at index 2^(level - 1); the twiddles of level 13
+ * are read from 'negtwiddles13', starting at index 0. The conjugate of the
+ * twiddle is used in the butterfly.
+ * Each thread processes params::opt / 2 pairs of coefficients and the level
+ * ends with a block-wide barrier, so the function must be called by all the
+ * threads of the block.
+ */
+template <class params, size_t level>
+__device__ __forceinline__ void NSMFFT_inverse_butterfly_level(double2 *A) {
+  static_assert(level >= 1 && level <= 13, "unsupported butterfly level");
+  constexpr size_t half_chunk = params::degree / (size_t(1) << level);
+  constexpr size_t twiddle_offset = size_t(1) << (level - 1);
+
+  size_t tid = threadIdx.x;
+#pragma unroll
+  for (size_t i = 0; i < params::opt / 2; ++i) {
+    size_t twid_id = tid / half_chunk;
+    size_t i1 = 2 * half_chunk * twid_id + (tid & (half_chunk - 1));
+    size_t i2 = i1 + half_chunk;
+
+    double2 w;
+    if constexpr (level == 13) {
+      w = negtwiddles13[twid_id];
+    } else {
+      w = negtwiddles[twid_id + twiddle_offset];
+    }
+    double2 u = A[i1] - A[i2];
+
+    A[i1] += A[i2];
+    A[i2] = u * conjugate(w);
+
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+}
+
+/*
+ * negacyclic inverse fft of the params::degree complex values stored in A,
+ * computed in place by the threads of one block. Every step ends with a
+ * block-wide barrier, so all the threads of the block must call it.
+ */
+template <class params> __device__ void NSMFFT_inverse(double2 *A) {
+
+  /* We don't make bit reverse here, since twiddles are already reversed
+   *  Each thread is always in charge of "opt/2" pairs of coefficients,
+   *  which is why we always loop through N/2 by N/opt strides
+   *  The pragma unroll instruction tells the compiler to unroll the
+   *  full loop, which should increase performance
+   */
+
+  // divide input by compressed polynomial size
+  size_t tid = threadIdx.x;
+  for (size_t i = 0; i < params::opt; ++i) {
+    A[tid] /= params::degree;
+    tid += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // none of the twiddles have equal real and imag part, so
+  // complete complex multiplication has to be done
+  // here we have more than one twiddle
+  // mapping in backward fft is reversed
+  // butterfly operation is started from last level
+
+  // compressed size = 8192 is actual polynomial size = 16384.
+  // twiddles for this size can't fit in constant memory so
+  // butterfly operation for this level accesses device memory to fetch
+  // twiddles
+  if constexpr (params::degree >= 8192) {
+    NSMFFT_inverse_butterfly_level<params, 13>(A);
+  }
+
+  if constexpr (params::degree >= 4096) {
+    NSMFFT_inverse_butterfly_level<params, 12>(A);
+  }
+
+  if constexpr (params::degree >= 2048) {
+    NSMFFT_inverse_butterfly_level<params, 11>(A);
+  }
+
+  if constexpr (params::degree >= 1024) {
+    NSMFFT_inverse_butterfly_level<params, 10>(A);
+  }
+
+  if constexpr (params::degree >= 512) {
+    NSMFFT_inverse_butterfly_level<params, 9>(A);
+  }
+
+  if constexpr (params::degree >= 256) {
+    NSMFFT_inverse_butterfly_level<params, 8>(A);
+  }
+
+  // below level 8, we don't need to check the size of params::degree: the
+  // minimum supported actual polynomial size is 256, i.e. a compressed size
+  // of 128, so the last 7 levels of butterfly operations are always needed
+  NSMFFT_inverse_butterfly_level<params, 7>(A);
+  NSMFFT_inverse_butterfly_level<params, 6>(A);
+  NSMFFT_inverse_butterfly_level<params, 5>(A);
+  NSMFFT_inverse_butterfly_level<params, 4>(A);
+  NSMFFT_inverse_butterfly_level<params, 3>(A);
+  NSMFFT_inverse_butterfly_level<params, 2>(A);
+  NSMFFT_inverse_butterfly_level<params, 1>(A);
+}
+
+/*
+ * global batch fft
+ * Each block transforms the polynomial selected by blockIdx.x:
+ *   - the fft is done in half size (NSMFFT_direct on HalfDegree<params>)
+ *   - unrolling half size fft result in half size + 1 elements
+ *   - this function must be called with actual degree
+ *   - the input is expected to be already compressed: params::degree / 2
+ *     complex values per polynomial
+ * The working buffer is the dynamic shared memory, or a slice of `buffer`
+ * in device memory when SMD == NOSM.
+ */
+template <class params, sharedMemDegree SMD>
+__global__ void batch_NSMFFT(double2 *d_input, double2 *d_output,
+                             double2 *buffer) {
+  extern __shared__ double2 sharedMemoryFFT[];
+  double2 *fft;
+  if (SMD == NOSM)
+    fft = &buffer[blockIdx.x * params::degree / 2];
+  else
+    fft = sharedMemoryFFT;
+
+  // offset of the polynomial handled by this block
+  const auto poly_offset = blockIdx.x * (params::degree / 2);
+
+  // load the compressed polynomial in the working buffer
+  int pos = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    fft[pos] = d_input[poly_offset + pos];
+    pos += params::degree / params::opt;
+  }
+  __syncthreads();
+  NSMFFT_direct<HalfDegree<params>>(fft);
+  __syncthreads();
+
+  // store the transformed polynomial
+  pos = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    d_output[poly_offset + pos] = fft[pos];
+    pos += params::degree / params::opt;
+  }
+}
+
+/*
+ * global batch polynomial multiplication
+ * only used for fft tests
+ * Each block multiplies the pair of polynomials selected by blockIdx.x:
+ * both inputs go through the direct fft, are multiplied pointwise, and the
+ * product goes through the inverse fft.
+ * d_input1 and d_output must not have the same pointer
+ * d_input1 is overwritten with its own fourier transform
+ * The working buffer is the dynamic shared memory, or a slice of `buffer`
+ * in device memory when SMD == NOSM.
+ */
+template <class params, sharedMemDegree SMD>
+__global__ void batch_polynomial_mul(double2 *d_input1, double2 *d_input2,
+                                     double2 *d_output, double2 *buffer) {
+  extern __shared__ double2 sharedMemoryFFT[];
+  double2 *fft;
+  if (SMD == NOSM)
+    fft = &buffer[blockIdx.x * params::degree / 2];
+  else
+    fft = sharedMemoryFFT;
+
+  // offset of the polynomials handled by this block
+  const auto poly_offset = blockIdx.x * (params::degree / 2);
+
+  // Move the first polynomial into the working buffer
+  int pos = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    fft[pos] = d_input1[poly_offset + pos];
+    pos += params::degree / params::opt;
+  }
+
+  // Direct negacyclic fourier transform of the first polynomial
+  __syncthreads();
+  NSMFFT_direct<HalfDegree<params>>(fft);
+  __syncthreads();
+
+  // Keep the transform of the first polynomial inside input1
+  pos = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    d_input1[poly_offset + pos] = fft[pos];
+    pos += params::degree / params::opt;
+  }
+  __syncthreads();
+
+  // Move the second polynomial into the working buffer
+  pos = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    fft[pos] = d_input2[poly_offset + pos];
+    pos += params::degree / params::opt;
+  }
+
+  // Direct negacyclic fourier transform of the second polynomial
+  __syncthreads();
+  NSMFFT_direct<HalfDegree<params>>(fft);
+  __syncthreads();
+
+  // Pointwise multiplication, accumulated inside the working buffer
+  pos = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    fft[pos] *= d_input1[poly_offset + pos];
+    pos += params::degree / params::opt;
+  }
+
+  // Backward negacyclic fourier transform of the product
+  __syncthreads();
+  NSMFFT_inverse<HalfDegree<params>>(fft);
+  __syncthreads();
+
+  // Copy the result in the output buffer
+  pos = threadIdx.x;
+#pragma unroll
+  for (int i = 0; i < params::opt / 2; i++) {
+    d_output[poly_offset + pos] = fft[pos];
+    pos += params::degree / params::opt;
+  }
+}
+
+#endif // GPU_BOOTSTRAP_FFT_CUH
--- a/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu
--- a/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cuh
@@ -0,0 +1,13 @@
+#ifndef GPU_BOOTSTRAP_TWIDDLES_CUH
+#define GPU_BOOTSTRAP_TWIDDLES_CUH
+
+/*
+ * 'negtwiddles' are stored in constant memory for faster access times.
+ * Because of its limited size, only twiddles for up to 2^12 polynomial size
+ * can be stored there; twiddles for 2^13 are stored in device memory, in
+ * 'negtwiddles13'
+ */
+
+extern __constant__ double2 negtwiddles[4096];
+extern __device__ double2 negtwiddles13[4096];
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cu
@@ -0,0 +1,51 @@
+#include "integer/bitwise_ops.cuh"
+
+/*
+ * 64-bit entry point that creates the scratch buffer used by the radix
+ * bitwise operations.
+ *
+ * The scalar parameters are gathered into an int_radix_params object, and the
+ * opaque `mem_ptr` handle is reinterpreted as int_bitop_buffer<uint64_t> **
+ * before everything is handed to the templated
+ * scratch_cuda_integer_radix_bitop_kb<uint64_t>.
+ */
+void scratch_cuda_integer_radix_bitop_kb_64(
+    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
+    bool allocate_gpu_memory) {
+
+  // Typed view of the opaque handle supplied by the caller.
+  auto **bitop_mem = reinterpret_cast<int_bitop_buffer<uint64_t> **>(mem_ptr);
+
+  int_radix_params radix_params(
+      pbs_type, glwe_dimension, polynomial_size, big_lwe_dimension,
+      small_lwe_dimension, ks_level, ks_base_log, pbs_level, pbs_base_log,
+      grouping_factor, message_modulus, carry_modulus);
+
+  scratch_cuda_integer_radix_bitop_kb<uint64_t>(
+      stream, bitop_mem, lwe_ciphertext_count, radix_params, op_type,
+      allocate_gpu_memory);
+}
+
+/*
+ * 64-bit entry point for a two-operand radix bitwise operation.
+ *
+ * The untyped ciphertext and keyswitch-key pointers are cast to uint64_t *,
+ * the opaque `mem_ptr` is viewed as int_bitop_buffer<uint64_t> *, and the call
+ * is forwarded to host_integer_radix_bitop_kb<uint64_t>. `bsk` is passed
+ * through unchanged as void *.
+ */
+void cuda_bitop_integer_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1,
+    void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk,
+    uint32_t lwe_ciphertext_count) {
+
+  auto *out = static_cast<uint64_t *>(lwe_array_out);
+  auto *lhs = static_cast<uint64_t *>(lwe_array_1);
+  auto *rhs = static_cast<uint64_t *>(lwe_array_2);
+  auto *keyswitch_key = static_cast<uint64_t *>(ksk);
+  auto *bitop_mem = reinterpret_cast<int_bitop_buffer<uint64_t> *>(mem_ptr);
+
+  host_integer_radix_bitop_kb<uint64_t>(stream, out, lhs, rhs, bitop_mem, bsk,
+                                        keyswitch_key, lwe_ciphertext_count);
+}
+
+/*
+ * 64-bit entry point for the single-operand radix "bitnot" operation.
+ *
+ * Casts the untyped pointers to their uint64_t / int_bitop_buffer<uint64_t>
+ * counterparts and forwards to host_integer_radix_bitnot_kb<uint64_t>. `bsk`
+ * is passed through unchanged as void *.
+ */
+void cuda_bitnot_integer_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
+    int8_t *mem_ptr, void *bsk, void *ksk, uint32_t lwe_ciphertext_count) {
+
+  auto *out = static_cast<uint64_t *>(lwe_array_out);
+  auto *in = static_cast<uint64_t *>(lwe_array_in);
+  auto *keyswitch_key = static_cast<uint64_t *>(ksk);
+  auto *bitop_mem = reinterpret_cast<int_bitop_buffer<uint64_t> *>(mem_ptr);
+
+  host_integer_radix_bitnot_kb<uint64_t>(stream, out, in, bitop_mem, bsk,
+                                         keyswitch_key, lwe_ciphertext_count);
+}
+
+/*
+ * Tears down a bitop scratch buffer.
+ *
+ * `*mem_ptr_void` is expected to hold the int_bitop_buffer<uint64_t> that
+ * scratch_cuda_integer_radix_bitop_kb created with `new`. The buffer's
+ * release() is invoked on `stream`, then the buffer object itself is deleted
+ * and the handle is reset to nullptr.
+ *
+ * A null handle (or a handle that already points to nullptr) is a no-op, so
+ * calling this function twice on the same handle is harmless.
+ */
+void cleanup_cuda_integer_bitop(cuda_stream_t *stream, int8_t **mem_ptr_void) {
+
+  // Nothing to clean up: avoid dereferencing a null handle.
+  if (mem_ptr_void == nullptr || *mem_ptr_void == nullptr)
+    return;
+
+  int_bitop_buffer<uint64_t> *mem_ptr =
+      (int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
+  mem_ptr->release(stream);
+
+  // The buffer object was allocated with `new` by the scratch function, so it
+  // has to be deleted here; otherwise the host-side object leaks.
+  // NOTE(review): assumes release() frees only what the buffer owns and does
+  // not destroy the buffer object itself - confirm against int_bitop_buffer.
+  delete mem_ptr;
+
+  // Do not leave a dangling pointer in the caller's handle.
+  *mem_ptr_void = nullptr;
+}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/bitwise_ops.cuh
@@ -0,0 +1,51 @@
+#ifndef CUDA_INTEGER_BITWISE_OPS_CUH
+#define CUDA_INTEGER_BITWISE_OPS_CUH
+
+#include "crypto/keyswitch.cuh"
+#include "device.h"
+#include "integer.cuh"
+#include "integer.h"
+#include "pbs/bootstrap_low_latency.cuh"
+#include "pbs/bootstrap_multibit.cuh"
+#include "polynomial/functions.cuh"
+#include "utils/kernel_dimensions.cuh"
+#include <omp.h>
+
+/*
+ * Host-side implementation of a two-operand radix bitwise operation.
+ *
+ * Forwards both inputs, the keys and the lookup table held in `mem_ptr->lut`
+ * to integer_radix_apply_bivariate_lookup_table_kb, which writes the result
+ * to `lwe_array_out`.
+ */
+template <typename Torus>
+__host__ void
+host_integer_radix_bitop_kb(cuda_stream_t *stream, Torus *lwe_array_out,
+                            Torus *lwe_array_1, Torus *lwe_array_2,
+                            int_bitop_buffer<Torus> *mem_ptr, void *bsk,
+                            Torus *ksk, uint32_t num_radix_blocks) {
+
+  // The lookup table to apply is the one stored in the scratch buffer.
+  integer_radix_apply_bivariate_lookup_table_kb<Torus>(
+      stream, lwe_array_out, lwe_array_1, lwe_array_2, bsk, ksk,
+      num_radix_blocks, mem_ptr->lut);
+}
+
+/*
+ * Host-side implementation of the single-operand radix "bitnot" operation.
+ *
+ * Forwards the input, the keys and the lookup table held in `mem_ptr->lut` to
+ * integer_radix_apply_univariate_lookup_table_kb, which writes the result to
+ * `lwe_array_out`.
+ */
+template <typename Torus>
+__host__ void
+host_integer_radix_bitnot_kb(cuda_stream_t *stream, Torus *lwe_array_out,
+                             Torus *lwe_array_in,
+                             int_bitop_buffer<Torus> *mem_ptr, void *bsk,
+                             Torus *ksk, uint32_t num_radix_blocks) {
+
+  // The lookup table to apply is the one stored in the scratch buffer.
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      stream, lwe_array_out, lwe_array_in, bsk, ksk, num_radix_blocks,
+      mem_ptr->lut);
+}
+
+/*
+ * Allocates a new int_bitop_buffer<Torus> for operation `op` and stores its
+ * address in `*mem_ptr`.
+ *
+ * The buffer is created with `new`; `stream`, `params`, `num_radix_blocks`
+ * and `allocate_gpu_memory` are passed straight to its constructor.
+ */
+template <typename Torus>
+__host__ void scratch_cuda_integer_radix_bitop_kb(
+    cuda_stream_t *stream, int_bitop_buffer<Torus> **mem_ptr,
+    uint32_t num_radix_blocks, int_radix_params params, BITOP_TYPE op,
+    bool allocate_gpu_memory) {
+
+  auto *buffer = new int_bitop_buffer<Torus>(
+      stream, op, params, num_radix_blocks, allocate_gpu_memory);
+  *mem_ptr = buffer;
+}
+
+#endif
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
@@ -0,0 +1,45 @@
+#include "integer/cmux.cuh"
+
+/*
+ * 64-bit entry point that creates the scratch buffer used by the radix cmux.
+ *
+ * The scalar parameters are gathered into an int_radix_params object and the
+ * opaque `mem_ptr` handle is reinterpreted as int_cmux_buffer<uint64_t> **.
+ * The predicate handed to the buffer maps an input to 1 when it equals 1 and
+ * to 0 otherwise.
+ */
+void scratch_cuda_integer_radix_cmux_kb_64(
+    cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
+    uint32_t polynomial_size, uint32_t big_lwe_dimension,
+    uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
+    uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
+    uint32_t lwe_ciphertext_count, uint32_t message_modulus,
+    uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
+
+  // Typed view of the opaque handle supplied by the caller.
+  auto **cmux_mem = reinterpret_cast<int_cmux_buffer<uint64_t> **>(mem_ptr);
+
+  int_radix_params radix_params(
+      pbs_type, glwe_dimension, polynomial_size, big_lwe_dimension,
+      small_lwe_dimension, ks_level, ks_base_log, pbs_level, pbs_base_log,
+      grouping_factor, message_modulus, carry_modulus);
+
+  // Predicate: 1 if the input equals 1, 0 otherwise.
+  std::function<uint64_t(uint64_t)> is_one = [](uint64_t x) -> uint64_t {
+    return x == 1;
+  };
+
+  scratch_cuda_integer_radix_cmux_kb<uint64_t>(stream, cmux_mem, is_one,
+                                               lwe_ciphertext_count,
+                                               radix_params,
+                                               allocate_gpu_memory);
+}
+
+/*
+ * 64-bit entry point for the radix cmux.
+ *
+ * Casts the untyped ciphertext and keyswitch-key pointers to uint64_t *, views
+ * the opaque `mem_ptr` as int_cmux_buffer<uint64_t> *, and forwards to
+ * host_integer_radix_cmux_kb<uint64_t>. `bsk` is passed through unchanged as
+ * void *.
+ */
+void cuda_cmux_integer_radix_ciphertext_kb_64(
+    cuda_stream_t *stream, void *lwe_array_out, void *lwe_condition,
+    void *lwe_array_true, void *lwe_array_false, int8_t *mem_ptr, void *bsk,
+    void *ksk, uint32_t lwe_ciphertext_count) {
+
+  auto *out = static_cast<uint64_t *>(lwe_array_out);
+  auto *condition = static_cast<uint64_t *>(lwe_condition);
+  auto *if_true = static_cast<uint64_t *>(lwe_array_true);
+  auto *if_false = static_cast<uint64_t *>(lwe_array_false);
+  auto *keyswitch_key = static_cast<uint64_t *>(ksk);
+  auto *cmux_mem = reinterpret_cast<int_cmux_buffer<uint64_t> *>(mem_ptr);
+
+  host_integer_radix_cmux_kb<uint64_t>(stream, out, condition, if_true,
+                                       if_false, cmux_mem, bsk, keyswitch_key,
+                                       lwe_ciphertext_count);
+}
+
+/*
+ * Tears down a cmux scratch buffer.
+ *
+ * `*mem_ptr_void` is expected to hold the int_cmux_buffer<uint64_t> that
+ * scratch_cuda_integer_radix_cmux_kb created with `new`. The buffer's
+ * release() is invoked on `stream`, then the buffer object itself is deleted
+ * and the handle is reset to nullptr.
+ *
+ * A null handle (or a handle that already points to nullptr) is a no-op, so
+ * calling this function twice on the same handle is harmless.
+ */
+void cleanup_cuda_integer_radix_cmux(cuda_stream_t *stream,
+                                     int8_t **mem_ptr_void) {
+
+  // Nothing to clean up: avoid dereferencing a null handle.
+  if (mem_ptr_void == nullptr || *mem_ptr_void == nullptr)
+    return;
+
+  int_cmux_buffer<uint64_t> *mem_ptr =
+      (int_cmux_buffer<uint64_t> *)(*mem_ptr_void);
+  mem_ptr->release(stream);
+
+  // The buffer object was allocated with `new` by the scratch function, so it
+  // has to be deleted here; otherwise the host-side object leaks.
+  // NOTE(review): assumes release() frees only what the buffer owns and does
+  // not destroy the buffer object itself - confirm against int_cmux_buffer.
+  delete mem_ptr;
+
+  // Do not leave a dangling pointer in the caller's handle.
+  *mem_ptr_void = nullptr;
+}
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cuh
@@ -0,0 +1,100 @@
+#ifndef CUDA_INTEGER_CMUX_CUH
+#define CUDA_INTEGER_CMUX_CUH
+
+#include "integer.cuh"
+#include <omp.h>
+
+/*
+ * Packs every radix block of `lwe_array_input` together with `lwe_condition`
+ * and then applies the `predicate` lookup table to the packed blocks.
+ *
+ * Parameters:
+ *   stream           - stream on which the packing kernels are launched and
+ *                      which is passed to the lookup-table application.
+ *   lwe_array_out    - destination of the lookup-table application.
+ *   lwe_array_input  - input blocks, read at a stride of
+ *                      (params.big_lwe_dimension + 1) elements per block.
+ *   lwe_condition    - second operand of the packing kernel; the same pointer
+ *                      is used for every block.
+ *   mem_ptr          - provides `params` and the `tmp` staging area that
+ *                      receives the packed blocks.
+ *   predicate        - lookup table applied to the packed blocks; its
+ *                      `lwe_indexes` are also passed to the packing kernel.
+ *   bsk, ksk         - keys forwarded to the lookup-table application.
+ *   num_radix_blocks - number of blocks to process.
+ */
+template <typename Torus>
+__host__ void zero_out_if(cuda_stream_t *stream, Torus *lwe_array_out,
+                          Torus *lwe_array_input, Torus *lwe_condition,
+                          int_zero_out_if_buffer<Torus> *mem_ptr,
+                          int_radix_lut<Torus> *predicate, void *bsk,
+                          Torus *ksk, uint32_t num_radix_blocks) {
+  auto params = mem_ptr->params;
+
+  // Number of Torus elements occupied by one block.
+  int big_lwe_size = params.big_lwe_dimension + 1;
+
+  // Launch geometry: one kernel launch covers the elements of a single block.
+  int num_blocks = 0, num_threads = 0;
+  int num_entries = big_lwe_size;
+  getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
+
+  // We can't use integer_radix_apply_bivariate_lookup_table_kb since the
+  // second operand is fixed
+  auto tmp_lwe_array_input = mem_ptr->tmp;
+  // Unsigned index: matches the type of num_radix_blocks and avoids a
+  // signed/unsigned comparison.
+  for (uint32_t i = 0; i < num_radix_blocks; i++) {
+    // Offset computed in size_t so it cannot overflow 32-bit arithmetic for
+    // large block counts / LWE sizes.
+    size_t block_offset = (size_t)i * (size_t)big_lwe_size;
+    auto lwe_array_out_block = tmp_lwe_array_input + block_offset;
+    auto lwe_array_input_block = lwe_array_input + block_offset;
+
+    // NOTE(review): the original remark "Left message is shifted" presumably
+    // describes what this kernel does with params.message_modulus, and the
+    // trailing 1 presumably is the number of blocks handled per launch -
+    // confirm against device_pack_bivariate_blocks.
+    device_pack_bivariate_blocks<<<num_blocks, num_threads, 0,
+                                   stream->stream>>>(
+        lwe_array_out_block, lwe_array_input_block, lwe_condition,
+        predicate->lwe_indexes, params.big_lwe_dimension,
+        params.message_modulus, 1);
+    // Surface launch-configuration errors immediately.
+    check_cuda_error(cudaGetLastError());
+  }
+
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      stream, lwe_array_out, tmp_lwe_array_input, bsk, ksk, num_radix_blocks,
+      predicate);
+}
+
+/*
+ * Host-side implementation of the radix cmux.
+ *
+ * Steps, as performed below:
+ *   1. Synchronize the main `stream`.
+ *   2. In two OpenMP sections, call zero_out_if on `lwe_array_true` (with
+ *      `inverted_predicate_lut`, on the true-buffer's local stream) and on
+ *      `lwe_array_false` (with `predicate_lut`, on the false-buffer's local
+ *      stream). Results go to `tmp_true_ct` and `tmp_false_ct`.
+ *   3. Synchronize both local streams.
+ *   4. Add `tmp_false_ct` into `tmp_true_ct` with host_addition on `stream`.
+ *   5. Apply `message_extract_lut` to the sum, writing `lwe_array_out`.
+ */
+template <typename Torus>
+__host__ void
+host_integer_radix_cmux_kb(cuda_stream_t *stream, Torus *lwe_array_out,
+                           Torus *lwe_condition, Torus *lwe_array_true,
+                           Torus *lwe_array_false,
+                           int_cmux_buffer<Torus> *mem_ptr, void *bsk,
+                           Torus *ksk, uint32_t num_radix_blocks) {
+
+  auto params = mem_ptr->params;
+
+  auto true_buffer = mem_ptr->zero_if_true_buffer;
+  auto false_buffer = mem_ptr->zero_if_false_buffer;
+
+  // The two branches below are issued from different CPU threads on their own
+  // streams, so everything queued on the main stream must be finished first.
+  stream->synchronize();
+  auto true_stream = true_buffer->local_stream;
+  auto false_stream = false_buffer->local_stream;
+
+#pragma omp parallel sections
+  {
+    // The two sections are independent and may run concurrently.
+#pragma omp section
+    {
+      zero_out_if(true_stream, mem_ptr->tmp_true_ct, lwe_array_true,
+                  lwe_condition, true_buffer, mem_ptr->inverted_predicate_lut,
+                  bsk, ksk, num_radix_blocks);
+    }
+#pragma omp section
+    {
+      zero_out_if(false_stream, mem_ptr->tmp_false_ct, lwe_array_false,
+                  lwe_condition, false_buffer, mem_ptr->predicate_lut, bsk, ksk,
+                  num_radix_blocks);
+    }
+  }
+  // Wait for both branches before combining their results on the main stream.
+  cuda_synchronize_stream(true_stream);
+  cuda_synchronize_stream(false_stream);
+
+  // If the condition was true, true_ct will have kept its value and false_ct
+  // will be 0. If the condition was false, true_ct will be 0 and false_ct
+  // will have kept its value. The sum is accumulated in place in tmp_true_ct.
+  auto selected_sum = mem_ptr->tmp_true_ct;
+  host_addition(stream, selected_sum, mem_ptr->tmp_true_ct,
+                mem_ptr->tmp_false_ct, params.big_lwe_dimension,
+                num_radix_blocks);
+
+  integer_radix_apply_univariate_lookup_table_kb<Torus>(
+      stream, lwe_array_out, selected_sum, bsk, ksk, num_radix_blocks,
+      mem_ptr->message_extract_lut);
+}
+
+/*
+ * Allocates a new int_cmux_buffer<Torus> and stores its address in
+ * `*mem_ptr`.
+ *
+ * The buffer is created with `new`; `stream`, `predicate_lut_f`, `params`,
+ * `num_radix_blocks` and `allocate_gpu_memory` are passed straight to its
+ * constructor.
+ */
+template <typename Torus>
+__host__ void scratch_cuda_integer_radix_cmux_kb(
+    cuda_stream_t *stream, int_cmux_buffer<Torus> **mem_ptr,
+    std::function<Torus(Torus)> predicate_lut_f, uint32_t num_radix_blocks,
+    int_radix_params params, bool allocate_gpu_memory) {
+
+  auto *buffer = new int_cmux_buffer<Torus>(
+      stream, predicate_lut_f, params, num_radix_blocks, allocate_gpu_memory);
+  *mem_ptr = buffer;
+}
+#endif
--- a/Show More
+++ b/Show More