feat(ntt): impl crt/ntt backend

feat(core): add GLWE linear algebra primitives
- add appropriate tests and doctest
2026-01-11 07:38:08 -05:00 · 2023-09-19 14:57:09 +02:00 · 2023-09-19 11:41:16 +02:00 · 2023-09-19 09:19:47 +02:00 · 2023-09-18 17:19:48 +02:00 · 2023-09-18 14:35:06 +02:00
319 changed files with 20823 additions and 5854 deletions
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -5,13 +5,3 @@ failure-output = "final"
 fail-fast = false
 retries = 0
 slow-timeout = "5m"
-
-
-[[profile.ci.overrides]]
-filter = 'test(/^.*param_message_1_carry_[567]_ks_pbs$/) or test(/^.*param_message_4_carry_4_ks_pbs$/)'
-retries = 3
-
-[[profile.ci.overrides]]
-filter = 'test(/^.*param_message_[23]_carry_[23]_ks_pbs$/)'
-retries = 1
-
--- a/.github/workflows/aws_tfhe_fast_tests.yml
+++ b/.github/workflows/aws_tfhe_fast_tests.yml
@@ -51,7 +51,7 @@ jobs:
          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: ${{ inputs.fork_repo }}
          ref: ${{ inputs.fork_git_sha }}
@@ -66,6 +66,10 @@ jobs:
          toolchain: stable
          default: true

+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng
+
      - name: Run core tests
        run: |
          AVX512_SUPPORT=ON make test_core_crypto
@@ -109,7 +113,7 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/aws_tfhe_integer_tests.yml
+++ b/.github/workflows/aws_tfhe_integer_tests.yml
@@ -50,7 +50,7 @@ jobs:
          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: ${{ inputs.fork_repo }}
          ref: ${{ inputs.fork_git_sha }}
@@ -76,7 +76,7 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/aws_tfhe_multi_bit_tests.yml
+++ b/.github/workflows/aws_tfhe_multi_bit_tests.yml
@@ -50,7 +50,7 @@ jobs:
          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: ${{ inputs.fork_repo }}
          ref: ${{ inputs.fork_git_sha }}
@@ -80,7 +80,7 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/aws_tfhe_tests.yml
+++ b/.github/workflows/aws_tfhe_tests.yml
@@ -50,7 +50,7 @@ jobs:
          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: ${{ inputs.fork_repo }}
          ref: ${{ inputs.fork_git_sha }}
@@ -65,6 +65,10 @@ jobs:
          toolchain: stable
          default: true

+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng
+
      - name: Run core tests
        run: |
          AVX512_SUPPORT=ON make test_core_crypto
@@ -100,7 +104,7 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/aws_tfhe_wasm_tests.yml
+++ b/.github/workflows/aws_tfhe_wasm_tests.yml
@@ -50,7 +50,7 @@ jobs:
          echo "Fork git sha: ${{ inputs.fork_git_sha }}"

      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: ${{ inputs.fork_repo }}
          ref: ${{ inputs.fork_git_sha }}
@@ -77,7 +77,7 @@ jobs:
      - name: Slack Notification
        if: ${{ always() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/boolean_benchmark.yml
+++ b/.github/workflows/boolean_benchmark.yml
@@ -43,7 +43,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -88,13 +88,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_boolean
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -117,7 +117,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/cargo_build.yml
+++ b/.github/workflows/cargo_build.yml
@@ -21,12 +21,16 @@ jobs:
      fail-fast: false

    steps:
-      - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac

      - name: Run pcc checks
        run: |
          make pcc

+      - name: Build concrete-csprng
+        run: |
+          make build_concrete_csprng
+
      - name: Build Release core
        run: |
          make build_core AVX512_SUPPORT=ON
--- a/.github/workflows/code_coverage.yml
+++ b/.github/workflows/code_coverage.yml
@@ -0,0 +1,111 @@
+name: Code Coverage
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  code-coverage:
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+    steps:
+      # Step used for logging purposes.
+      - name: Instance configuration used
+        run: |
+          echo "ID: ${{ inputs.instance_id }}"
+          echo "AMI: ${{ inputs.instance_image_id }}"
+          echo "Type: ${{ inputs.instance_type }}"
+          echo "Request ID: ${{ inputs.request_id }}"
+          echo "Fork repo: ${{ inputs.fork_repo }}"
+          echo "Fork git sha: ${{ inputs.fork_git_sha }}"
+
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        with:
+          toolchain: stable
+          default: true
+
+      - name: Check for file changes
+        id: changed-files
+        uses: tj-actions/changed-files@6ee9cdc5816333acda68e01cf12eedc619e28316
+        with:
+          files_yaml: |
+            tfhe:
+              - tfhe/src/**
+            concrete_csprng:
+              - concrete-csprng/src/**
+
+      - name: Generate Keys
+        run: |
+          make GEN_KEY_CACHE_COVERAGE_ONLY=TRUE gen_key_cache
+
+      - name: Run boolean coverage
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make test_boolean_cov
+
+      - name: Run shortint coverage
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        run: |
+          make test_shortint_cov
+
+      - name: Upload tfhe coverage to Codecov
+        uses: codecov/codecov-action@eaaf4bedf32dbdc6b720b63067d99c4d77d6047d
+        if: steps.changed-files.outputs.tfhe_any_changed == 'true'
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          directory: ./coverage/
+          fail_ci_if_error: true
+          files: shortint/cobertura.xml,boolean/cobertura.xml
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Code coverage finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/csprng_randomness_testing.yml
+++ b/.github/workflows/csprng_randomness_testing.yml
@@ -0,0 +1,74 @@
+name: CSPRNG randomness testing Workflow
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+
+on:
+  # Allows you to run this workflow manually from the Actions tab as an alternative.
+  workflow_dispatch:
+    # All the inputs are provided by Slab
+    inputs:
+      instance_id:
+        description: "AWS instance ID"
+        type: string
+      instance_image_id:
+        description: "AWS instance AMI ID"
+        type: string
+      instance_type:
+        description: "AWS instance product type"
+        type: string
+      runner_name:
+        description: "Action runner name"
+        type: string
+      request_id:
+        description: 'Slab request ID'
+        type: string
+      fork_repo:
+        description: 'Name of forked repo as user/repo'
+        type: string
+      fork_git_sha:
+        description: 'Git SHA to checkout from fork'
+        type: string
+
+jobs:
+  csprng-randomness-testing:
+    name: CSPRNG randomness testing
+    concurrency:
+      group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
+      cancel-in-progress: true
+    runs-on: ${{ inputs.runner_name }}
+
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+        with:
+          repository: ${{ inputs.fork_repo }}
+          ref: ${{ inputs.fork_git_sha }}
+
+      - name: Set up home
+        run: |
+          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
+
+      - name: Install latest stable
+        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
+        with:
+          toolchain: stable
+          default: true
+
+      - name: Dieharder randomness test suite
+        run: |
+          make dieharder_csprng
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "concrete-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/integer_benchmark.yml
+++ b/.github/workflows/integer_benchmark.yml
@@ -44,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -69,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -90,13 +90,13 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -119,7 +119,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/integer_full_benchmark.yml
+++ b/.github/workflows/integer_full_benchmark.yml
@@ -30,11 +30,12 @@ jobs:
    name: Execute integer benchmarks for all operations flavor
    runs-on: ${{ github.event.inputs.runner_name }}
    if: ${{ !cancelled() }}
+    continue-on-error: true
    strategy:
      max-parallel: 1
      matrix:
        command: [ integer, integer_multi_bit]
-        op_flavor: [ default, default_scalar, smart, smart_scalar, smart_parallelized, smart_scalar_parallelized, unchecked, unchecked_scalar, misc ]
+        op_flavor: [ default, default_comp, default_scalar, default_scalar_comp, smart, smart_comp, smart_scalar, smart_parallelized, smart_parallelized_comp, smart_scalar_parallelized, unchecked, unchecked_comp, unchecked_scalar, unchecked_scalar_comp, misc ]
    steps:
      - name: Instance configuration used
        run: |
@@ -44,7 +45,7 @@ jobs:
          echo "Request ID: ${{ inputs.request_id }}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -66,7 +67,7 @@ jobs:
          override: true

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -90,7 +91,7 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -117,7 +118,7 @@ jobs:
    steps:
      - name: Notify
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/integer_multi_bit_benchmark.yml
+++ b/.github/workflows/integer_multi_bit_benchmark.yml
@@ -44,7 +44,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -69,7 +69,7 @@ jobs:
            parse_integer_benches

      - name: Upload csv results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_csv_integer
          path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
@@ -90,13 +90,13 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_integer
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -119,7 +119,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/m1_tests.yml
+++ b/.github/workflows/m1_tests.yml
@@ -28,7 +28,7 @@ jobs:
    runs-on: ["self-hosted", "m1mac"]

    steps:
-      - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+      - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac

      - name: Install latest stable
        uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
@@ -40,6 +40,10 @@ jobs:
        run: |
          make pcc

+      - name: Build concrete-csprng
+        run: |
+          make build_concrete_csprng
+
      - name: Build Release core
        run: |
          make build_core
@@ -64,6 +68,10 @@ jobs:
        run: |
          make build_c_api

+      - name: Run concrete-csprng tests
+        run: |
+          make test_concrete_csprng
+
      - name: Run core tests
        run: |
          make test_core_crypto
@@ -124,7 +132,7 @@ jobs:
      - name: Slack Notification
        if: ${{ needs.cargo-builds.result != 'skipped' }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ needs.cargo-builds.result }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/make_release.yml
+++ b/.github/workflows/make_release.yml
@@ -30,7 +30,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -49,7 +49,7 @@ jobs:

      - name: Publish web package
        if: ${{ inputs.push_web_package }}
-        uses: JS-DevTools/npm-publish@5a85faf05d2ade2d5b6682bfe5359915d5159c6c
+        uses: JS-DevTools/npm-publish@fe72237be0920f7a0cafd6a966c9b929c9466e9b
        with:
          token: ${{ secrets.NPM_TOKEN }}
          package: tfhe/pkg/package.json
@@ -65,7 +65,7 @@ jobs:

      - name: Publish Node package
        if: ${{ inputs.push_node_package }}
-        uses: JS-DevTools/npm-publish@5a85faf05d2ade2d5b6682bfe5359915d5159c6c
+        uses: JS-DevTools/npm-publish@fe72237be0920f7a0cafd6a966c9b929c9466e9b
        with:
          token: ${{ secrets.NPM_TOKEN }}
          package: tfhe/pkg/package.json
@@ -74,11 +74,11 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
-          SLACK_MESSAGE: "Integer benchmarks failed. (${{ env.ACTION_RUN_URL }})"
+          SLACK_MESSAGE: "tfhe release failed: (${{ env.ACTION_RUN_URL }})"
          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/make_release_concrete_csprng.yml
+++ b/.github/workflows/make_release_concrete_csprng.yml
@@ -0,0 +1,42 @@
+# Publish a new release of concrete-csprng on crates.io.
+name: Publish concrete-csprng release
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: "Dry-run"
+        type: boolean
+        default: true
+
+env:
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  publish_release:
+    name: Publish concrete-csprng Release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+        with:
+          fetch-depth: 0
+
+      - name: Publish crates.io package
+        env:
+          CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
+        run: |
+          cargo publish -p concrete-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
+
+      - name: Slack Notification
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "concrete-csprng release failed: (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/parameters_check.yml
+++ b/.github/workflows/parameters_check.yml
@@ -0,0 +1,51 @@
+# Perform a security check on all the cryptographic parameter sets
+name: Parameters curves security check
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+
+on:
+  push:
+    branches:
+      - "main"
+  workflow_dispatch:
+
+jobs:
+  params-curves-security-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+
+      - name: Checkout lattice-estimator
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
+        with:
+          repository: malb/lattice-estimator
+          path: lattice_estimator
+
+      - name: Install Sage
+        run: |
+          sudo apt update
+          sudo apt install -y sagemath
+
+      - name: Collect parameters
+        run: |
+          make write_params_to_file
+
+      - name: Perform security check
+        run: |
+          PYTHONPATH=lattice_estimator sage ci/lattice_estimator.sage
+
+      - name: Slack Notification
+        if: ${{ always() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+          SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+          SLACK_MESSAGE: "Security check for parameters finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
+          SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
--- a/.github/workflows/pbs_benchmark.yml
+++ b/.github/workflows/pbs_benchmark.yml
@@ -43,7 +43,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -78,13 +78,13 @@ jobs:
          --throughput

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_pbs
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -107,7 +107,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/shortint_benchmark.yml
+++ b/.github/workflows/shortint_benchmark.yml
@@ -43,7 +43,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -88,13 +88,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_shortint
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -117,7 +117,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/shortint_full_benchmark.yml
+++ b/.github/workflows/shortint_full_benchmark.yml
@@ -43,7 +43,7 @@ jobs:
          echo "Request ID: ${{ inputs.request_id }}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -65,7 +65,7 @@ jobs:
          override: true

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -104,7 +104,7 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
          path: ${{ env.RESULTS_FILENAME }}
@@ -131,7 +131,7 @@ jobs:
    steps:
      - name: Notify
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.github/workflows/start_benchmarks.yml
+++ b/.github/workflows/start_benchmarks.yml
@@ -42,13 +42,13 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

      - name: Check for file changes
        id: changed-files
-        uses: tj-actions/changed-files@a96679dfee2a1e64b1db5a210c0ffaf1f2cb24ce
+        uses: tj-actions/changed-files@6ee9cdc5816333acda68e01cf12eedc619e28316
        with:
          files_yaml: |
            common_benches:
@@ -85,7 +85,7 @@ jobs:
              - .github/workflows/wasm_client_benchmark.yml

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/start_full_benchmarks.yml
+++ b/.github/workflows/start_full_benchmarks.yml
@@ -1,11 +1,10 @@
-# Start all benchmark jobs on Slab CI bot.
-name: Start all benchmarks
+# Start all benchmark jobs, including full shortint and integer, on Slab CI bot.
+name: Start full suite benchmarks

 on:
-# TODO schedule event would be activated later, once the cost of running full benchmarks would be assessed
-#  schedule:
-#    # Job will be triggered each Saturday at 1a.m.
-#    - cron: '0 1 * * 6'
+  schedule:
+    # Job will be triggered each Saturday at 1 a.m. UTC.
+    - cron: '0 1 * * 6'
  workflow_dispatch:

 jobs:
@@ -17,12 +16,12 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout tfhe-rs
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
--- a/.github/workflows/sync_on_push.yml
+++ b/.github/workflows/sync_on_push.yml
@@ -13,11 +13,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0
      - name: Save repo
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: repo-archive
          path: '.'
--- a/.github/workflows/trigger_aws_tests_on_pr.yml
+++ b/.github/workflows/trigger_aws_tests_on_pr.yml
@@ -12,6 +12,16 @@ jobs:
    permissions:
      pull-requests: write
    steps:
+      - name: Get current labels
+        uses: snnaplab/get-labels-action@f426df40304808ace3b5282d4f036515f7609576
+
+      - name: Remove approved label
+        if: ${{ github.event_name == 'pull_request' && contains(fromJSON(env.LABELS), 'approved') }}
+        uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          labels: approved
+
      - name: Launch fast tests
        if: ${{ github.event_name == 'pull_request' }}
        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
@@ -20,8 +30,17 @@ jobs:
          message: |
            @slab-ci cpu_fast_test

+      - name: Add approved label
+        uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf
+        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          labels: approved
+
+      # PR label 'approved' presence is checked to avoid running the full test suite several times
+      # in case of multiple approvals without new commits in between.
      - name: Launch full tests suite
-        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' }}
+        if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
        uses: mshick/add-pr-comment@a65df5f64fc741e91c59b8359a4bc56e57aaf5b1
        with:
          allow-repeats: true
@@ -32,3 +51,4 @@ jobs:
            @slab-ci cpu_integer_test
            @slab-ci cpu_multi_bit_test
            @slab-ci cpu_wasm_test
+            @slab-ci csprng_randomness_testing
--- a/.github/workflows/wasm_client_benchmark.yml
+++ b/.github/workflows/wasm_client_benchmark.yml
@@ -43,7 +43,7 @@ jobs:
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"

      - name: Checkout tfhe-rs repo with tags
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          fetch-depth: 0

@@ -89,13 +89,13 @@ jobs:
          --append-results

      - name: Upload parsed results artifact
-        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
+        uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32
        with:
          name: ${{ github.sha }}_wasm
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
-        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac
        with:
          repository: zama-ai/slab
          path: slab
@@ -118,7 +118,7 @@ jobs:
      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
-        uses: rtCamp/action-slack-notify@12e36fc18b0689399306c2e0b3e0f2978b7f1ee7
+        uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,9 @@ target/
 # Some of our bench outputs
 /tfhe/benchmarks_parameters
 **/*.csv
+
+# dieharder run log
+dieharder_run.log
+
+# Coverage reports
+/coverage/
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace]
 resolver = "2"
-members = ["tfhe", "tasks", "apps/trivium"]
+members = ["tfhe", "tasks", "apps/trivium", "concrete-csprng"]

 [profile.bench]
 lto = "fat"
--- a/Makefile
+++ b/Makefile
@@ -3,8 +3,7 @@ OS:=$(shell uname)
 RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
 CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
 TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
-RS_BUILD_TOOLCHAIN:=$(shell \
-	( (echo $(TARGET_ARCH_FEATURE) | grep -q x86) && echo stable) || echo $(RS_CHECK_TOOLCHAIN))
+RS_BUILD_TOOLCHAIN:=stable
 CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
 CARGO_PROFILE?=release
 MIN_RUST_VERSION:=$(shell grep rust-version tfhe/Cargo.toml | cut -d '=' -f 2 | xargs)
@@ -12,10 +11,12 @@ AVX512_SUPPORT?=OFF
 WASM_RUSTFLAGS:=
 BIG_TESTS_INSTANCE?=FALSE
 GEN_KEY_CACHE_MULTI_BIT_ONLY?=FALSE
+GEN_KEY_CACHE_COVERAGE_ONLY?=FALSE
 PARSE_INTEGER_BENCH_CSV_FILE?=tfhe_rs_integer_benches.csv
 FAST_TESTS?=FALSE
 FAST_BENCH?=FALSE
 BENCH_OP_FLAVOR?=DEFAULT
+COVERAGE_EXCLUDED_FILES = tfhe/benches/*,apps/trivium/src/*,tfhe/examples/*,tasks/src/*
 # This is done to avoid forgetting it, we still precise the RUSTFLAGS in the commands to be able to
 # copy paste the command in the terminal and change them if required without forgetting the flags
 export RUSTFLAGS?=-C target-cpu=native
@@ -32,6 +33,12 @@ else
 		MULTI_BIT_ONLY=
 endif

+ifeq ($(GEN_KEY_CACHE_COVERAGE_ONLY),TRUE)
+		COVERAGE_ONLY=--coverage-only
+else
+		COVERAGE_ONLY=
+endif
+
 # Variables used only for regex_engine example
 REGEX_STRING?=''
 REGEX_PATTERN?=''
@@ -80,6 +87,21 @@ install_node:
 	$(SHELL) -i -c 'nvm install node' || \
 	( echo "Unable to install node, unknown error." && exit 1 )

+.PHONY: install_dieharder # Install dieharder for apt distributions or macOS
+install_dieharder:
+	@dieharder -h > /dev/null 2>&1 || \
+	if [[ "$(OS)" == "Linux" ]]; then \
+		sudo apt update && sudo apt install -y dieharder; \
+	elif [[ "$(OS)" == "Darwin" ]]; then\
+		brew install dieharder; \
+	fi || ( echo "Unable to install dieharder, unknown error." && exit 1 )
+
+.PHONY: install_tarpaulin # Install tarpaulin to perform code coverage
+install_tarpaulin: install_rs_build_toolchain
+	@cargo tarpaulin --version > /dev/null 2>&1 || \
+	cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
+	( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )
+
 .PHONY: fmt # Format rust code
 fmt: install_rs_check_toolchain
 	cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
@@ -138,25 +160,38 @@ clippy_tasks:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
 		-p tasks -- --no-deps -D warnings

+.PHONY: clippy_trivium # Run clippy lints on Trivium app
+clippy_trivium: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy -p tfhe-trivium \
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
+		-p tfhe -- --no-deps -D warnings
+
 .PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
 clippy_all_targets:
 	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
 		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache \
 		-p tfhe -- --no-deps -D warnings

+.PHONY: clippy_concrete_csprng # Run clippy lints on concrete-csprng
+clippy_concrete_csprng: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
+		--features=$(TARGET_ARCH_FEATURE) \
+		-p concrete-csprng -- --no-deps -D warnings
+
 .PHONY: clippy_all # Run all clippy targets
 clippy_all: clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets clippy_c_api \
-clippy_js_wasm_api clippy_tasks clippy_core
+clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_trivium

 .PHONY: clippy_fast # Run main clippy targets
-clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core
+clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core \
+clippy_concrete_csprng

 .PHONY: gen_key_cache # Run the script to generate keys and cache them for shortint tests
 gen_key_cache: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
 		--example generates_test_keys \
-		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe -- \
-		$(MULTI_BIT_ONLY)
+		--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache -p tfhe -- \
+		$(MULTI_BIT_ONLY) $(COVERAGE_ONLY)

 .PHONY: build_core # Build core_crypto without experimental features
 build_core: install_rs_build_toolchain install_rs_check_toolchain
@@ -199,7 +234,7 @@ build_tfhe_full: install_rs_build_toolchain
 .PHONY: build_c_api # Build the C API for boolean, shortint and integer
 build_c_api: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
+		--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api, \
 		-p tfhe

 .PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
@@ -231,6 +266,11 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
 		wasm-pack build --release --target=nodejs \
 		-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api

+.PHONY: build_concrete_csprng # Build concrete_csprng
+build_concrete_csprng: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng --all-targets
+
 .PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
 test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
@@ -245,6 +285,14 @@ test_boolean: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--features=$(TARGET_ARCH_FEATURE),boolean -p tfhe -- boolean::

+.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
+test_boolean_cov: install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out Xml --output-dir coverage/boolean --line --engine Llvm --timeout 500 \
+		--exclude-files $(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,__coverage \
+		-p tfhe -- boolean::
+
 .PHONY: test_c_api_rs # Run the rust tests for the C API
 test_c_api_rs: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
@@ -278,6 +326,14 @@ test_shortint: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
 		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe -- shortint::

+.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
+test_shortint_cov: install_rs_check_toolchain install_tarpaulin
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
+		--out Xml --output-dir coverage/shortint --line --engine Llvm --timeout 500 \
+		--exclude-files $(COVERAGE_EXCLUDED_FILES) \
+		--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,__coverage \
+		-p tfhe -- shortint::
+
 .PHONY: test_integer_ci # Run the tests for integer ci
 test_integer_ci: install_rs_build_toolchain install_cargo_nextest
 	BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
@@ -327,14 +383,17 @@ test_examples: test_sha256_bool test_regex_engine
 .PHONY: test_trivium # Run tests for trivium
 test_trivium: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		trivium --features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
-		-- --test-threads=1
+		-p tfhe-trivium -- --test-threads=1 trivium::

 .PHONY: test_kreyvium # Run tests for kreyvium
 test_kreyvium: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-		kreyvium --features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
-		-- --test-threads=1
+		-p tfhe-trivium -- --test-threads=1 kreyvium::
+
+.PHONY: test_concrete_csprng # Run concrete-csprng tests
+test_concrete_csprng: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
+		--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng

 .PHONY: doc # Build rust doc
 doc: install_rs_check_toolchain
@@ -412,6 +471,10 @@ no_tfhe_typo:
 no_dbg_log:
 	@./scripts/no_dbg_calls.sh

+.PHONY: dieharder_csprng # Run the dieharder test suite on our CSPRNG implementation
+dieharder_csprng: install_dieharder build_concrete_csprng
+	./scripts/dieharder_test.sh
+
 #
 # Benchmarks
 #
@@ -438,6 +501,15 @@ bench_shortint: install_rs_check_toolchain
 	--bench shortint-bench \
 	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p tfhe

+.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
+bench_shortint_multi_bit: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
+	__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
+	cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
+	--bench shortint-bench \
+	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,$(AVX512_FEATURE) -p tfhe --
+
+
 .PHONY: bench_boolean # Run benchmarks for boolean
 bench_boolean: install_rs_check_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
@@ -495,6 +567,12 @@ parse_wasm_benchmarks: install_rs_check_toolchain
 	--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
 	-- web_wasm_parallel_tests/test/benchmark_results

+.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
+write_params_to_file: install_rs_check_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
+	--example write_params_to_file \
+	--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
+
 #
 # Real use case examples
 #
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-un
 ```toml
 tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
 ```
-Note: users with ARM devices must use `TFHE-rs` by compiling using the `nightly` toolchain.
+Note: users with ARM devices must compile `TFHE-rs` using a stable toolchain with version >= 1.72.


 + For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) 
@@ -92,10 +92,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
    // On the server side:
    set_server_key(server_keys);

-    // Clear equivalent computations: 1344 * 8 = 10752
+    // Clear equivalent computations: 1344 * 5 = 6720
    let encrypted_res_mul = &encrypted_a * &encrypted_b;

-    // Clear equivalent computations: 1344 >> 8 = 42
+    // Clear equivalent computations: 6720 >> 5 = 210
    encrypted_a = &encrypted_res_mul >> &encrypted_b;

    // Clear equivalent computations: let casted_a = a as u8;
--- a/apps/trivium/src/kreyvium/kreyvium.rs
+++ b/apps/trivium/src/kreyvium/kreyvium.rs
@@ -80,9 +80,9 @@ impl KreyviumStream<FheBool> {

        // Initialization of Kreyvium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_register = [false; 93].map(|x| FheBool::encrypt_trivial(x));
-        let mut b_register = [false; 84].map(|x| FheBool::encrypt_trivial(x));
-        let mut c_register = [false; 111].map(|x| FheBool::encrypt_trivial(x));
+        let mut a_register = [false; 93].map(FheBool::encrypt_trivial);
+        let mut b_register = [false; 84].map(FheBool::encrypt_trivial);
+        let mut c_register = [false; 111].map(FheBool::encrypt_trivial);

        for i in 0..93 {
            a_register[i] = key[128 - 93 + i].clone();
@@ -99,7 +99,7 @@ impl KreyviumStream<FheBool> {

        key.reverse();
        iv.reverse();
-        let iv = iv.map(|x| FheBool::encrypt_trivial(x));
+        let iv = iv.map(FheBool::encrypt_trivial);

        unset_server_key();
        KreyviumStream::<FheBool>::new_from_registers(
@@ -149,7 +149,7 @@ where
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> T {
+    pub fn next_bool(&mut self) -> T {
        match &self.fhe_key {
            Some(sk) => set_server_key(sk.clone()),
            None => (),
--- a/apps/trivium/src/kreyvium/kreyvium_byte.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_byte.rs
@@ -54,18 +54,15 @@ impl KreyviumStreamByte<u8> {
        let mut c_byte_reg = [0u8; 14];

        // Copy key bits into a register
-        for b in 0..12 {
-            a_byte_reg[b] = key_bytes[b + 4];
-        }
+        a_byte_reg.copy_from_slice(&key_bytes[4..]);
+
        // Copy iv bits into a register
-        for b in 0..11 {
-            b_byte_reg[b] = iv_bytes[b + 5];
-        }
+        b_byte_reg.copy_from_slice(&iv_bytes[5..]);
+
        // Copy a lot of ones in the c register
        c_byte_reg[0] = 252;
-        for b in 1..8 {
-            c_byte_reg[b] = 255;
-        }
+        c_byte_reg[1..8].fill(255);
+
        // Copy iv bits in the c register
        c_byte_reg[8] = (iv_bytes[0] << 4) | 31;
        for b in 9..14 {
@@ -100,23 +97,22 @@ impl KreyviumStreamByte<FheUint8> {

        // Initialization of Kreyvium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_byte_reg = [0u8; 12].map(|x| FheUint8::encrypt_trivial(x));
-        let mut b_byte_reg = [0u8; 11].map(|x| FheUint8::encrypt_trivial(x));
-        let mut c_byte_reg = [0u8; 14].map(|x| FheUint8::encrypt_trivial(x));
+        let mut a_byte_reg = [0u8; 12].map(FheUint8::encrypt_trivial);
+        let mut b_byte_reg = [0u8; 11].map(FheUint8::encrypt_trivial);
+        let mut c_byte_reg = [0u8; 14].map(FheUint8::encrypt_trivial);

        // Copy key bits into a register
-        for b in 0..12 {
-            a_byte_reg[b] = key_bytes[b + 4].clone();
-        }
+        a_byte_reg.clone_from_slice(&key_bytes[4..]);
+
        // Copy iv bits into a register
        for b in 0..11 {
            b_byte_reg[b] = FheUint8::encrypt_trivial(iv_bytes[b + 5]);
        }
        // Copy a lot of ones in the c register
        c_byte_reg[0] = FheUint8::encrypt_trivial(252u8);
-        for b in 1..8 {
-            c_byte_reg[b] = FheUint8::encrypt_trivial(255u8);
-        }
+
+        c_byte_reg[1..8].fill_with(|| FheUint8::encrypt_trivial(255u8));
+
        // Copy iv bits in the c register
        c_byte_reg[8] = FheUint8::encrypt_trivial((&iv_bytes[0] << 4u8) | 31u8);
        for b in 9..14 {
@@ -292,6 +288,6 @@ where

 impl KreyviumStreamByte<FheUint8> {
    pub fn get_server_key(&self) -> &ServerKey {
-        &self.fhe_key.as_ref().unwrap()
+        self.fhe_key.as_ref().unwrap()
    }
 }
--- a/apps/trivium/src/kreyvium/kreyvium_shortint.rs
+++ b/apps/trivium/src/kreyvium/kreyvium_shortint.rs
@@ -75,7 +75,7 @@ impl KreyviumStreamShortint {
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> Ciphertext {
+    pub fn next_ct(&mut self) -> Ciphertext {
        let [o, a, b, c] = self.get_output_and_values(0);

        self.a.push(a);
@@ -149,7 +149,7 @@ impl KreyviumStreamShortint {
                            .unchecked_add_assign(&mut new_c, c5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_c, &temp_b);
-                        self.internal_server_key.clear_carry_assign(&mut new_c);
+                        self.internal_server_key.message_extract_assign(&mut new_c);
                        new_c
                    },
                    || {
--- a/apps/trivium/src/kreyvium/mod.rs
+++ b/apps/trivium/src/kreyvium/mod.rs
@@ -1,3 +1,4 @@
+#[allow(clippy::module_inception)]
 mod kreyvium;
 pub use kreyvium::KreyviumStream;

--- a/apps/trivium/src/kreyvium/test.rs
+++ b/apps/trivium/src/kreyvium/test.rs
@@ -56,7 +56,7 @@ fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
            _ => (),
        };
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
@@ -65,7 +65,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:02X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
@@ -73,7 +73,7 @@ fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:016X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 #[test]
@@ -86,7 +86,7 @@ fn kreyvium_test_1() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -105,7 +105,7 @@ fn kreyvium_test_2() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -124,7 +124,7 @@ fn kreyvium_test_3() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -161,7 +161,7 @@ fn kreyvium_test_4() {

    let mut vec = Vec::<bool>::with_capacity(64);
    while vec.len() < 64 {
-        vec.push(kreyvium.next());
+        vec.push(kreyvium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
--- a/apps/trivium/src/static_deque/mod.rs
+++ b/apps/trivium/src/static_deque/mod.rs
@@ -1,3 +1,4 @@
+#[allow(clippy::module_inception)]
 mod static_deque;
 pub use static_deque::StaticDeque;
 mod static_byte_deque;
--- a/apps/trivium/src/static_deque/static_byte_deque.rs
+++ b/apps/trivium/src/static_deque/static_byte_deque.rs
@@ -77,7 +77,7 @@ where
        }

        let byte_next: &T = &self.deque[i / 8 + 1];
-        return (byte << bit_idx) | (byte_next >> (8 - bit_idx as u8));
+        (byte << bit_idx) | (byte_next >> (8 - bit_idx))
    }
 }

@@ -101,7 +101,7 @@ mod tests {
        assert!(deque.bit(7) == 0);

        // second youngest: 128
-        assert!(deque.bit(8 + 0) == 0);
+        assert!(deque.bit(8) == 0);
        assert!(deque.bit(8 + 1) == 0);
        assert!(deque.bit(8 + 2) == 0);
        assert!(deque.bit(8 + 3) == 0);
@@ -111,7 +111,7 @@ mod tests {
        assert!(deque.bit(8 + 7) > 0);

        // oldest: 64
-        assert!(deque.bit(16 + 0) == 0);
+        assert!(deque.bit(16) == 0);
        assert!(deque.bit(16 + 1) == 0);
        assert!(deque.bit(16 + 2) == 0);
        assert!(deque.bit(16 + 3) == 0);
--- a/apps/trivium/src/trivium/mod.rs
+++ b/apps/trivium/src/trivium/mod.rs
@@ -1,5 +1,5 @@
-mod trivium;
-pub use trivium::TriviumStream;
+mod trivium_bool;
+pub use trivium_bool::TriviumStream;

 mod trivium_byte;
 pub use trivium_byte::TriviumStreamByte;
--- a/apps/trivium/src/trivium/test.rs
+++ b/apps/trivium/src/trivium/test.rs
@@ -56,7 +56,7 @@ fn get_hexadecimal_string_from_lsb_first_stream(a: Vec<bool>) -> String {
            _ => (),
        };
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
@@ -65,7 +65,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:02X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
@@ -73,7 +73,7 @@ fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
    for test in a {
        hexadecimal.push_str(&format!("{:016X?}", test));
    }
-    return hexadecimal;
+    hexadecimal
 }

 #[test]
@@ -89,7 +89,7 @@ fn trivium_test_1() {

    let mut vec = Vec::<bool>::with_capacity(512 * 8);
    while vec.len() < 512 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -114,7 +114,7 @@ fn trivium_test_2() {

    let mut vec = Vec::<bool>::with_capacity(512 * 8);
    while vec.len() < 512 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -139,7 +139,7 @@ fn trivium_test_3() {

    let mut vec = Vec::<bool>::with_capacity(512 * 8);
    while vec.len() < 512 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
@@ -182,7 +182,7 @@ fn trivium_test_4() {

    let mut vec = Vec::<bool>::with_capacity(131072 * 8);
    while vec.len() < 131072 * 8 {
-        vec.push(trivium.next());
+        vec.push(trivium.next_bool());
    }

    let hexadecimal = get_hexadecimal_string_from_lsb_first_stream(vec);
--- a/apps/trivium/src/trivium/trivium_bool.rs
+++ b/apps/trivium/src/trivium/trivium_bool.rs
@@ -66,9 +66,9 @@ impl TriviumStream<FheBool> {

        // Initialization of Trivium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_register = [false; 93].map(|x| FheBool::encrypt_trivial(x));
-        let mut b_register = [false; 84].map(|x| FheBool::encrypt_trivial(x));
-        let mut c_register = [false; 111].map(|x| FheBool::encrypt_trivial(x));
+        let mut a_register = [false; 93].map(FheBool::encrypt_trivial);
+        let mut b_register = [false; 84].map(FheBool::encrypt_trivial);
+        let mut c_register = [false; 111].map(FheBool::encrypt_trivial);

        for i in 0..80 {
            a_register[93 - 80 + i] = key[i].clone();
@@ -121,7 +121,7 @@ where
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> T {
+    pub fn next_bool(&mut self) -> T {
        match &self.fhe_key {
            Some(sk) => set_server_key(sk.clone()),
            None => (),
--- a/apps/trivium/src/trivium/trivium_byte.rs
+++ b/apps/trivium/src/trivium/trivium_byte.rs
@@ -81,9 +81,9 @@ impl TriviumStreamByte<FheUint8> {

        // Initialization of Trivium registers: a has the secret key, b the input vector,
        // and c a few ones.
-        let mut a_byte_reg = [0u8; 12].map(|x| FheUint8::encrypt_trivial(x));
-        let mut b_byte_reg = [0u8; 11].map(|x| FheUint8::encrypt_trivial(x));
-        let mut c_byte_reg = [0u8; 14].map(|x| FheUint8::encrypt_trivial(x));
+        let mut a_byte_reg = [0u8; 12].map(FheUint8::encrypt_trivial);
+        let mut b_byte_reg = [0u8; 11].map(FheUint8::encrypt_trivial);
+        let mut c_byte_reg = [0u8; 14].map(FheUint8::encrypt_trivial);

        for i in 0..10 {
            a_byte_reg[12 - 10 + i] = key[i].clone();
@@ -236,6 +236,6 @@ where

 impl TriviumStreamByte<FheUint8> {
    pub fn get_server_key(&self) -> &ServerKey {
-        &self.fhe_key.as_ref().unwrap()
+        self.fhe_key.as_ref().unwrap()
    }
 }
--- a/apps/trivium/src/trivium/trivium_shortint.rs
+++ b/apps/trivium/src/trivium/trivium_shortint.rs
@@ -63,7 +63,7 @@ impl TriviumStreamShortint {
    }

    /// Computes one turn of the stream, updating registers and outputting the new bit.
-    pub fn next(&mut self) -> Ciphertext {
+    pub fn next_ct(&mut self) -> Ciphertext {
        let [o, a, b, c] = self.get_output_and_values(0);

        self.a.push(a);
@@ -113,7 +113,7 @@ impl TriviumStreamShortint {
                            .unchecked_add_assign(&mut new_a, a5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_a, &temp_c);
-                        self.internal_server_key.clear_carry_assign(&mut new_a);
+                        self.internal_server_key.message_extract_assign(&mut new_a);
                        new_a
                    },
                    || {
@@ -122,7 +122,7 @@ impl TriviumStreamShortint {
                            .unchecked_add_assign(&mut new_b, b5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_b, &temp_a);
-                        self.internal_server_key.clear_carry_assign(&mut new_b);
+                        self.internal_server_key.message_extract_assign(&mut new_b);
                        new_b
                    },
                )
@@ -135,7 +135,7 @@ impl TriviumStreamShortint {
                            .unchecked_add_assign(&mut new_c, c5);
                        self.internal_server_key
                            .unchecked_add_assign(&mut new_c, &temp_b);
-                        self.internal_server_key.clear_carry_assign(&mut new_c);
+                        self.internal_server_key.message_extract_assign(&mut new_c);
                        new_c
                    },
                    || {
--- a/ci/benchmark_parser.py
+++ b/ci/benchmark_parser.py
@@ -108,12 +108,25 @@ def recursive_parse(directory, walk_subdirs=False, name_suffix="", compute_throu
                    )
                )

+                # This is a special case where PBS are blasted as vector LWE ciphertext with
+                # variable length to saturate the machine. To get the actual throughput we need to
+                # multiply by the length of the vector.
+                if "PBS_throughput" in test_name and "chunk" in test_name:
+                    try:
+                        multiplier = int(test_name.split("chunk")[0].split("_")[-1])
+                    except ValueError:
+                        parsing_failures.append((full_name,
+                                                 "failed to extract throughput multiplier"))
+                        continue
+                else:
+                    multiplier = 1
+
                if stat_name == "mean" and compute_throughput:
                    test_suffix = "ops-per-sec"
                    test_name_parts.append(test_suffix)
                    result_values.append(
                        _create_point(
-                            compute_ops_per_second(value),
+                            multiplier * compute_ops_per_second(value),
                            "_".join(test_name_parts),
                            bench_class,
                            "throughput",
@@ -129,7 +142,7 @@ def recursive_parse(directory, walk_subdirs=False, name_suffix="", compute_throu
                        test_name_parts.append(test_suffix)
                        result_values.append(
                            _create_point(
-                                compute_ops_per_dollar(value, hardware_hourly_cost),
+                                multiplier * compute_ops_per_dollar(value, hardware_hourly_cost),
                                "_".join(test_name_parts),
                                bench_class,
                                "throughput",
--- a/ci/lattice_estimator.sage
+++ b/ci/lattice_estimator.sage
@@ -0,0 +1,75 @@
+"""
+lattice_estimator
+-----------------
+
+Test cryptographic parameters set against several attacks to estimate their security level.
+"""
+import pathlib
+import sys
+sys.path.insert(1, 'lattice-estimator')
+from estimator import *
+
+
+model = RC.BDGL16
+
+def check_security(filename):
+    """
+    Run lattice estimator to determine if a parameters set is secure or not.
+
+    :param filename: name of the file containing parameters set
+
+    :return: :class:`list` of parameters to update
+    """
+    filepath = pathlib.Path("ci", filename)
+    load(filepath)
+    print(f"Parsing parameters in {filepath}")
+
+    to_update = []
+
+    for param in all_params:
+        if param.tag.startswith("TFHE_LIB_PARAMETERS"):
+            # This third-party parameters set is known to be less secure, just skip the analysis.
+            continue
+
+        print(f"\t{param.tag}...\t", end= "")
+
+        try:
+            # The lattice estimator is not able to manage such large dimension.
+            # If we have the security for smaller `n` then we have security for larger ones.
+            if param.n >= 32768:
+                param = param.updated(n = 16384)
+
+            usvp_level = LWE.primal_usvp(param, red_cost_model = model)
+            dual_level = LWE.dual_hybrid(param, red_cost_model = model)
+
+            estimator_level = log(min(usvp_level["rop"], dual_level["rop"]),2 )
+            if estimator_level < 127:
+                print("FAIL")
+                reason = f"attained security level = {estimator_level} bits target is 128 bits"
+                to_update.append((param, reason))
+                continue
+        except Exception as err:
+            print("FAIL")
+            to_update.append((param, f"{repr(err)}"))
+        else:
+            print("OK")
+
+    return to_update
+
+
+if __name__ == "__main__":
+    params_to_update = []
+
+    for params_filename in ("boolean_parameters_lattice_estimator.sage",
+                            "shortint_classic_parameters_lattice_estimator.sage",
+                            "shortint_multi_bit_parameters_lattice_estimator.sage"):
+        params_to_update.extend(check_security(params_filename))
+
+    if params_to_update:
+        print("Some parameters need update")
+        print("----------------------------")
+        for param, reason in params_to_update:
+            print(f"[{param.tag}] reason: {reason} (param)")
+        sys.exit(int(1))  # Explicit conversion is needed to make this call work
+    else:
+        print("All parameters passed the security check")
--- a/ci/slab.toml
+++ b/ci/slab.toml
@@ -1,16 +1,16 @@
 [profile.cpu-big]
 region = "eu-west-3"
-image_id = "ami-0ab73f5bd11708a85"
+image_id = "ami-051942e4055555752"
 instance_type = "m6i.32xlarge"

 [profile.cpu-small]
 region = "eu-west-3"
-image_id = "ami-0ab73f5bd11708a85"
+image_id = "ami-051942e4055555752"
 instance_type = "m6i.4xlarge"

 [profile.bench]
 region = "eu-west-3"
-image_id = "ami-0ab73f5bd11708a85"
+image_id = "ami-051942e4055555752"
 instance_type = "m6i.metal"

 [command.cpu_test]
@@ -77,3 +77,13 @@ check_run_name = "PBS CPU AWS Benchmarks"
 workflow = "wasm_client_benchmark.yml"
 profile = "cpu-small"
 check_run_name = "WASM Client AWS Benchmarks"
+
+[command.csprng_randomness_testing]
+workflow = "csprng_randomness_testing.yml"
+profile = "cpu-small"
+check_run_name = "CSPRNG randomness testing"
+
+[command.code_coverage]
+workflow = "code_coverage.yml"
+profile = "cpu-small"
+check_run_name = "Code coverage"
--- a/codecov.yml
+++ b/codecov.yml
@@ -0,0 +1,4 @@
+coverage:
+  status:
+    # Disable patch checks in GitHub until all tfhe-rs layers have coverage implemented.
+    patch: false
--- a/concrete-csprng/Cargo.toml
+++ b/concrete-csprng/Cargo.toml
@@ -0,0 +1,52 @@
+[package]
+name = "concrete-csprng"
+version = "0.4.0"
+edition = "2021"
+license = "BSD-3-Clause-Clear"
+description = "Cryptographically Secure PRNG used in the TFHE-rs library."
+homepage = "https://zama.ai/"
+documentation = "https://docs.zama.ai/tfhe-rs"
+repository = "https://github.com/zama-ai/tfhe-rs"
+readme = "README.md"
+keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
+
+[dependencies]
+aes = "0.8.2"
+rayon = { version = "1.5.0", optional = true }
+
+[target.'cfg(target_os = "macos")'.dependencies]
+libc = "0.2.133"
+
+[dev-dependencies]
+rand = "0.8.3"
+criterion = "0.3"
+clap = "=4.2.7"
+
+[features]
+parallel = ["rayon"]
+seeder_x86_64_rdseed = []
+seeder_unix = []
+generator_x86_64_aesni = []
+generator_fallback = []
+generator_aarch64_aes = []
+
+x86_64 = [
+    "parallel",
+    "seeder_x86_64_rdseed",
+    "generator_x86_64_aesni",
+    "generator_fallback",
+]
+x86_64-unix = ["x86_64", "seeder_unix"]
+aarch64 = ["parallel", "generator_aarch64_aes", "generator_fallback"]
+aarch64-unix = ["aarch64", "seeder_unix"]
+
+[[bench]]
+name = "benchmark"
+path = "benches/benchmark.rs"
+harness = false
+required-features = ["seeder_x86_64_rdseed", "generator_x86_64_aesni"]
+
+[[example]]
+name = "generate"
+path = "examples/generate.rs"
+required-features = ["seeder_unix", "generator_fallback"]
--- a/concrete-csprng/LICENSE
+++ b/concrete-csprng/LICENSE
@@ -0,0 +1,28 @@
+BSD 3-Clause Clear License
+
+Copyright © 2023 ZAMA.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this
+list of conditions and the following disclaimer in the documentation and/or other
+materials provided with the distribution.
+
+3. Neither the name of ZAMA nor the names of its contributors may be used to endorse
+or promote products derived from this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
+THIS SOFTWARE IS PROVIDED BY THE ZAMA AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ZAMA OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/concrete-csprng/README.md
+++ b/concrete-csprng/README.md
@@ -0,0 +1,23 @@
+# Concrete CSPRNG
+
+This crate contains a fast *Cryptographically Secure Pseudorandom Number Generator*, used in the
+['concrete-core'](https://crates.io/crates/concrete-core) library, you can find it [here](../concrete-core/) in this repo.
+
+The implementation is based on the AES blockcipher used in CTR mode, as described in the ISO/IEC
+18033-4 standard.
+
+Two implementations are available, an accelerated one on x86_64 CPUs with the `aes` feature and the `sse2` feature, and a pure software one that can be used on other platforms.
+
+The crate also makes two seeders available: one needing the x86_64 feature `rdseed`, and another one based on the Unix random device `/dev/random`; the latter requires the user to provide a secret.
+
+## Running the benchmarks
+
+To execute the benchmarks on an x86_64 platform:
+```shell
+RUSTFLAGS="-Ctarget-cpu=native" cargo bench --features=seeder_x86_64_rdseed,generator_x86_64_aesni 
+```
+
+## License
+
+This software is distributed under the BSD-3-Clause-Clear license. If you have any questions,
+please contact us at `hello@zama.ai`.
--- a/concrete-csprng/benches/benchmark.rs
+++ b/concrete-csprng/benches/benchmark.rs
@@ -0,0 +1,54 @@
+use concrete_csprng::generators::{
+    AesniRandomGenerator, BytesPerChild, ChildrenCount, RandomGenerator,
+};
+use concrete_csprng::seeders::{RdseedSeeder, Seeder};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+// The number of bytes to generate during one benchmark iteration.
+const N_GEN: usize = 1_000_000;
+
+fn parent_generate(c: &mut Criterion) {
+    let mut seeder = RdseedSeeder;
+    let mut generator = AesniRandomGenerator::new(seeder.seed());
+    c.bench_function("parent_generate", |b| {
+        b.iter(|| {
+            (0..N_GEN).for_each(|_| {
+                generator.next();
+            })
+        })
+    });
+}
+
+fn child_generate(c: &mut Criterion) {
+    let mut seeder = RdseedSeeder;
+    let mut generator = AesniRandomGenerator::new(seeder.seed());
+    let mut generator = generator
+        .try_fork(ChildrenCount(1), BytesPerChild(N_GEN * 10_000))
+        .unwrap()
+        .next()
+        .unwrap();
+    c.bench_function("child_generate", |b| {
+        b.iter(|| {
+            (0..N_GEN).for_each(|_| {
+                generator.next();
+            })
+        })
+    });
+}
+
+fn fork(c: &mut Criterion) {
+    let mut seeder = RdseedSeeder;
+    let mut generator = AesniRandomGenerator::new(seeder.seed());
+    c.bench_function("fork", |b| {
+        b.iter(|| {
+            black_box(
+                generator
+                    .try_fork(ChildrenCount(2048), BytesPerChild(2048))
+                    .unwrap(),
+            )
+        })
+    });
+}
+
+criterion_group!(benches, parent_generate, child_generate, fork);
+criterion_main!(benches);
--- a/concrete-csprng/build.rs
+++ b/concrete-csprng/build.rs
@@ -0,0 +1,112 @@
+// To have clear error messages during compilation about why some piece of code may not be available
+// we decided to check the features compatibility with the target configuration in this script.
+
+use std::collections::HashMap;
+use std::env;
+
+// See https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch for various
+// compilation configuration
+
+// Can be easily extended if needed
+pub struct FeatureRequirement {
+    pub feature_name: &'static str,
+    // target_arch requirement
+    pub feature_req_target_arch: Option<&'static str>,
+    // target_family requirement
+    pub feature_req_target_family: Option<&'static str>,
+}
+
+// Helpers to query whether a feature is enabled and to validate its target requirements
+impl FeatureRequirement {
+    // As we cannot use cfg!(feature = "feature_name") with something else than a literal, we need
+    // a reference to the HashMap we populate with the enabled features
+    fn is_activated(&self, build_activated_features: &HashMap<&'static str, bool>) -> bool {
+        *build_activated_features.get(self.feature_name).unwrap()
+    }
+
+    // panics if the requirements are not met
+    fn check_requirements(&self) {
+        let target_arch = get_target_arch_cfg();
+        if let Some(feature_req_target_arch) = self.feature_req_target_arch {
+            if feature_req_target_arch != target_arch {
+                panic!(
+                    "Feature `{}` requires target_arch `{}`, current cfg: `{}`",
+                    self.feature_name, feature_req_target_arch, target_arch
+                )
+            }
+        }
+
+        let target_family = get_target_family_cfg();
+        if let Some(feature_req_target_family) = self.feature_req_target_family {
+            if feature_req_target_family != target_family {
+                panic!(
+                    "Feature `{}` requires target_family `{}`, current cfg: `{}`",
+                    self.feature_name, feature_req_target_family, target_family
+                )
+            }
+        }
+    }
+}
+
+// const vecs are not yet a thing so use a fixed size array (update the array size when adding
+// requirements)
+static FEATURE_REQUIREMENTS: [FeatureRequirement; 4] = [
+    FeatureRequirement {
+        feature_name: "seeder_x86_64_rdseed",
+        feature_req_target_arch: Some("x86_64"),
+        feature_req_target_family: None,
+    },
+    FeatureRequirement {
+        feature_name: "generator_x86_64_aesni",
+        feature_req_target_arch: Some("x86_64"),
+        feature_req_target_family: None,
+    },
+    FeatureRequirement {
+        feature_name: "seeder_unix",
+        feature_req_target_arch: None,
+        feature_req_target_family: Some("unix"),
+    },
+    FeatureRequirement {
+        feature_name: "generator_aarch64_aes",
+        feature_req_target_arch: Some("aarch64"),
+        feature_req_target_family: None,
+    },
+];
+
+// For a "feature_name" feature_cfg!("feature_name") expands to
+// ("feature_name", cfg!(feature = "feature_name"))
+macro_rules! feature_cfg {
+    ($feat_name:literal) => {
+        ($feat_name, cfg!(feature = $feat_name))
+    };
+}
+
+// Static HashMap would require an additional crate (phf or lazy static e.g.), so we just write a
+// function that returns the HashMap we are interested in
+fn get_feature_enabled_status() -> HashMap<&'static str, bool> {
+    HashMap::from([
+        feature_cfg!("seeder_x86_64_rdseed"),
+        feature_cfg!("generator_x86_64_aesni"),
+        feature_cfg!("seeder_unix"),
+        feature_cfg!("generator_aarch64_aes"),
+    ])
+}
+
+// See https://stackoverflow.com/a/43435335/18088947 for the inspiration of this code
+fn get_target_arch_cfg() -> String {
+    env::var("CARGO_CFG_TARGET_ARCH").expect("CARGO_CFG_TARGET_ARCH is not set")
+}
+
+fn get_target_family_cfg() -> String {
+    env::var("CARGO_CFG_TARGET_FAMILY").expect("CARGO_CFG_TARGET_FAMILY is not set")
+}
+
+fn main() {
+    let feature_enabled_status = get_feature_enabled_status();
+
+    // This will panic if some requirements for a feature are not met
+    FEATURE_REQUIREMENTS
+        .iter()
+        .filter(|&req| FeatureRequirement::is_activated(req, &feature_enabled_status))
+        .for_each(FeatureRequirement::check_requirements);
+}
--- a/concrete-csprng/examples/generate.rs
+++ b/concrete-csprng/examples/generate.rs
@@ -0,0 +1,113 @@
+//! This program uses the concrete csprng to generate an infinite stream of random bytes on
+//! the program stdout. It can also generate a fixed number of bytes by passing a value along the
+//! optional argument `--bytes_total`. For testing purpose.
+use clap::{value_parser, Arg, Command};
+#[cfg(feature = "generator_x86_64_aesni")]
+use concrete_csprng::generators::AesniRandomGenerator as ActivatedRandomGenerator;
+#[cfg(feature = "generator_aarch64_aes")]
+use concrete_csprng::generators::NeonAesRandomGenerator as ActivatedRandomGenerator;
+#[cfg(all(
+    not(feature = "generator_x86_64_aesni"),
+    not(feature = "generator_aarch64_aes"),
+    feature = "generator_fallback"
+))]
+use concrete_csprng::generators::SoftwareRandomGenerator as ActivatedRandomGenerator;
+
+use concrete_csprng::generators::RandomGenerator;
+
+#[cfg(target_os = "macos")]
+use concrete_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
+#[cfg(all(not(target_os = "macos"), feature = "seeder_x86_64_rdseed"))]
+use concrete_csprng::seeders::RdseedSeeder as ActivatedSeeder;
+#[cfg(all(
+    not(target_os = "macos"),
+    not(feature = "seeder_x86_64_rdseed"),
+    feature = "seeder_unix"
+))]
+use concrete_csprng::seeders::UnixSeeder as ActivatedSeeder;
+
+use concrete_csprng::seeders::Seeder;
+
+use std::io::prelude::*;
+use std::io::{stdout, StdoutLock};
+
+fn write_bytes(
+    buffer: &mut [u8],
+    generator: &mut ActivatedRandomGenerator,
+    stdout: &mut StdoutLock<'_>,
+) -> std::io::Result<()> {
+    buffer.iter_mut().zip(generator).for_each(|(b, g)| *b = g);
+    stdout.write_all(buffer)
+}
+
+fn infinite_bytes_generation(
+    buffer: &mut [u8],
+    generator: &mut ActivatedRandomGenerator,
+    stdout: &mut StdoutLock<'_>,
+) {
+    while write_bytes(buffer, generator, stdout).is_ok() {}
+}
+
+fn bytes_generation(
+    bytes_total: usize,
+    buffer: &mut [u8],
+    generator: &mut ActivatedRandomGenerator,
+    stdout: &mut StdoutLock<'_>,
+) {
+    let quotient = bytes_total / buffer.len();
+    let remaining = bytes_total % buffer.len();
+
+    for _ in 0..quotient {
+        write_bytes(buffer, generator, stdout).unwrap();
+    }
+
+    write_bytes(&mut buffer[0..remaining], generator, stdout).unwrap()
+}
+
+pub fn main() {
+    let matches = Command::new(
+        "Generate a stream of random numbers, specify no flags for infinite generation",
+    )
+    .arg(
+        Arg::new("bytes_total")
+            .short('b')
+            .long("bytes_total")
+            .value_parser(value_parser!(usize))
+            .help("Total number of bytes that has to be generated"),
+    )
+    .get_matches();
+
+    // Ugly hack to be able to use UnixSeeder
+    #[cfg(all(
+        not(target_os = "macos"),
+        not(feature = "seeder_x86_64_rdseed"),
+        feature = "seeder_unix"
+    ))]
+    let new_seeder = || ActivatedSeeder::new(0);
+    #[cfg(not(all(
+        not(target_os = "macos"),
+        not(feature = "seeder_x86_64_rdseed"),
+        feature = "seeder_unix"
+    )))]
+    let new_seeder = || ActivatedSeeder;
+
+    let mut seeder = new_seeder();
+    let seed = seeder.seed();
+    // Don't print on std out
+    eprintln!("seed={seed:?}");
+    let mut generator = ActivatedRandomGenerator::new(seed);
+    let stdout = stdout();
+    let mut buffer = [0u8; 16];
+
+    // lock stdout as there is a single thread running
+    let mut stdout = stdout.lock();
+
+    match matches.get_one::<usize>("bytes_total") {
+        Some(&total) => {
+            bytes_generation(total, &mut buffer, &mut generator, &mut stdout);
+        }
+        None => {
+            infinite_bytes_generation(&mut buffer, &mut generator, &mut stdout);
+        }
+    };
+}
--- a/concrete-csprng/src/generators/aes_ctr/block_cipher.rs
+++ b/concrete-csprng/src/generators/aes_ctr/block_cipher.rs
@@ -0,0 +1,20 @@
+use crate::generators::aes_ctr::index::AesIndex;
+use crate::generators::aes_ctr::BYTES_PER_BATCH;
+
+/// Represents a key used in the AES block cipher.
+#[derive(Clone, Copy)]
+pub struct AesKey(pub u128);
+
+/// A trait for AES block ciphers.
+///
+/// Note:
+/// -----
+///
+/// The block cipher is used in a batched manner (to reduce amortized cost on special hardware).
+/// For this reason we only expose a `generate_batch` method.
+pub trait AesBlockCipher: Clone + Send + Sync {
+    /// Instantiate a new generator from a secret key.
+    fn new(key: AesKey) -> Self;
+    /// Generates the batch corresponding to the given index.
+    fn generate_batch(&mut self, index: AesIndex) -> [u8; BYTES_PER_BATCH];
+}
--- a/concrete-csprng/src/generators/aes_ctr/generic.rs
+++ b/concrete-csprng/src/generators/aes_ctr/generic.rs
@@ -0,0 +1,379 @@
+use crate::generators::aes_ctr::block_cipher::{AesBlockCipher, AesKey};
+use crate::generators::aes_ctr::index::TableIndex;
+use crate::generators::aes_ctr::states::{BufferPointer, ShiftAction, State};
+use crate::generators::aes_ctr::BYTES_PER_BATCH;
+use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError};
+
+// Usually, to work with iterators and parallel iterators, we would use opaque types such as
+// `impl Iterator<..>`. Unfortunately, it is not yet possible to return existential types in
+// traits, which we would need for `RandomGenerator`. For this reason, we have to use the
+// full type name where needed. Hence the following trait aliases definition:
+
+/// A type alias for the children iterator closure type.
+pub type ChildrenClosure<BlockCipher> =
+    fn((usize, (Box<BlockCipher>, TableIndex, BytesPerChild))) -> AesCtrGenerator<BlockCipher>;
+
+/// A type alias for the children iterator type.
+pub type ChildrenIterator<BlockCipher> = std::iter::Map<
+    std::iter::Zip<
+        std::ops::Range<usize>,
+        std::iter::Repeat<(Box<BlockCipher>, TableIndex, BytesPerChild)>,
+    >,
+    ChildrenClosure<BlockCipher>,
+>;
+
+/// A type implementing the `RandomGenerator` api using the AES block cipher in counter mode.
+#[derive(Clone)]
+pub struct AesCtrGenerator<BlockCipher: AesBlockCipher> {
+    // The block cipher used in the background
+    pub(crate) block_cipher: Box<BlockCipher>,
+    // The state corresponding to the latest outputted byte.
+    pub(crate) state: State,
+    // The last legal index. This makes bound check faster.
+    pub(crate) last: TableIndex,
+    // The buffer containing the current batch of aes calls.
+    pub(crate) buffer: [u8; BYTES_PER_BATCH],
+}
+
+#[allow(unused)] // to please clippy when tests are not activated
+impl<BlockCipher: AesBlockCipher> AesCtrGenerator<BlockCipher> {
+    /// Generates a new csprng.
+    ///
+    /// Note :
+    /// ------
+    ///
+    /// The `start_index` given as input, points to the first byte that will be outputted by the
+    /// generator. If not given, this one is automatically set to the second table index. The
+    /// first table index is not used to prevent an edge case from happening: since `state` is
+    /// supposed to contain the index of the previous byte, the initial value must be decremented.
+    /// Using the second value prevents wrapping to the max index, which would make the bound
+    /// checking fail.
+    ///
+    /// The `bound_index` given as input, points to the first byte that can __not__ be legally
+    /// outputted by the generator. If not given, the bound is automatically set to the last
+    /// table index.
+    pub fn new(
+        key: AesKey,
+        start_index: Option<TableIndex>,
+        bound_index: Option<TableIndex>,
+    ) -> AesCtrGenerator<BlockCipher> {
+        AesCtrGenerator::from_block_cipher(
+            Box::new(BlockCipher::new(key)),
+            start_index.unwrap_or(TableIndex::SECOND),
+            bound_index.unwrap_or(TableIndex::LAST),
+        )
+    }
+
+    /// Generates a csprng from an existing block cipher.
+    pub fn from_block_cipher(
+        block_cipher: Box<BlockCipher>,
+        start_index: TableIndex,
+        bound_index: TableIndex,
+    ) -> AesCtrGenerator<BlockCipher> {
+        assert!(start_index < bound_index);
+        let last = bound_index.decremented();
+        let buffer = [0u8; BYTES_PER_BATCH];
+        let state = State::new(start_index);
+        AesCtrGenerator {
+            block_cipher,
+            state,
+            last,
+            buffer,
+        }
+    }
+
+    /// Returns the table index related to the previous random byte.
+    pub fn table_index(&self) -> TableIndex {
+        self.state.table_index()
+    }
+
+    /// Returns the bound of the generator if any.
+    ///
+    /// The bound is the table index of the first byte that can not be outputted by the generator.
+    pub fn get_bound(&self) -> TableIndex {
+        self.last.incremented()
+    }
+
+    /// Returns whether the generator is bounded or not.
+    pub fn is_bounded(&self) -> bool {
+        self.get_bound() != TableIndex::LAST
+    }
+
+    /// Computes the number of bytes that can still be outputted by the generator.
+    ///
+    /// Note :
+    /// ------
+    ///
+    /// Note that `ByteCount` uses the `u128` datatype to store the byte count. Unfortunately, the
+    /// number of remaining bytes is in ⟦0;2¹³² -1⟧. When the number is greater than 2¹²⁸ - 1,
+    /// we saturate the count at 2¹²⁸ - 1.
+    pub fn remaining_bytes(&self) -> ByteCount {
+        TableIndex::distance(&self.last, &self.state.table_index()).unwrap()
+    }
+
+    /// Outputs the next random byte.
+    pub fn generate_next(&mut self) -> u8 {
+        self.next()
+            .expect("Tried to generate a byte after the bound.")
+    }
+
+    /// Tries to fork the current generator into `n_children` generators each able to output
+    /// `n_bytes` random bytes.
+    pub fn try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<ChildrenIterator<BlockCipher>, ForkError> {
+        if n_children.0 == 0 {
+            return Err(ForkError::ZeroChildrenCount);
+        }
+        if n_bytes.0 == 0 {
+            return Err(ForkError::ZeroBytesPerChild);
+        }
+        if !self.is_fork_in_bound(n_children, n_bytes) {
+            return Err(ForkError::ForkTooLarge);
+        }
+
+        // The state currently stored in the parent generator points to the table index of the last
+        // generated byte. The first index to be generated is the next one:
+        let first_index = self.state.table_index().incremented();
+        let output = (0..n_children.0)
+            .zip(std::iter::repeat((
+                self.block_cipher.clone(),
+                first_index,
+                n_bytes,
+            )))
+            .map(
+                // This map is a little weird because we need to cast the closure to a fn pointer
+                // that matches the signature of `ChildrenIterator<BlockCipher>`.
+                // Unfortunately, the compiler does not manage to coerce this one
+                // automatically.
+                (|(i, (block_cipher, first_index, n_bytes))| {
+                    // The first index to be outputted by the child is the `first_index` shifted by
+                    // `i` times `n_bytes`.
+                    let child_first_index = first_index.increased(n_bytes.0 * i);
+                    // The bound of the child is the first index of its next sibling.
+                    let child_bound_index = first_index.increased(n_bytes.0 * (i + 1));
+                    AesCtrGenerator::from_block_cipher(
+                        block_cipher,
+                        child_first_index,
+                        child_bound_index,
+                    )
+                }) as ChildrenClosure<BlockCipher>,
+            );
+        // The parent next index is the bound of the last child.
+        let next_index = first_index.increased(n_bytes.0 * n_children.0);
+        self.state = State::new(next_index);
+
+        Ok(output)
+    }
+
+    pub(crate) fn is_fork_in_bound(
+        &self,
+        n_child: ChildrenCount,
+        child_bytes: BytesPerChild,
+    ) -> bool {
+        // A byte count overflowing `usize` can never fit: reject it instead of wrapping.
+        let total_bytes = n_child.0.checked_mul(child_bytes.0);
+        total_bytes.map_or(false, |n| self.state.table_index().increased(n) <= self.last)
+    }
+}
+
+impl<BlockCipher: AesBlockCipher> Iterator for AesCtrGenerator<BlockCipher> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.state.table_index() >= self.last {
+            None
+        } else {
+            match self.state.increment() {
+                ShiftAction::OutputByte(BufferPointer(ptr)) => Some(self.buffer[ptr]),
+                ShiftAction::RefreshBatchAndOutputByte(aes_index, BufferPointer(ptr)) => {
+                    self.buffer = self.block_cipher.generate_batch(aes_index);
+                    Some(self.buffer[ptr])
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+pub mod aes_ctr_generic_test {
+    #![allow(unused)] // to please clippy when tests are not activated
+
+    use super::*;
+    use crate::generators::aes_ctr::index::{AesIndex, ByteIndex};
+    use crate::generators::aes_ctr::BYTES_PER_AES_CALL;
+    use rand::{thread_rng, Rng};
+
+    const REPEATS: usize = 1_000_000;
+
+    pub fn any_table_index() -> impl Iterator<Item = TableIndex> {
+        std::iter::repeat_with(|| {
+            TableIndex::new(
+                AesIndex(thread_rng().gen()),
+                ByteIndex(thread_rng().gen::<usize>() % BYTES_PER_AES_CALL),
+            )
+        })
+    }
+
+    pub fn any_usize() -> impl Iterator<Item = usize> {
+        std::iter::repeat_with(|| thread_rng().gen())
+    }
+
+    pub fn any_children_count() -> impl Iterator<Item = ChildrenCount> {
+        std::iter::repeat_with(|| ChildrenCount(thread_rng().gen::<usize>() % 2048 + 1))
+    }
+
+    pub fn any_bytes_per_child() -> impl Iterator<Item = BytesPerChild> {
+        std::iter::repeat_with(|| BytesPerChild(thread_rng().gen::<usize>() % 2048 + 1))
+    }
+
+    pub fn any_key() -> impl Iterator<Item = AesKey> {
+        std::iter::repeat_with(|| AesKey(thread_rng().gen()))
+    }
+
+    /// Output a valid fork:
+    ///     a table index t,
+    ///     a number of children nc,
+    ///     a number of bytes per children nb
+    ///     and a positive integer i such that:
+    ///         increase(t, nc*nb+i) < MAX with MAX the largest table index.
+    ///
+    /// Put differently, if we initialize a parent generator at t and fork it with (nc, nb), our
+    /// parent generator current index gets shifted to an index, distant of at least i bytes of
+    /// the max index.
+    pub fn any_valid_fork(
+    ) -> impl Iterator<Item = (TableIndex, ChildrenCount, BytesPerChild, usize)> {
+        any_table_index()
+            .zip(any_children_count())
+            .zip(any_bytes_per_child())
+            .zip(any_usize())
+            .map(|(((t, nc), nb), i)| (t, nc, nb, i))
+            .filter(|(t, nc, nb, i)| {
+                TableIndex::distance(&TableIndex::LAST, t).unwrap().0 > (nc.0 * nb.0 + i) as u128
+            })
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the table index of the first child is the same as the table index of
+    ///     the parent before the fork.
+    pub fn prop_fork_first_state_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            let first_child = forked_generator.try_fork(nc, nb).unwrap().next().unwrap();
+            assert_eq!(original_generator.table_index(), first_child.table_index());
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the table index of the first byte outputted by the parent after the
+    ///     fork, is the bound of the last child of the fork.
+    pub fn prop_fork_last_bound_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let mut parent_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let last_child = parent_generator.try_fork(nc, nb).unwrap().last().unwrap();
+            assert_eq!(
+                parent_generator.table_index().incremented(),
+                last_child.get_bound()
+            );
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the bound of the parent does not change.
+    pub fn prop_fork_parent_bound_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            forked_generator.try_fork(nc, nb).unwrap().last().unwrap();
+            assert_eq!(original_generator.get_bound(), forked_generator.get_bound());
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the parent table index is increased by the number of children
+    ///     multiplied by the number of bytes per child.
+    pub fn prop_fork_parent_state_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            forked_generator.try_fork(nc, nb).unwrap().last().unwrap();
+            assert_eq!(
+                forked_generator.table_index(),
+                // Decrement accounts for the fact that the table index stored is the previous one
+                t.increased(nc.0 * nb.0).decremented()
+            );
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the bytes outputted by the children in the fork order form the same
+    ///     sequence the parent would have yielded had no fork happened.
+    pub fn prop_fork<G: AesBlockCipher>() {
+        for _ in 0..1000 {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let bytes_to_go = nc.0 * nb.0;
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            let initial_output: Vec<u8> = original_generator.take(bytes_to_go).collect();
+            let forked_output: Vec<u8> = forked_generator
+                .try_fork(nc, nb)
+                .unwrap()
+                .flat_map(|child| child.collect::<Vec<_>>())
+                .collect();
+            assert_eq!(initial_output, forked_output);
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, every child has a number of remaining bytes equal to the number of
+    ///     bytes per child given as fork input.
+    pub fn prop_fork_children_remaining_bytes<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let mut generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            assert!(generator
+                .try_fork(nc, nb)
+                .unwrap()
+                .all(|c| c.remaining_bytes().0 == nb.0 as u128));
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the number of remaining bytes of the parent is reduced by the number
+    ///     of children multiplied by the number of bytes per child.
+    pub fn prop_fork_parent_remaining_bytes<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let bytes_to_go = nc.0 * nb.0;
+            let mut generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let before_remaining_bytes = generator.remaining_bytes();
+            let _ = generator.try_fork(nc, nb).unwrap();
+            let after_remaining_bytes = generator.remaining_bytes();
+            assert_eq!(
+                before_remaining_bytes.0 - after_remaining_bytes.0,
+                bytes_to_go as u128
+            );
+        }
+    }
+}
--- a/concrete-csprng/src/generators/aes_ctr/index.rs
+++ b/concrete-csprng/src/generators/aes_ctr/index.rs
@@ -0,0 +1,389 @@
+use crate::generators::aes_ctr::BYTES_PER_AES_CALL;
+use crate::generators::ByteCount;
+use std::cmp::Ordering;
+
+/// A structure representing an [aes index](#coarse-grained-pseudo-random-lookup-table).
+///
+/// The wrapped `u128` is the input given to the aes function (the counter value).
+#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)]
+pub struct AesIndex(pub u128);
+
+/// A structure representing a [byte index](#fine-grained-pseudo-random-table-lookup).
+///
+/// When used inside a `TableIndex`, the wrapped value is expected to be strictly smaller than
+/// `BYTES_PER_AES_CALL` (this is asserted by `TableIndex::new`).
+#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)]
+pub struct ByteIndex(pub usize);
+
+/// A structure representing a [table index](#fine-grained-pseudo-random-table-lookup).
+///
+/// Table indices are ordered first by aes index, then by byte index (see the `Ord`
+/// implementation).
+#[derive(Clone, Copy, Debug)]
+pub struct TableIndex {
+    // Index of the aes call (the counter value).
+    pub(crate) aes_index: AesIndex,
+    // Index of the byte inside the output of that aes call.
+    pub(crate) byte_index: ByteIndex,
+}
+
+impl TableIndex {
+    /// The first table index.
+    pub const FIRST: TableIndex = TableIndex {
+        aes_index: AesIndex(0),
+        byte_index: ByteIndex(0),
+    };
+
+    /// The second table index.
+    pub const SECOND: TableIndex = TableIndex {
+        aes_index: AesIndex(0),
+        byte_index: ByteIndex(1),
+    };
+
+    /// The last table index.
+    pub const LAST: TableIndex = TableIndex {
+        aes_index: AesIndex(u128::MAX),
+        byte_index: ByteIndex(BYTES_PER_AES_CALL - 1),
+    };
+
+    /// Creates a table index from an aes index and a byte index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `byte_index` is not strictly smaller than `BYTES_PER_AES_CALL`.
+    #[allow(unused)] // to please clippy when tests are not activated
+    pub fn new(aes_index: AesIndex, byte_index: ByteIndex) -> Self {
+        assert!(byte_index.0 < BYTES_PER_AES_CALL);
+        TableIndex {
+            aes_index,
+            byte_index,
+        }
+    }
+
+    /// Shifts the table index forward by `shift` bytes.
+    ///
+    /// The aes index wraps around on overflow.
+    pub fn increase(&mut self, shift: usize) {
+        // Compute full shifts to avoid overflows
+        let full_aes_shifts = shift / BYTES_PER_AES_CALL;
+        let shift_remainder = shift % BYTES_PER_AES_CALL;
+
+        // Get the additional shift if any
+        let new_byte_index = self.byte_index.0 + shift_remainder;
+        let full_aes_shifts = full_aes_shifts + new_byte_index / BYTES_PER_AES_CALL;
+
+        // Store the remainder in the byte index
+        self.byte_index.0 = new_byte_index % BYTES_PER_AES_CALL;
+
+        self.aes_index.0 = self.aes_index.0.wrapping_add(full_aes_shifts as u128);
+    }
+
+    /// Shifts the table index backward by `shift` bytes.
+    ///
+    /// The aes index wraps around on underflow.
+    pub fn decrease(&mut self, shift: usize) {
+        let remainder = shift % BYTES_PER_AES_CALL;
+        if remainder <= self.byte_index.0 {
+            self.aes_index.0 = self
+                .aes_index
+                .0
+                .wrapping_sub((shift / BYTES_PER_AES_CALL) as u128);
+            self.byte_index.0 -= remainder;
+        } else {
+            // The byte index would become negative: borrow one full aes call.
+            self.aes_index.0 = self
+                .aes_index
+                .0
+                .wrapping_sub((shift / BYTES_PER_AES_CALL) as u128 + 1);
+            self.byte_index.0 += BYTES_PER_AES_CALL - remainder;
+        }
+    }
+
+    /// Shifts the table index forward by one byte.
+    pub fn increment(&mut self) {
+        self.increase(1)
+    }
+
+    /// Shifts the table index backward by one byte.
+    pub fn decrement(&mut self) {
+        self.decrease(1)
+    }
+
+    /// Returns the table index shifted forward by `shift` bytes.
+    pub fn increased(mut self, shift: usize) -> Self {
+        self.increase(shift);
+        self
+    }
+
+    /// Returns the table index shifted backward by `shift` bytes.
+    #[allow(unused)] // to please clippy when tests are not activated
+    pub fn decreased(mut self, shift: usize) -> Self {
+        self.decrease(shift);
+        self
+    }
+
+    /// Returns the table index to the next byte.
+    pub fn incremented(mut self) -> Self {
+        self.increment();
+        self
+    }
+
+    /// Returns the table index to the previous byte.
+    pub fn decremented(mut self) -> Self {
+        self.decrement();
+        self
+    }
+
+    /// Returns the distance between two table indices in bytes.
+    ///
+    /// Note:
+    /// -----
+    ///
+    /// This method assumes that the `larger` input is, well, larger than the `smaller` input. If
+    /// this is not the case, the method returns `None`. Also, note that `ByteCount` uses the
+    /// `u128` datatype to store the byte count. Unfortunately, the number of bytes between two
+    /// table indices is in ⟦0;2¹³² -1⟧. When the distance is greater than 2¹²⁸ - 1, we saturate
+    /// the count at 2¹²⁸ - 1.
+    pub fn distance(larger: &Self, smaller: &Self) -> Option<ByteCount> {
+        match std::cmp::Ord::cmp(larger, smaller) {
+            Ordering::Less => None,
+            Ordering::Equal => Some(ByteCount(0)),
+            Ordering::Greater => {
+                let bytes_per_call = BYTES_PER_AES_CALL as u128;
+                let larger_byte = larger.byte_index.0 as u128;
+                let smaller_byte = smaller.byte_index.0 as u128;
+                let aes_diff = larger.aes_index.0 - smaller.aes_index.0;
+                // Split the distance into a number of full aes calls plus a non-negative number
+                // of extra bytes, so that nothing has to be subtracted once the count has
+                // saturated (subtracting after saturation would under-report the distance).
+                let (full_calls, extra_bytes) = if larger_byte >= smaller_byte {
+                    (aes_diff, larger_byte - smaller_byte)
+                } else {
+                    // `larger > smaller` with a smaller byte index implies `aes_diff >= 1`, so
+                    // one full aes call can be borrowed to keep the byte offset positive.
+                    (
+                        aes_diff - 1,
+                        (bytes_per_call + larger_byte).saturating_sub(smaller_byte),
+                    )
+                };
+                let result = full_calls
+                    .saturating_mul(bytes_per_call)
+                    .saturating_add(extra_bytes);
+                Some(ByteCount(result))
+            }
+        }
+    }
+}
+
+impl Eq for TableIndex {}
+
+impl PartialEq<Self> for TableIndex {
+    /// Two table indices are equal when they compare as `Ordering::Equal`.
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl PartialOrd<Self> for TableIndex {
+    /// Delegates to the total order defined by the `Ord` implementation; never returns `None`.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for TableIndex {
+    /// Orders table indices by aes index first, and by byte index when the aes indices are equal.
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.aes_index
+            .cmp(&other.aes_index)
+            .then_with(|| self.byte_index.cmp(&other.byte_index))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use rand::{thread_rng, Rng};
+
+    // Number of random samples drawn by each property test.
+    const REPEATS: usize = 1_000_000;
+
+    // Infinite iterator of random, valid table indices (byte index below `BYTES_PER_AES_CALL`).
+    fn any_table_index() -> impl Iterator<Item = TableIndex> {
+        std::iter::repeat_with(|| {
+            TableIndex::new(
+                AesIndex(thread_rng().gen()),
+                ByteIndex(thread_rng().gen::<usize>() % BYTES_PER_AES_CALL),
+            )
+        })
+    }
+
+    // Infinite iterator of random `usize` values.
+    fn any_usize() -> impl Iterator<Item = usize> {
+        std::iter::repeat_with(|| thread_rng().gen())
+    }
+
+    #[test]
+    #[should_panic]
+    /// Verifies that the constructor of `TableIndex` panics when the byte index is too large.
+    fn test_table_index_new_panic() {
+        TableIndex::new(AesIndex(12), ByteIndex(144));
+    }
+
+    #[test]
+    /// Verifies that the `TableIndex` wraps nicely with predecessor
+    fn test_table_index_predecessor_edge() {
+        assert_eq!(TableIndex::FIRST.decremented(), TableIndex::LAST);
+    }
+
+    #[test]
+    /// Verifies that the `TableIndex` wraps nicely with successor
+    fn test_table_index_successor_edge() {
+        assert_eq!(TableIndex::LAST.incremented(), TableIndex::FIRST);
+    }
+
+    #[test]
+    /// Check that the table index distance saturates nicely.
+    fn prop_table_index_distance_saturates() {
+        assert_eq!(
+            TableIndex::distance(&TableIndex::LAST, &TableIndex::FIRST)
+                .unwrap()
+                .0,
+            u128::MAX
+        )
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t,
+    ///         distance(t, t) = Some(0).
+    fn prop_table_index_distance_zero() {
+        for _ in 0..REPEATS {
+            let t = any_table_index().next().unwrap();
+            assert_eq!(TableIndex::distance(&t, &t), Some(ByteCount(0)));
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t1, t2 such that t1 < t2,
+    ///         distance(t1, t2) = None.
+    fn prop_table_index_distance_wrong_order_none() {
+        for _ in 0..REPEATS {
+            let (t1, t2) = any_table_index()
+                .zip(any_table_index())
+                .find(|(t1, t2)| t1 < t2)
+                .unwrap();
+            assert_eq!(TableIndex::distance(&t1, &t2), None);
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t1, t2 such that t1 > t2,
+    ///         distance(t1, t2) = Some(v) where v is strictly positive.
+    fn prop_table_index_distance_some_positive() {
+        for _ in 0..REPEATS {
+            let (t1, t2) = any_table_index()
+                .zip(any_table_index())
+                .find(|(t1, t2)| t1 > t2)
+                .unwrap();
+            assert!(matches!(TableIndex::distance(&t1, &t2), Some(ByteCount(v)) if v > 0));
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive i such that i < distance (MAX, t) with MAX the largest
+    ///     table index,
+    ///         distance(t.increased(i), t) = Some(i).
+    fn prop_table_index_distance_increase() {
+        for _ in 0..REPEATS {
+            let (t, inc) = any_table_index()
+                .zip(any_usize())
+                .find(|(t, inc)| {
+                    (*inc as u128) < TableIndex::distance(&TableIndex::LAST, t).unwrap().0
+                })
+                .unwrap();
+            assert_eq!(
+                TableIndex::distance(&t.increased(inc), &t).unwrap().0 as usize,
+                inc
+            );
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, t =? t = true.
+    fn prop_table_index_equality() {
+        for _ in 0..REPEATS {
+            let t = any_table_index().next().unwrap();
+            assert_eq!(
+                std::cmp::PartialOrd::partial_cmp(&t, &t),
+                Some(std::cmp::Ordering::Equal)
+            );
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive i such that i < distance (MAX, t) with MAX the largest
+    ///     table index,
+    ///         t.increased(i) >? t = true.
+    fn prop_table_index_greater() {
+        for _ in 0..REPEATS {
+            let (t, inc) = any_table_index()
+                .zip(any_usize())
+                .find(|(t, inc)| {
+                    (*inc as u128) < TableIndex::distance(&TableIndex::LAST, t).unwrap().0
+                })
+                .unwrap();
+            assert_eq!(
+                std::cmp::PartialOrd::partial_cmp(&t.increased(inc), &t),
+                Some(std::cmp::Ordering::Greater),
+            );
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive i such that i < distance (t, FIRST) with FIRST the
+    ///     smallest table index,
+    ///         t.decreased(i) <? t = true.
+    fn prop_table_index_less() {
+        for _ in 0..REPEATS {
+            let (t, inc) = any_table_index()
+                .zip(any_usize())
+                .find(|(t, inc)| {
+                    (*inc as u128) < TableIndex::distance(t, &TableIndex::FIRST).unwrap().0
+                })
+                .unwrap();
+            assert_eq!(
+                std::cmp::PartialOrd::partial_cmp(&t.decreased(inc), &t),
+                Some(std::cmp::Ordering::Less)
+            );
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t,
+    ///         successor(predecessor(t)) = t.
+    fn prop_table_index_decrement_increment() {
+        for _ in 0..REPEATS {
+            let t = any_table_index().next().unwrap();
+            assert_eq!(t.decremented().incremented(), t);
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t,
+    ///         predecessor(successor(t)) = t.
+    fn prop_table_index_increment_decrement() {
+        for _ in 0..REPEATS {
+            let t = any_table_index().next().unwrap();
+            assert_eq!(t.incremented().decremented(), t);
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive integer i,
+    ///         decrease(increase(t, i), i) = t.
+    fn prop_table_index_increase_decrease() {
+        for _ in 0..REPEATS {
+            let (t, i) = any_table_index().zip(any_usize()).next().unwrap();
+            assert_eq!(t.increased(i).decreased(i), t);
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive integer i,
+    ///         increase(decrease(t, i), i) = t.
+    fn prop_table_index_decrease_increase() {
+        for _ in 0..REPEATS {
+            let (t, i) = any_table_index().zip(any_usize()).next().unwrap();
+            assert_eq!(t.decreased(i).increased(i), t);
+        }
+    }
+
+    #[test]
+    /// Check that a big increase does not overflow
+    fn prop_table_increase_max_no_overflow() {
+        let first = TableIndex::FIRST;
+        // Increase so that ByteIndex is at 1usize
+        let second = first.increased(1);
+
+        // Now increase by usize::MAX, as the underlying byte index stores a usize this may overflow
+        // depending on implementation, ensure it does not overflow
+        let big_increase = second.increased(usize::MAX);
+        let total_full_aes_shifts = (1u128 + usize::MAX as u128) / BYTES_PER_AES_CALL as u128;
+
+        assert_eq!(
+            big_increase,
+            TableIndex::new(AesIndex(total_full_aes_shifts), ByteIndex(0))
+        );
+    }
+}
--- a/concrete-csprng/src/generators/aes_ctr/mod.rs
+++ b/concrete-csprng/src/generators/aes_ctr/mod.rs
@@ -0,0 +1,223 @@
+//! A module implementing the random generator api with batched aes calls.
+//!
+//! This module provides a generic [`AesCtrGenerator`] structure which implements the
+//! [`super::RandomGenerator`] api using the AES block cipher in counter mode. That is, the
+//! generator holds a state (i.e. counter) which is incremented iteratively, to produce the stream
+//! of random values:
+//! ```ascii
+//!        state=0        state=1        state=2
+//!        ╔══↧══╗        ╔══↧══╗        ╔══↧══╗
+//!    key ↦ AES ║    key ↦ AES ║    key ↦ AES ║ ...
+//!        ╚══↧══╝        ╚══↧══╝        ╚══↧══╝
+//!        output0        output1        output2
+//!
+//!          t=0            t=1            t=2
+//! ```
+//!
+//! The [`AesCtrGenerator`] structure is generic over the AES block ciphers, which are
+//! represented by the [`AesBlockCipher`] trait. Consequently, implementers only need to implement
+//! the `AesBlockCipher` trait, to benefit from the whole api of the `AesCtrGenerator` structure.
+//!
+//! In the following section, we give details on the implementation of this generic generator.
+//!
+//! Coarse-grained pseudo-random lookup table
+//! =========================================
+//!
+//! To generate random values, we use the AES block cipher in counter mode. If we denote f the aes
+//! encryption function, we have:
+//! ```ascii
+//!     f: ⟦0;2¹²⁸ -1⟧ X ⟦0;2¹²⁸ -1⟧ ↦ ⟦0;2¹²⁸ -1⟧
+//!     f(secret_key, input) ↦ output
+//! ```
+//!
+//! If we fix the secret key to a value k, we have a function fₖ from ⟦0;2¹²⁸ -1⟧ to ⟦0;2¹²⁸-1⟧,
+//! transforming the state of the counter into a pseudo-random value. Essentially, this fₖ
+//! function can be considered as the following lookup table, containing 2¹²⁸ pseudo-random
+//! values:
+//! ```ascii
+//!     ╭──────────────┬──────────────┬─────┬──────────────╮
+//!     │       0      │       1      │     │    2¹²⁸ -1   │
+//!     ├──────────────┼──────────────┼─────┼──────────────┤
+//!     │     fₖ(0)    │     fₖ(1)    │     │  fₖ(2¹²⁸ -1) │
+//!     ╔═══════↧══════╦═══════↧══════╦═════╦═══════↧══════╗
+//!     ║┏━━━━━━━━━━━━┓║┏━━━━━━━━━━━━┓║     ║┏━━━━━━━━━━━━┓║
+//!     ║┃    u128    ┃║┃    u128    ┃║ ... ║┃    u128    ┃║
+//!     ║┗━━━━━━━━━━━━┛║┗━━━━━━━━━━━━┛║     ║┗━━━━━━━━━━━━┛║
+//!     ╚══════════════╩══════════════╩═════╩══════════════╝
+//! ```
+//!
+//! An input to the fₖ function is called an _aes index_ (also called state or counter in the
+//! standards) of the pseudo-random table. The [`AesIndex`] structure defined in this module
+//! represents such an index in the code.
+//!
+//! Fine-grained pseudo-random table lookup
+//! =======================================
+//!
+//! Since we want to deliver the pseudo-random bytes one by one, we have to come up with a
+//! finer-grained indexing. Fortunately, each `u128` value outputted by fₖ can be seen as a table
+//! of 16 `u8`:
+//! ```ascii
+//!     ╭──────────────┬──────────────┬─────┬──────────────╮
+//!     │       0      │       1      │     │    2¹²⁸ -1   │
+//!     ├──────────────┼──────────────┼─────┼──────────────┤
+//!     │     fₖ(0)    │     fₖ(1)    │     │  fₖ(2¹²⁸ -1) │
+//!     ╔═══════↧══════╦═══════↧══════╦═════╦═══════↧══════╗
+//!     ║┏━━━━━━━━━━━━┓║┏━━━━━━━━━━━━┓║     ║┏━━━━━━━━━━━━┓║
+//!     ║┃    u128    ┃║┃    u128    ┃║     ║┃    u128    ┃║
+//!     ║┣━━┯━━┯━━━┯━━┫║┣━━┯━━┯━━━┯━━┫║ ... ║┣━━┯━━┯━━━┯━━┫║
+//!     ║┃u8│u8│...│u8┃║┃u8│u8│...│u8┃║     ║┃u8│u8│...│u8┃║
+//!     ║┗━━┷━━┷━━━┷━━┛║┗━━┷━━┷━━━┷━━┛║     ║┗━━┷━━┷━━━┷━━┛║
+//!     ╚══════════════╩══════════════╩═════╩══════════════╝
+//! ```
+//!
+//! We introduce a second function to select a chunk of 8 bits:
+//! ```ascii
+//!     g: ⟦0;2¹²⁸ -1⟧ X ⟦0;15⟧ ↦ ⟦0;2⁸ -1⟧
+//!     g(big_int, index) ↦ byte
+//! ```
+//!
+//! If we fix the `u128` value to a value e, we have a function gₑ from ⟦0;15⟧ to ⟦0;2⁸ -1⟧
+//! transforming an index into a pseudo-random byte:
+//! ```ascii
+//!     ┏━━━━━━━━┯━━━━━━━━┯━━━┯━━━━━━━━┓
+//!     ┃   u8   │   u8   │...│   u8   ┃
+//!     ┗━━━━━━━━┷━━━━━━━━┷━━━┷━━━━━━━━┛
+//!     │  gₑ(0) │  gₑ(1) │   │ gₑ(15) │
+//!     ╰────────┴────────┴───┴────────╯
+//! ```
+//!
+//! We call this input to the gₑ function, a _byte index_ of the pseudo-random table. The
+//! [`ByteIndex`] structure defined in this module represents such an index in the code.
+//!
+//! By using both the g and the fₖ functions, we can define a new function l which allows to index
+//! any byte of the pseudo-random table:
+//! ```ascii
+//!     l: ⟦0;2¹²⁸ -1⟧ X ⟦0;15⟧ ↦ ⟦0;2⁸ -1⟧
+//!     l(aes_index, byte_index) ↦ g(fₖ(aes_index), byte_index)
+//! ```
+//!
+//! In this sense, any member of ⟦0;2¹²⁸ -1⟧ X ⟦0;15⟧ uniquely defines a byte in this pseudo-random
+//! table:
+//! ```ascii
+//!                          e = fₖ(a)
+//!     ╔══════════════╦═══════↧══════╦═════╦══════════════╗
+//!     ║┏━━━━━━━━━━━━┓║┏━━━━━━━━━━━━┓║     ║┏━━━━━━━━━━━━┓║
+//!     ║┃    u128    ┃║┃    u128    ┃║     ║┃    u128    ┃║
+//!     ║┣━━┯━━┯━━━┯━━┫║┣━━┯━━┯━━━┯━━┫║ ... ║┣━━┯━━┯━━━┯━━┫║
+//!     ║┃u8│u8│...│u8┃║┃u8│u8│...│u8┃║     ║┃u8│u8│...│u8┃║
+//!     ║┗━━┷━━┷━━━┷━━┛║┗━━┷↥━┷━━━┷━━┛║     ║┗━━┷━━┷━━━┷━━┛║
+//!     ║              ║│    gₑ(b)   │║     ║              ║
+//!     ║              ║╰────────────╯║     ║              ║
+//!     ╚══════════════╩══════════════╩═════╩══════════════╝
+//! ```
+//!
+//! We call this input to the l function, a _table index_ of the pseudo-random table. The
+//! [`TableIndex`] structure defined in this module represents such an index in the code.
+//!
+//! Prngs current table index
+//! =========================
+//!
+//! When created, a prng is given an initial _table index_, denoted (a₀, b₀), which identifies the
+//! first byte of the table to be outputted by the prng. Then, each time the prng is queried for a
+//! new value, the byte corresponding to the current _table index_ is returned, and the current
+//! _table index_ is incremented:
+//! ```ascii
+//!       e = fₖ(a₀)                                                  e = fₖ(a₁)
+//!     ╔═════↧═════╦═══════════╦═════╦═══════════╗     ╔═══════════╦═════↧═════╦═════╦═══════════╗
+//!     ║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║ ... ║┏━┯━┯━━━┯━┓║     ║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║ ... ║┏━┯━┯━━━┯━┓║
+//!     ║┃ │ │...│ ┃║┃ │ │...│ ┃║     ║┃ │ │...│ ┃║     ║┃ │ │...│ ┃║┃ │ │...│ ┃║     ║┃ │ │...│ ┃║
+//!     ║┗━┷━┷━━━┷↥┛║┗━┷━┷━━━┷━┛║     ║┗━┷━┷━━━┷━┛║  →  ║┗━┷━┷━━━┷━┛║┗↥┷━┷━━━┷━┛║     ║┗━┷━┷━━━┷━┛║
+//!     ║│  gₑ(b₀) │║           ║     ║           ║     ║           ║│  gₑ(b₁) │║     ║           ║
+//!     ║╰─────────╯║           ║     ║           ║     ║           ║╰─────────╯║     ║           ║
+//!     ╚═══════════╩═══════════╩═════╩═══════════╝     ╚═══════════╩═══════════╩═════╩═══════════╝
+//! ```
+//!
+//! Prng bound
+//! ==========
+//!
+//! When created, a prng is also given a _bound_ (aₘ, bₘ), that is, a table index which it is not
+//! allowed to exceed:
+//! ```ascii
+//!       e = fₖ(a₀)
+//!     ╔═════↧═════╦═══════════╦═════╦═══════════╗
+//!     ║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║ ... ║┏━┯━┯━━━┯━┓║
+//!     ║┃ │ │...│ ┃║┃ │╳│...│╳┃║     ║┃╳│╳│...│╳┃║
+//!     ║┗━┷━┷━━━┷↥┛║┗━┷━┷━━━┷━┛║     ║┗━┷━┷━━━┷━┛║ The current byte can be returned.
+//!     ║│  gₑ(b₀) │║           ║     ║           ║
+//!     ║╰─────────╯║           ║     ║           ║
+//!     ╚═══════════╩═══════════╩═════╩═══════════╝
+//!     
+//!                   e = fₖ(aₘ)
+//!     ╔═══════════╦═════↧═════╦═════╦═══════════╗
+//!     ║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║ ... ║┏━┯━┯━━━┯━┓║
+//!     ║┃ │ │...│ ┃║┃ │╳│...│╳┃║     ║┃╳│╳│...│╳┃║ The table index reached the bound,
+//!     ║┗━┷━┷━━━┷━┛║┗━┷↥┷━━━┷━┛║     ║┗━┷━┷━━━┷━┛║ the current byte can not be
+//!     ║           ║│  gₑ(bₘ) │║     ║           ║ returned.
+//!     ║           ║╰─────────╯║     ║           ║
+//!     ╚═══════════╩═══════════╩═════╩═══════════╝
+//! ```
+//!
+//! Buffering
+//! =========
+//!
+//! Calling the aes function every time we need to output a single byte would be a huge waste of
+//! resources. In practice, we call aes 8 times in a row, for 8 successive values of aes index, and
+//! store the results in a buffer. For platforms which have a dedicated aes chip, this allows to
+//! fill the unit pipeline and reduces the amortized cost of the aes function.
+//!
+//! Together with the current table index of the prng, we also store a pointer p (initialized at
+//! p₀=b₀) to the current byte in the buffer. If we denote v the lookup function we have :
+//! ```ascii
+//!                        e = fₖ(a₀)                         Buffer(length=128)
+//!     ╔═════╦═══════════╦═════↧═════╦═══════════╦═════╗  ┏━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓
+//!     ║ ... ║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║ ... ║  ┃▓│▓│▓│▓│▓│▓│▓│▓│...│▓┃
+//!     ║     ║┃ │ │...│ ┃║┃▓│▓│...│▓┃║┃▓│▓│...│▓┃║     ║  ┗━┷↥┷━┷━┷━┷━┷━┷━┷━━━┷━┛
+//!     ║     ║┗━┷━┷━━━┷━┛║┗━┷↥┷━━━┷━┛║┗━┷━┷━━━┷━┛║     ║  │ v(p₀)               │
+//!     ║     ║           ║│  gₑ(b₀) │║           ║     ║  ╰─────────────────────╯
+//!     ║     ║           ║╰─────────╯║           ║     ║
+//!     ╚═════╩═══════════╩═══════════╩═══════════╩═════╝
+//! ```
+//!
+//! We call this input to the v function, a _buffer pointer_. The [`BufferPointer`] structure
+//! defined in this module represents such a pointer in the code.
+//!
+//! When the table index is incremented, the buffer pointer is incremented alongside:
+//! ```ascii
+//!                        e = fₖ(a)                          Buffer(length=128)
+//!     ╔═════╦═══════════╦═════↧═════╦═══════════╦═════╗  ┏━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓
+//!     ║ ... ║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║┏━┯━┯━━━┯━┓║ ... ║  ┃▓│▓│▓│▓│▓│▓│▓│▓│...│▓┃
+//!     ║     ║┃ │ │...│ ┃║┃▓│▓│...│▓┃║┃▓│▓│...│▓┃║     ║  ┗━┷━┷↥┷━┷━┷━┷━┷━┷━━━┷━┛
+//!     ║     ║┗━┷━┷━━━┷━┛║┗━┷━┷↥━━┷━┛║┗━┷━┷━━━┷━┛║     ║  │   v(p)              │
+//!     ║     ║           ║│  gₑ(b)  │║           ║     ║  ╰─────────────────────╯
+//!     ║     ║           ║╰─────────╯║           ║     ║
+//!     ╚═════╩═══════════╩═══════════╩═══════════╩═════╝
+//! ```
+//!
+//! When the buffer pointer is incremented it is checked against the size of the buffer, and if
+//! necessary, a new batch of aes index values is generated.
+
+/// Number of successive aes calls performed to fill the buffer (see the `Buffering` section).
+pub const AES_CALLS_PER_BATCH: usize = 8;
+/// Number of bytes in the output of one aes call (a 128-bit value, 8 bits per byte).
+pub const BYTES_PER_AES_CALL: usize = 128 / 8;
+/// Number of bytes produced by one batch of aes calls.
+pub const BYTES_PER_BATCH: usize = BYTES_PER_AES_CALL * AES_CALLS_PER_BATCH;
+
+/// A module containing structures to manage table indices.
+mod index;
+pub use index::*;
+
+/// A module containing structures to manage table indices and buffer pointers together properly.
+mod states;
+pub use states::*;
+
+/// A module containing an abstraction for aes block ciphers.
+mod block_cipher;
+pub use block_cipher::*;
+
+/// A module containing a generic implementation of a random generator.
+mod generic;
+pub use generic::*;
+
+/// A module extending `generic` to the `rayon` paradigm.
+#[cfg(feature = "parallel")]
+mod parallel;
+#[cfg(feature = "parallel")]
+pub use parallel::*;
--- a/concrete-csprng/src/generators/aes_ctr/parallel.rs
+++ b/concrete-csprng/src/generators/aes_ctr/parallel.rs
@@ -0,0 +1,222 @@
+use crate::generators::aes_ctr::{
+    AesBlockCipher, AesCtrGenerator, ChildrenClosure, State, TableIndex,
+};
+use crate::generators::{BytesPerChild, ChildrenCount, ForkError};
+
+/// A type alias for the parallel children iterator type.
+///
+/// This is the type returned by `AesCtrGenerator::par_try_fork`: each child index is zipped with
+/// a repeated `(block cipher, first table index, bytes per child)` tuple, and the pair is mapped
+/// to the corresponding child generator through a plain `fn` pointer.
+pub type ParallelChildrenIterator<BlockCipher> = rayon::iter::Map<
+    rayon::iter::Zip<
+        rayon::range::Iter<usize>,
+        rayon::iter::RepeatN<(Box<BlockCipher>, TableIndex, BytesPerChild)>,
+    >,
+    fn((usize, (Box<BlockCipher>, TableIndex, BytesPerChild))) -> AesCtrGenerator<BlockCipher>,
+>;
+
+impl<BlockCipher: AesBlockCipher> AesCtrGenerator<BlockCipher> {
+    /// Tries to fork the current generator into `n_children` generators each able to output
+    /// `n_bytes` random bytes, returned as a parallel iterator.
+    ///
+    /// On success, the state of the parent is moved past the bytes handed to the children.
+    ///
+    /// # Errors
+    ///
+    /// - `ForkError::ZeroChildrenCount` if `n_children` is zero;
+    /// - `ForkError::ZeroBytesPerChild` if `n_bytes` is zero;
+    /// - `ForkError::ForkTooLarge` if `is_fork_in_bound` rejects the requested fork.
+    ///
+    /// # Notes
+    ///
+    /// This method requires the "parallel" feature.
+    pub fn par_try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<ParallelChildrenIterator<BlockCipher>, ForkError>
+    where
+        BlockCipher: Send + Sync,
+    {
+        use rayon::prelude::*;
+
+        if n_children.0 == 0 {
+            return Err(ForkError::ZeroChildrenCount);
+        }
+        if n_bytes.0 == 0 {
+            return Err(ForkError::ZeroBytesPerChild);
+        }
+        if !self.is_fork_in_bound(n_children, n_bytes) {
+            return Err(ForkError::ForkTooLarge);
+        }
+
+        // The state currently stored in the parent generator points to the table index of the last
+        // generated byte. The first index to be generated is the next one :
+        let first_index = self.state.table_index().incremented();
+        let output = (0..n_children.0)
+            .into_par_iter()
+            .zip(rayon::iter::repeatn(
+                (self.block_cipher.clone(), first_index, n_bytes),
+                n_children.0,
+            ))
+            .map(
+                // This map is a little weird because we need to cast the closure to a fn pointer
+                // (`ChildrenClosure<BlockCipher>`) so that the resulting type matches
+                // `ParallelChildrenIterator<BlockCipher>`. Unfortunately, the compiler does not
+                // manage to coerce this one automatically.
+                (|(i, (block_cipher, first_index, n_bytes))| {
+                    // The first index to be outputted by the child is the `first_index` shifted by
+                    // the proper amount of `n_bytes`.
+                    let child_first_index = first_index.increased(n_bytes.0 * i);
+                    // The bound of the child is the first index of its next sibling.
+                    let child_bound_index = first_index.increased(n_bytes.0 * (i + 1));
+                    AesCtrGenerator::from_block_cipher(
+                        block_cipher,
+                        child_first_index,
+                        child_bound_index,
+                    )
+                }) as ChildrenClosure<BlockCipher>,
+            );
+        // The parent next index is the bound of the last child.
+        let next_index = first_index.increased(n_bytes.0 * n_children.0);
+        self.state = State::new(next_index);
+
+        Ok(output)
+    }
+}
+
+#[cfg(test)]
+/// Property tests for `par_try_fork`, generic over the block cipher under test.
+pub mod aes_ctr_parallel_generic_tests {
+
+    use super::*;
+    use crate::generators::aes_ctr::aes_ctr_generic_test::{any_key, any_valid_fork};
+    use rayon::prelude::*;
+
+    // Number of random samples drawn by each property test (unless stated otherwise).
+    const REPEATS: usize = 1_000_000;
+
+    /// Check the property:
+    ///     On a valid fork, the table index of the first child is the same as the table index of
+    ///     the parent before the fork.
+    pub fn prop_fork_first_state_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            let first_child = forked_generator
+                .par_try_fork(nc, nb)
+                .unwrap()
+                .find_first(|_| true)
+                .unwrap();
+            assert_eq!(original_generator.table_index(), first_child.table_index());
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the table index of the first byte outputted by the parent after the
+    ///     fork, is the bound of the last child of the fork.
+    pub fn prop_fork_last_bound_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let mut parent_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let last_child = parent_generator
+                .par_try_fork(nc, nb)
+                .unwrap()
+                .find_last(|_| true)
+                .unwrap();
+            assert_eq!(
+                parent_generator.table_index().incremented(),
+                last_child.get_bound()
+            );
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the bound of the parent does not change.
+    pub fn prop_fork_parent_bound_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            forked_generator
+                .par_try_fork(nc, nb)
+                .unwrap()
+                .find_last(|_| true)
+                .unwrap();
+            assert_eq!(original_generator.get_bound(), forked_generator.get_bound());
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the parent table index is increased by the number of children
+    ///     multiplied by the number of bytes per child.
+    pub fn prop_fork_parent_state_table_index<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            forked_generator
+                .par_try_fork(nc, nb)
+                .unwrap()
+                .find_last(|_| true)
+                .unwrap();
+            assert_eq!(
+                forked_generator.table_index(),
+                // Decrement accounts for the fact that the table index stored is the previous one
+                t.increased(nc.0 * nb.0).decremented()
+            );
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the bytes outputted by the children in the fork order form the same
+    ///     sequence the parent would have outputted had no fork happened.
+    pub fn prop_fork<G: AesBlockCipher>() {
+        // NOTE(review): fewer iterations than `REPEATS`, presumably because every iteration
+        // generates and compares the whole forked output -- confirm.
+        for _ in 0..1000 {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let bytes_to_go = nc.0 * nb.0;
+            let original_generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let mut forked_generator = original_generator.clone();
+            let initial_output: Vec<u8> = original_generator.take(bytes_to_go).collect();
+            let forked_output: Vec<u8> = forked_generator
+                .par_try_fork(nc, nb)
+                .unwrap()
+                .flat_map(|child| child.collect::<Vec<_>>())
+                .collect();
+            assert_eq!(initial_output, forked_output);
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, every child has a number of remaining bytes equal to the number of
+    ///     bytes per child given as fork input.
+    pub fn prop_fork_children_remaining_bytes<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let mut generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            assert!(generator
+                .par_try_fork(nc, nb)
+                .unwrap()
+                .all(|c| c.remaining_bytes().0 == nb.0 as u128));
+        }
+    }
+
+    /// Check the property:
+    ///     On a valid fork, the number of remaining bytes of the parent is reduced by the
+    ///     number of children multiplied by the number of bytes per child.
+    pub fn prop_fork_parent_remaining_bytes<G: AesBlockCipher>() {
+        for _ in 0..REPEATS {
+            let (t, nc, nb, i) = any_valid_fork().next().unwrap();
+            let k = any_key().next().unwrap();
+            let bytes_to_go = nc.0 * nb.0;
+            let mut generator =
+                AesCtrGenerator::<G>::new(k, Some(t), Some(t.increased(nc.0 * nb.0 + i)));
+            let before_remaining_bytes = generator.remaining_bytes();
+            let _ = generator.par_try_fork(nc, nb).unwrap();
+            let after_remaining_bytes = generator.remaining_bytes();
+            assert_eq!(
+                before_remaining_bytes.0 - after_remaining_bytes.0,
+                bytes_to_go as u128
+            );
+        }
+    }
+}
--- a/concrete-csprng/src/generators/aes_ctr/states.rs
+++ b/concrete-csprng/src/generators/aes_ctr/states.rs
@@ -0,0 +1,176 @@
+use crate::generators::aes_ctr::index::{AesIndex, TableIndex};
+use crate::generators::aes_ctr::BYTES_PER_BATCH;
+
+/// A pointer to the next byte to be outputted by the generator.
+///
+/// The wrapped `usize` is a position inside the generator's batch buffer.
+#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)]
+pub struct BufferPointer(pub usize);
+
+/// A structure representing the current state of generator using batched aes-ctr approach.
+#[derive(Debug, Clone, Copy)]
+pub struct State {
+    /// Table index of the current position; moved forward by every shift.
+    table_index: TableIndex,
+    /// Position, inside the batch buffer, of the byte matching the current state.
+    buffer_pointer: BufferPointer,
+}
+
+/// A structure representing the action to be taken by the generator after shifting its state.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum ShiftAction {
+    /// Outputs the byte pointed to by the 0-th field.
+    OutputByte(BufferPointer),
+    /// Refresh the buffer starting from the aes index held in the 0-th field, and output the
+    /// byte pointed to by the 1-th field.
+    RefreshBatchAndOutputByte(AesIndex, BufferPointer),
+}
+
+impl State {
+    /// Builds the state that precedes `table_index`.
+    ///
+    /// Note :
+    /// ------
+    ///
+    /// `table_index` is the __first__ table index that will be outputted by the next call to
+    /// `increment`. The table index stored in the new state is therefore its predecessor.
+    ///
+    /// Panics if `table_index` is `TableIndex::FIRST`.
+    pub fn new(table_index: TableIndex) -> Self {
+        // `TableIndex::FIRST` is rejected: `decremented` would wrap, and the first increment would
+        // output `RefreshBatchAndOutputByte(AesIndex::MAX, ...)`, which would lead to loading a
+        // non continuous batch.
+        assert_ne!(table_index, TableIndex::FIRST);
+        // Start one index before the requested one, so that the first shift lands on it.
+        let previous_index = table_index.decremented();
+        // Park the pointer on the last allowed value, so that the first `ShiftAction` is a
+        // `RefreshBatchAndOutputByte`.
+        let last_allowed_pointer = BufferPointer(BYTES_PER_BATCH - 1);
+        State {
+            table_index: previous_index,
+            buffer_pointer: last_allowed_pointer,
+        }
+    }
+
+    /// Moves the state `shift` bytes forward, and returns the action the generator has to take.
+    pub fn increase(&mut self, shift: usize) -> ShiftAction {
+        self.table_index.increase(shift);
+        let next_pointer = self.buffer_pointer.0 + shift;
+        if next_pointer < BYTES_PER_BATCH {
+            // The target byte still lies in the current batch.
+            self.buffer_pointer.0 = next_pointer;
+            return ShiftAction::OutputByte(self.buffer_pointer);
+        }
+        // The target byte lies past the current batch: the pointer restarts from the byte index
+        // of the new table index, and the batch has to be refreshed from its aes index.
+        self.buffer_pointer.0 = self.table_index.byte_index.0;
+        ShiftAction::RefreshBatchAndOutputByte(self.table_index.aes_index, self.buffer_pointer)
+    }
+
+    /// Moves the state one byte forward.
+    pub fn increment(&mut self) -> ShiftAction {
+        self.increase(1)
+    }
+
+    /// Gives back the table index currently held by the state.
+    pub fn table_index(&self) -> TableIndex {
+        self.table_index
+    }
+}
+
+impl Default for State {
+    /// Returns a state whose first outputted table index is the one right after
+    /// `TableIndex::FIRST`.
+    ///
+    /// `State::new` rejects `TableIndex::FIRST` with an assertion, so building the default
+    /// state from it would panic on every call. The default therefore starts on the smallest
+    /// table index that `State::new` accepts.
+    fn default() -> Self {
+        State::new(TableIndex::FIRST.increased(1))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::generators::aes_ctr::index::ByteIndex;
+    use crate::generators::aes_ctr::BYTES_PER_AES_CALL;
+    use rand::{thread_rng, Rng};
+
+    // Number of random samples drawn by each property test.
+    const REPEATS: usize = 1_000_000;
+
+    /// Endless stream of random table indices, with a byte index below `BYTES_PER_AES_CALL`.
+    fn any_table_index() -> impl Iterator<Item = TableIndex> {
+        std::iter::repeat_with(|| {
+            TableIndex::new(
+                AesIndex(thread_rng().gen()),
+                ByteIndex(thread_rng().gen::<usize>() % BYTES_PER_AES_CALL),
+            )
+        })
+    }
+
+    /// Endless stream of random `usize` values.
+    fn any_usize() -> impl Iterator<Item = usize> {
+        std::iter::repeat_with(|| thread_rng().gen())
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t,
+    ///         State::new(t).increment() = RefreshBatchAndOutputByte(t.aes_index, t.byte_index)
+    fn prop_state_new_increment() {
+        for _ in 0..REPEATS {
+            let (t, mut s) = any_table_index()
+                .map(|t| (t, State::new(t)))
+                .next()
+                .unwrap();
+            assert!(matches!(
+                s.increment(),
+                ShiftAction::RefreshBatchAndOutputByte(t_, BufferPointer(p_)) if t_ == t.aes_index && p_ == t.byte_index.0
+            ))
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive integer i,
+    ///         if s = State::new(t), and s.increase(i) was executed, then
+    ///         s.table_index() = t.increased(i-1).
+    ///
+    /// The `i-1` comes from `State::new` storing the predecessor of `t`.
+    fn prop_state_increase_table_index() {
+        for _ in 0..REPEATS {
+            let (t, mut s, i) = any_table_index()
+                .zip(any_usize())
+                .map(|(t, i)| (t, State::new(t), i))
+                .next()
+                .unwrap();
+            s.increase(i);
+            assert_eq!(s.table_index(), t.increased(i - 1))
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive integer i such that
+    ///     t.byte_index + i < BYTES_PER_BATCH - 1,
+    ///         if s = State::new(t), and s.increment() was executed, then
+    ///         s.increase(i) = OutputByte(t.byte_index + i).
+    fn prop_state_increase_small() {
+        for _ in 0..REPEATS {
+            let (t, mut s, i) = any_table_index()
+                .zip(any_usize())
+                .map(|(t, i)| (t, State::new(t), i % BYTES_PER_BATCH))
+                .find(|(t, _, i)| t.byte_index.0 + i < BYTES_PER_BATCH - 1)
+                .unwrap();
+            s.increment();
+            assert!(matches!(
+                s.increase(i),
+                ShiftAction::OutputByte(BufferPointer(p_)) if p_ == t.byte_index.0 + i
+            ));
+        }
+    }
+
+    #[test]
+    /// Check the property:
+    ///     For all table indices t, positive integer i such that
+    ///     t.byte_index + i >= BYTES_PER_BATCH - 1,
+    ///         if s = State::new(t), and s.increment() was executed, then
+    ///         s.increase(i) = RefreshBatchAndOutputByte(
+    ///             t.increased(i).aes_index,
+    ///             t.increased(i).byte_index).
+    fn prop_state_increase_large() {
+        for _ in 0..REPEATS {
+            let (t, mut s, i) = any_table_index()
+                .zip(any_usize())
+                .map(|(t, i)| (t, State::new(t), i))
+                .find(|(t, _, i)| t.byte_index.0 + i >= BYTES_PER_BATCH - 1)
+                .unwrap();
+            s.increment();
+            assert!(matches!(
+                s.increase(i),
+                ShiftAction::RefreshBatchAndOutputByte(t_, BufferPointer(p_))
+                    if t_ == t.increased(i).aes_index && p_ == t.increased(i).byte_index.0
+            ));
+        }
+    }
+}
--- a/concrete-csprng/src/generators/implem/aarch64/block_cipher.rs
+++ b/concrete-csprng/src/generators/implem/aarch64/block_cipher.rs
@@ -0,0 +1,184 @@
+use crate::generators::aes_ctr::{AesBlockCipher, AesIndex, AesKey, BYTES_PER_BATCH};
+use core::arch::aarch64::{
+    uint8x16_t, vaeseq_u8, vaesmcq_u8, vdupq_n_u32, vdupq_n_u8, veorq_u8, vgetq_lane_u32,
+    vreinterpretq_u32_u8, vreinterpretq_u8_u32,
+};
+use std::arch::is_aarch64_feature_detected;
+use std::mem::transmute;
+
+/// Round constants XORed into the first word of each expanded round key.
+const RCONS: [u32; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36];
+/// Number of 32-bit words in one round key.
+const NUM_WORDS_IN_KEY: usize = 4;
+/// Number of rounds performed by `encrypt`.
+const NUM_ROUNDS: usize = 10;
+/// Number of round keys held by the cipher: one more than the number of rounds.
+const NUM_ROUND_KEYS: usize = NUM_ROUNDS + 1;
+
+/// An aes block cipher implementation which uses `neon` and `aes` instructions.
+#[derive(Clone)]
+pub struct ArmAesBlockCipher {
+    // The set of round keys used for the aes encryption
+    round_keys: [uint8x16_t; NUM_ROUND_KEYS],
+}
+
+impl AesBlockCipher for ArmAesBlockCipher {
+    /// Builds the cipher by expanding `key` into its round keys.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `aes` or the `neon` CPU feature is not detected at runtime.
+    fn new(key: AesKey) -> ArmAesBlockCipher {
+        let aes_detected = is_aarch64_feature_detected!("aes");
+        let neon_detected = is_aarch64_feature_detected!("neon");
+
+        if !(aes_detected && neon_detected) {
+            panic!(
+                "The ArmAesBlockCipher requires both aes and neon aarch64 CPU features.\n\
+                aes feature available: {}\nneon feature available: {}\n.",
+                aes_detected, neon_detected
+            )
+        }
+
+        // SAFETY: we checked for aes and neon availability just above
+        let round_keys = unsafe { generate_round_keys(key) };
+        ArmAesBlockCipher { round_keys }
+    }
+
+    /// Fills a batch of `BYTES_PER_BATCH` bytes, made of the encryptions of the consecutive
+    /// counter values starting at `aes_ctr`, one 16-byte chunk per counter value.
+    fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
+        // The work is done in an inner function so that it can carry the `target_feature`
+        // attribute.
+        #[target_feature(enable = "aes,neon")]
+        unsafe fn implementation(
+            this: &ArmAesBlockCipher,
+            AesIndex(aes_ctr): AesIndex,
+        ) -> [u8; BYTES_PER_BATCH] {
+            let mut output = [0u8; BYTES_PER_BATCH];
+            // We want 128 bytes of output, the ctr gives 128 bit message (16 bytes)
+            for (i, out) in output.chunks_exact_mut(16).enumerate() {
+                // Safe because we prevent the user from creating the Generator
+                // on non-supported hardware
+                let encrypted = encrypt(aes_ctr + (i as u128), &this.round_keys);
+                out.copy_from_slice(&encrypted.to_ne_bytes());
+            }
+            output
+        }
+        // SAFETY: we checked for aes and neon availability in `Self::new`
+        unsafe { implementation(self, AesIndex(aes_ctr)) }
+    }
+}
+
+/// Does the AES SubWord operation for the Key Expansion step
+///
+/// # SAFETY
+///
+/// You must make sure the CPU's arch is `aarch64` and has
+/// `neon` and `aes` features.
+#[inline(always)]
+unsafe fn sub_word(word: u32) -> u32 {
+    // The word is duplicated in the four 32-bit lanes of the vector
+    let data = vreinterpretq_u8_u32(vdupq_n_u32(word));
+    let zero_key = vdupq_n_u8(0u8);
+    let temp = vaeseq_u8(data, zero_key);
+    // vaeseq_u8 does SubBytes(ShiftRow(XOR(data, key)))
+    // But because we used a zero aes key, the XOR did not alter data
+    // We now have temp = SubBytes(ShiftRow(data))
+
+    // Since in AES ShiftRow operation, the first row is not shifted
+    // We can just get that one to have our SubWord(word) result
+    vgetq_lane_u32::<0>(vreinterpretq_u32_u8(temp))
+}
+
+/// Reinterprets the 16 bytes of a `uint8x16_t` as a `u128`, without changing any bit.
+#[inline(always)]
+fn uint8x16_t_to_u128(input: uint8x16_t) -> u128 {
+    // Plain bit-for-bit reinterpretation between two 16-byte values
+    unsafe { transmute(input) }
+}
+
+/// Reinterprets the 16 bytes of a `u128` as a `uint8x16_t`, without changing any bit.
+#[inline(always)]
+fn u128_to_uint8x16_t(input: u128) -> uint8x16_t {
+    // Plain bit-for-bit reinterpretation between two 16-byte values
+    unsafe { transmute(input) }
+}
+
+/// Expands `key` into the `NUM_ROUND_KEYS` round keys used by `encrypt`.
+///
+/// The first round key is the key itself; the following ones are derived word by word from
+/// the previous words.
+///
+/// # SAFETY
+///
+/// You must make sure the CPU's arch is `aarch64` and has
+/// `neon` and `aes` features.
+#[target_feature(enable = "aes,neon")]
+unsafe fn generate_round_keys(key: AesKey) -> [uint8x16_t; NUM_ROUND_KEYS] {
+    let mut round_keys: [uint8x16_t; NUM_ROUND_KEYS] = std::mem::zeroed();
+    round_keys[0] = u128_to_uint8x16_t(key.0);
+
+    // View the round keys as a flat slice of 32-bit words, to expand them word by word
+    let words = std::slice::from_raw_parts_mut(
+        round_keys.as_mut_ptr() as *mut u32,
+        NUM_ROUND_KEYS * NUM_WORDS_IN_KEY,
+    );
+
+    debug_assert_eq!(words.len(), 44);
+
+    // Skip the words of the first key, it's already done
+    for i in NUM_WORDS_IN_KEY..words.len() {
+        if (i % NUM_WORDS_IN_KEY) == 0 {
+            // First word of a round key: uses the substituted and rotated previous word, and
+            // the round constant of that round key
+            words[i] = words[i - NUM_WORDS_IN_KEY]
+                ^ sub_word(words[i - 1]).rotate_right(8)
+                ^ RCONS[(i / NUM_WORDS_IN_KEY) - 1];
+        } else {
+            words[i] = words[i - NUM_WORDS_IN_KEY] ^ words[i - 1];
+        }
+        // Note: there is also a special thing to do when
+        // i mod NUM_WORDS_IN_KEY == 4 but it cannot happen on 128 bits keys
+    }
+
+    round_keys
+}
+
+/// Encrypts a 128-bit message
+///
+/// # SAFETY
+///
+/// You must make sure the CPU's arch is `aarch64` and has
+/// `neon` and `aes` features.
+#[inline(always)]
+unsafe fn encrypt(message: u128, keys: &[uint8x16_t; NUM_ROUND_KEYS]) -> u128 {
+    // Notes:
+    // According to the [ARM Manual](https://developer.arm.com/documentation/ddi0487/gb/):
+    // `vaeseq_u8` is the following AES operations:
+    //      1. AddRoundKey (XOR)
+    //      2. ShiftRows
+    //      3. SubBytes
+    // `vaesmcq_u8` is MixColumns
+    let mut data: uint8x16_t = u128_to_uint8x16_t(message);
+
+    // All the rounds but the last one, each using one of the first `NUM_ROUNDS - 1` keys
+    for &key in keys.iter().take(NUM_ROUNDS - 1) {
+        data = vaesmcq_u8(vaeseq_u8(data, key));
+    }
+
+    // Last round: no MixColumns, and the last round key is XORed in at the end
+    data = vaeseq_u8(data, keys[NUM_ROUNDS - 1]);
+    data = veorq_u8(data, keys[NUM_ROUND_KEYS - 1]);
+
+    uint8x16_t_to_u128(data)
+}
+
+#[cfg(test)]
+mod test {
+    // NOTE(review): these tests call the unsafe helpers without a runtime feature check;
+    // presumably they are only expected to run on CPUs with `aes` and `neon` - confirm.
+    use super::*;
+
+    // Test vector for aes128, from the FIPS publication 197
+    const CIPHER_KEY: u128 = u128::from_be(0x000102030405060708090a0b0c0d0e0f);
+    const KEY_SCHEDULE: [u128; 11] = [
+        u128::from_be(0x000102030405060708090a0b0c0d0e0f),
+        u128::from_be(0xd6aa74fdd2af72fadaa678f1d6ab76fe),
+        u128::from_be(0xb692cf0b643dbdf1be9bc5006830b3fe),
+        u128::from_be(0xb6ff744ed2c2c9bf6c590cbf0469bf41),
+        u128::from_be(0x47f7f7bc95353e03f96c32bcfd058dfd),
+        u128::from_be(0x3caaa3e8a99f9deb50f3af57adf622aa),
+        u128::from_be(0x5e390f7df7a69296a7553dc10aa31f6b),
+        u128::from_be(0x14f9701ae35fe28c440adf4d4ea9c026),
+        u128::from_be(0x47438735a41c65b9e016baf4aebf7ad2),
+        u128::from_be(0x549932d1f08557681093ed9cbe2c974e),
+        u128::from_be(0x13111d7fe3944a17f307a78b4d2b30c5),
+    ];
+    const PLAINTEXT: u128 = u128::from_be(0x00112233445566778899aabbccddeeff);
+    const CIPHERTEXT: u128 = u128::from_be(0x69c4e0d86a7b0430d8cdb78070b4c55a);
+
+    #[test]
+    fn test_generate_key_schedule() {
+        // Checks that the round keys are correctly generated from the sample key from FIPS
+        let key = AesKey(CIPHER_KEY);
+        let keys = unsafe { generate_round_keys(key) };
+        for (expected, actual) in KEY_SCHEDULE.iter().zip(keys.iter()) {
+            assert_eq!(*expected, uint8x16_t_to_u128(*actual));
+        }
+    }
+
+    #[test]
+    fn test_encrypt_message() {
+        // Checks that encrypting the sample plaintext with the sample key gives the expected
+        // ciphertext.
+        let message = PLAINTEXT;
+        let key = AesKey(CIPHER_KEY);
+        let keys = unsafe { generate_round_keys(key) };
+        let ciphertext = unsafe { encrypt(message, &keys) };
+        assert_eq!(CIPHERTEXT, ciphertext);
+    }
+}
--- a/concrete-csprng/src/generators/implem/aarch64/generator.rs
+++ b/concrete-csprng/src/generators/implem/aarch64/generator.rs
@@ -0,0 +1,110 @@
+use crate::generators::aes_ctr::{AesCtrGenerator, AesKey, ChildrenIterator};
+use crate::generators::implem::aarch64::block_cipher::ArmAesBlockCipher;
+use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError, RandomGenerator};
+use crate::seeders::Seed;
+
+/// A random number generator using the aarch64 `neon` and `aes` instructions.
+pub struct NeonAesRandomGenerator(pub(super) AesCtrGenerator<ArmAesBlockCipher>);
+
+/// The children iterator used by [`NeonAesRandomGenerator`].
+///
+/// Outputs children generators one by one, each wrapped in a [`NeonAesRandomGenerator`].
+pub struct ArmAesChildrenIterator(ChildrenIterator<ArmAesBlockCipher>);
+
+impl Iterator for ArmAesChildrenIterator {
+    type Item = NeonAesRandomGenerator;
+
+    /// Pulls the next child out of the wrapped iterator, and wraps it in the public type.
+    fn next(&mut self) -> Option<Self::Item> {
+        let child = self.0.next()?;
+        Some(NeonAesRandomGenerator(child))
+    }
+}
+
+impl RandomGenerator for NeonAesRandomGenerator {
+    type ChildrenIter = ArmAesChildrenIterator;
+
+    /// Builds a generator keyed with `seed`, with neither a start index nor a bound given.
+    fn new(seed: Seed) -> Self {
+        let key = AesKey(seed.0);
+        NeonAesRandomGenerator(AesCtrGenerator::new(key, None, None))
+    }
+
+    /// Number of remaining bytes, as reported by the wrapped generator.
+    fn remaining_bytes(&self) -> ByteCount {
+        let inner = &self.0;
+        inner.remaining_bytes()
+    }
+
+    /// Forks the wrapped generator into `n_children` children of `n_bytes` bytes each, and
+    /// wraps the resulting iterator. Errors of the wrapped generator are forwarded untouched.
+    fn try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ChildrenIter, ForkError> {
+        let children = self.0.try_fork(n_children, n_bytes)?;
+        Ok(ArmAesChildrenIterator(children))
+    }
+}
+
+impl Iterator for NeonAesRandomGenerator {
+    type Item = u8;
+
+    /// Outputs the next byte of the wrapped generator.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // Every test below instantiates a generic test with the aarch64 block cipher or generator.
+    use crate::generators::aes_ctr::aes_ctr_generic_test;
+    use crate::generators::implem::aarch64::block_cipher::ArmAesBlockCipher;
+    use crate::generators::{generator_generic_test, NeonAesRandomGenerator};
+
+    #[test]
+    fn prop_fork_first_state_table_index() {
+        aes_ctr_generic_test::prop_fork_first_state_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_last_bound_table_index() {
+        aes_ctr_generic_test::prop_fork_last_bound_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_bound_table_index() {
+        aes_ctr_generic_test::prop_fork_parent_bound_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_state_table_index() {
+        aes_ctr_generic_test::prop_fork_parent_state_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork() {
+        aes_ctr_generic_test::prop_fork::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_children_remaining_bytes() {
+        aes_ctr_generic_test::prop_fork_children_remaining_bytes::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_remaining_bytes() {
+        aes_ctr_generic_test::prop_fork_parent_remaining_bytes::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn test_roughly_uniform() {
+        generator_generic_test::test_roughly_uniform::<NeonAesRandomGenerator>();
+    }
+
+    #[test]
+    fn test_generator_determinism() {
+        generator_generic_test::test_generator_determinism::<NeonAesRandomGenerator>();
+    }
+
+    #[test]
+    fn test_fork() {
+        generator_generic_test::test_fork_children::<NeonAesRandomGenerator>();
+    }
+
+    // The generic test is expected to panic with the message given below.
+    #[test]
+    #[should_panic(expected = "expected test panic")]
+    fn test_bounded_panic() {
+        generator_generic_test::test_bounded_none_should_panic::<NeonAesRandomGenerator>();
+    }
+}
--- a/concrete-csprng/src/generators/implem/aarch64/mod.rs
+++ b/concrete-csprng/src/generators/implem/aarch64/mod.rs
@@ -0,0 +1,16 @@
+//! A module implementing a random number generator, using the aarch64 `neon` and `aes`
+//! instructions.
+//!
+//! This module implements a cryptographically secure pseudorandom number generator
+//! (CS-PRNG), using a fast block cipher. The implementation is based on the
+//! [intel aesni white paper 323641-001 revision 3.0](https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf).
+
+mod block_cipher;
+
+mod generator;
+pub use generator::*;
+
+#[cfg(feature = "parallel")]
+mod parallel;
+#[cfg(feature = "parallel")]
+pub use parallel::*;
--- a/concrete-csprng/src/generators/implem/aarch64/parallel.rs
+++ b/concrete-csprng/src/generators/implem/aarch64/parallel.rs
@@ -0,0 +1,95 @@
+use super::*;
+use crate::generators::aes_ctr::{AesCtrGenerator, ParallelChildrenIterator};
+use crate::generators::implem::aarch64::block_cipher::ArmAesBlockCipher;
+use crate::generators::{BytesPerChild, ChildrenCount, ForkError, ParallelRandomGenerator};
+use rayon::iter::plumbing::{Consumer, ProducerCallback, UnindexedConsumer};
+use rayon::prelude::*;
+
+/// The parallel children iterator used by [`NeonAesRandomGenerator`].
+///
+/// Outputs the children generators one by one.
+///
+/// It wraps the generic parallel children iterator, mapped through a function pointer that
+/// turns each raw child into a [`NeonAesRandomGenerator`].
+#[allow(clippy::type_complexity)]
+pub struct ParallelArmAesChildrenIterator(
+    rayon::iter::Map<
+        ParallelChildrenIterator<ArmAesBlockCipher>,
+        fn(AesCtrGenerator<ArmAesBlockCipher>) -> NeonAesRandomGenerator,
+    >,
+);
+
+impl ParallelIterator for ParallelArmAesChildrenIterator {
+    type Item = NeonAesRandomGenerator;
+    /// Delegates to the wrapped mapped iterator.
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.0.drive_unindexed(consumer)
+    }
+}
+
+// Every method below delegates to the wrapped mapped iterator.
+impl IndexedParallelIterator for ParallelArmAesChildrenIterator {
+    fn len(&self) -> usize {
+        self.0.len()
+    }
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        self.0.drive(consumer)
+    }
+    fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output {
+        self.0.with_producer(callback)
+    }
+}
+
+impl ParallelRandomGenerator for NeonAesRandomGenerator {
+    type ParChildrenIter = ParallelArmAesChildrenIterator;
+
+    /// Forks the wrapped generator in parallel, and wraps every child into a
+    /// [`NeonAesRandomGenerator`]. Errors of the wrapped generator are forwarded untouched.
+    fn par_try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ParChildrenIter, ForkError> {
+        let children = self.0.par_try_fork(n_children, n_bytes)?;
+        Ok(ParallelArmAesChildrenIterator(
+            children.map(NeonAesRandomGenerator),
+        ))
+    }
+}
+
+#[cfg(test)]
+
+mod test {
+    // Every test below instantiates a generic parallel test with the aarch64 block cipher.
+    use crate::generators::aes_ctr::aes_ctr_parallel_generic_tests;
+    use crate::generators::implem::aarch64::block_cipher::ArmAesBlockCipher;
+
+    #[test]
+    fn prop_fork_first_state_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_first_state_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_last_bound_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_last_bound_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_bound_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_bound_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_state_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_state_table_index::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_ttt() {
+        aes_ctr_parallel_generic_tests::prop_fork::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_children_remaining_bytes() {
+        aes_ctr_parallel_generic_tests::prop_fork_children_remaining_bytes::<ArmAesBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_remaining_bytes() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_remaining_bytes::<ArmAesBlockCipher>();
+    }
+}
--- a/concrete-csprng/src/generators/implem/aesni/block_cipher.rs
+++ b/concrete-csprng/src/generators/implem/aesni/block_cipher.rs
@@ -0,0 +1,231 @@
+use crate::generators::aes_ctr::{AesBlockCipher, AesIndex, AesKey, BYTES_PER_BATCH};
+use std::arch::x86_64::{
+    __m128i, _mm_aesenc_si128, _mm_aesenclast_si128, _mm_aeskeygenassist_si128, _mm_shuffle_epi32,
+    _mm_slli_si128, _mm_store_si128, _mm_xor_si128,
+};
+use std::mem::transmute;
+
+/// An aes block cipher implementation which uses `aesni` instructions.
+#[derive(Clone)]
+pub struct AesniBlockCipher {
+    // The set of round keys used for the aes encryption: the key itself, followed by the ten
+    // keys expanded from it
+    round_keys: [__m128i; 11],
+}
+
+impl AesBlockCipher for AesniBlockCipher {
+    /// Builds the cipher by expanding `key` into its round keys.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `aes` or the `sse2` CPU feature is not detected at runtime.
+    fn new(key: AesKey) -> AesniBlockCipher {
+        let aes_detected = is_x86_feature_detected!("aes");
+        let sse2_detected = is_x86_feature_detected!("sse2");
+
+        if !(aes_detected && sse2_detected) {
+            panic!(
+                "The AesniBlockCipher requires both aes and sse2 x86 CPU features.\n\
+                aes feature available: {}\nsse2 feature available: {}\n.",
+                aes_detected, sse2_detected
+            )
+        }
+
+        // SAFETY: we checked for aes and sse2 availability
+        let round_keys = unsafe { generate_round_keys(key) };
+        AesniBlockCipher { round_keys }
+    }
+
+    /// Fills a batch of `BYTES_PER_BATCH` bytes, made of the encryptions of the eight
+    /// consecutive counter values starting at `aes_ctr`.
+    fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
+        // The work is done in an inner function so that it can carry the `target_feature`
+        // attribute.
+        #[target_feature(enable = "sse2,aes")]
+        unsafe fn implementation(
+            this: &AesniBlockCipher,
+            AesIndex(aes_ctr): AesIndex,
+        ) -> [u8; BYTES_PER_BATCH] {
+            si128arr_to_u8arr(aes_encrypt_many(
+                u128_to_si128(aes_ctr),
+                u128_to_si128(aes_ctr + 1),
+                u128_to_si128(aes_ctr + 2),
+                u128_to_si128(aes_ctr + 3),
+                u128_to_si128(aes_ctr + 4),
+                u128_to_si128(aes_ctr + 5),
+                u128_to_si128(aes_ctr + 6),
+                u128_to_si128(aes_ctr + 7),
+                &this.round_keys,
+            ))
+        }
+        // SAFETY: we checked for aes and sse2 availability in `Self::new`
+        unsafe { implementation(self, AesIndex(aes_ctr)) }
+    }
+}
+
+/// Expands `key` into the eleven round keys used for the aes encryption.
+///
+/// # SAFETY
+///
+/// You must make sure the CPU has the `sse2` and `aes` features.
+#[target_feature(enable = "sse2,aes")]
+unsafe fn generate_round_keys(key: AesKey) -> [__m128i; 11] {
+    let key = u128_to_si128(key.0);
+    // Zero-filled placeholder, every entry is overwritten by the expansion
+    let mut keys: [__m128i; 11] = [u128_to_si128(0); 11];
+    aes_128_key_expansion(key, &mut keys);
+    keys
+}
+
+// Uses aes to encrypt many values at once. This allows a substantial speedup (around 30%)
+// compared to the naive approach.
+//
+// The eight messages are encrypted with the same round keys, and the eight ciphertexts are
+// returned in the order of the messages.
+#[allow(clippy::too_many_arguments)]
+#[inline(always)]
+fn aes_encrypt_many(
+    message_1: __m128i,
+    message_2: __m128i,
+    message_3: __m128i,
+    message_4: __m128i,
+    message_5: __m128i,
+    message_6: __m128i,
+    message_7: __m128i,
+    message_8: __m128i,
+    keys: &[__m128i; 11],
+) -> [__m128i; 8] {
+    unsafe {
+        // Initial step: XOR every message with the first round key
+        let mut tmp_1 = _mm_xor_si128(message_1, keys[0]);
+        let mut tmp_2 = _mm_xor_si128(message_2, keys[0]);
+        let mut tmp_3 = _mm_xor_si128(message_3, keys[0]);
+        let mut tmp_4 = _mm_xor_si128(message_4, keys[0]);
+        let mut tmp_5 = _mm_xor_si128(message_5, keys[0]);
+        let mut tmp_6 = _mm_xor_si128(message_6, keys[0]);
+        let mut tmp_7 = _mm_xor_si128(message_7, keys[0]);
+        let mut tmp_8 = _mm_xor_si128(message_8, keys[0]);
+
+        // Regular rounds, with the round keys 1 to 9
+        for key in keys.iter().take(10).skip(1) {
+            tmp_1 = _mm_aesenc_si128(tmp_1, *key);
+            tmp_2 = _mm_aesenc_si128(tmp_2, *key);
+            tmp_3 = _mm_aesenc_si128(tmp_3, *key);
+            tmp_4 = _mm_aesenc_si128(tmp_4, *key);
+            tmp_5 = _mm_aesenc_si128(tmp_5, *key);
+            tmp_6 = _mm_aesenc_si128(tmp_6, *key);
+            tmp_7 = _mm_aesenc_si128(tmp_7, *key);
+            tmp_8 = _mm_aesenc_si128(tmp_8, *key);
+        }
+
+        // Last round, with the last round key
+        tmp_1 = _mm_aesenclast_si128(tmp_1, keys[10]);
+        tmp_2 = _mm_aesenclast_si128(tmp_2, keys[10]);
+        tmp_3 = _mm_aesenclast_si128(tmp_3, keys[10]);
+        tmp_4 = _mm_aesenclast_si128(tmp_4, keys[10]);
+        tmp_5 = _mm_aesenclast_si128(tmp_5, keys[10]);
+        tmp_6 = _mm_aesenclast_si128(tmp_6, keys[10]);
+        tmp_7 = _mm_aesenclast_si128(tmp_7, keys[10]);
+        tmp_8 = _mm_aesenclast_si128(tmp_8, keys[10]);
+
+        [tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, tmp_7, tmp_8]
+    }
+}
+
+// Computes the next round key from the previous round key `temp1` and from `temp2`, the
+// output of `_mm_aeskeygenassist_si128` on that previous round key.
+fn aes_128_assist(temp1: __m128i, temp2: __m128i) -> __m128i {
+    let mut temp3: __m128i;
+    let mut temp2 = temp2;
+    let mut temp1 = temp1;
+    unsafe {
+        // Broadcast the highest 32-bit word of `temp2` to the four words
+        temp2 = _mm_shuffle_epi32(temp2, 0xff);
+        // XOR `temp1` with itself shifted left by 4, 8 and 12 bytes
+        temp3 = _mm_slli_si128(temp1, 0x4);
+        temp1 = _mm_xor_si128(temp1, temp3);
+        temp3 = _mm_slli_si128(temp3, 0x4);
+        temp1 = _mm_xor_si128(temp1, temp3);
+        temp3 = _mm_slli_si128(temp3, 0x4);
+        temp1 = _mm_xor_si128(temp1, temp3);
+        temp1 = _mm_xor_si128(temp1, temp2);
+    }
+    temp1
+}
+
+// Fills `keys` with the eleven round keys expanded from `key`: `keys[0]` is the key itself,
+// and each following entry is derived from the previous one.
+//
+// The expansion is unrolled, with a different literal constant passed to
+// `_mm_aeskeygenassist_si128` at each step.
+#[inline(always)]
+fn aes_128_key_expansion(key: __m128i, keys: &mut [__m128i; 11]) {
+    let (mut temp1, mut temp2): (__m128i, __m128i);
+    temp1 = key;
+    unsafe {
+        _mm_store_si128(keys.as_mut_ptr(), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x01);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(1), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x02);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(2), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x04);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(3), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x08);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(4), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x10);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(5), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x20);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(6), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x40);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(7), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x80);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(8), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x1b);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(9), temp1);
+        temp2 = _mm_aeskeygenassist_si128(temp1, 0x36);
+        temp1 = aes_128_assist(temp1, temp2);
+        _mm_store_si128(keys.as_mut_ptr().offset(10), temp1);
+    }
+}
+
+// Reinterprets the 16 bytes of a `u128` as a `__m128i`, without changing any bit.
+#[inline(always)]
+fn u128_to_si128(input: u128) -> __m128i {
+    unsafe { transmute(input) }
+}
+
+// Reinterprets the 16 bytes of a `__m128i` as a `u128`, without changing any bit.
+#[allow(unused)] // to please clippy when tests are not activated
+fn si128_to_u128(input: __m128i) -> u128 {
+    unsafe { transmute(input) }
+}
+
+// Reinterprets eight `__m128i` values as one flat array of `BYTES_PER_BATCH` bytes, without
+// changing any bit.
+#[inline(always)]
+fn si128arr_to_u8arr(input: [__m128i; 8]) -> [u8; BYTES_PER_BATCH] {
+    unsafe { transmute(input) }
+}
+
+#[cfg(test)]
+mod test {
+    // NOTE(review): these tests call the intrinsics-based helpers without a runtime feature
+    // check; presumably they are only expected to run on CPUs with `aes` support - confirm.
+    use super::*;
+
+    // Test vector for aes128, from the FIPS publication 197
+    const CIPHER_KEY: u128 = u128::from_be(0x000102030405060708090a0b0c0d0e0f);
+    const KEY_SCHEDULE: [u128; 11] = [
+        u128::from_be(0x000102030405060708090a0b0c0d0e0f),
+        u128::from_be(0xd6aa74fdd2af72fadaa678f1d6ab76fe),
+        u128::from_be(0xb692cf0b643dbdf1be9bc5006830b3fe),
+        u128::from_be(0xb6ff744ed2c2c9bf6c590cbf0469bf41),
+        u128::from_be(0x47f7f7bc95353e03f96c32bcfd058dfd),
+        u128::from_be(0x3caaa3e8a99f9deb50f3af57adf622aa),
+        u128::from_be(0x5e390f7df7a69296a7553dc10aa31f6b),
+        u128::from_be(0x14f9701ae35fe28c440adf4d4ea9c026),
+        u128::from_be(0x47438735a41c65b9e016baf4aebf7ad2),
+        u128::from_be(0x549932d1f08557681093ed9cbe2c974e),
+        u128::from_be(0x13111d7fe3944a17f307a78b4d2b30c5),
+    ];
+    const PLAINTEXT: u128 = u128::from_be(0x00112233445566778899aabbccddeeff);
+    const CIPHERTEXT: u128 = u128::from_be(0x69c4e0d86a7b0430d8cdb78070b4c55a);
+
+    #[test]
+    fn test_generate_key_schedule() {
+        // Checks that the round keys are correctly generated from the sample key from FIPS
+        let key = u128_to_si128(CIPHER_KEY);
+        let mut keys: [__m128i; 11] = [u128_to_si128(0); 11];
+        aes_128_key_expansion(key, &mut keys);
+        for (expected, actual) in KEY_SCHEDULE.iter().zip(keys.iter()) {
+            assert_eq!(*expected, si128_to_u128(*actual));
+        }
+    }
+
+    #[test]
+    fn test_encrypt_many_messages() {
+        // Checks that encrypting many plaintext at the same time gives the correct output.
+        // The same sample plaintext is used for the eight messages.
+        let message = u128_to_si128(PLAINTEXT);
+        let key = u128_to_si128(CIPHER_KEY);
+        let mut keys: [__m128i; 11] = [u128_to_si128(0); 11];
+        aes_128_key_expansion(key, &mut keys);
+        let ciphertexts = aes_encrypt_many(
+            message, message, message, message, message, message, message, message, &keys,
+        );
+        for ct in &ciphertexts {
+            assert_eq!(CIPHERTEXT, si128_to_u128(*ct));
+        }
+    }
+}
--- a/concrete-csprng/src/generators/implem/aesni/generator.rs
+++ b/concrete-csprng/src/generators/implem/aesni/generator.rs
@@ -0,0 +1,110 @@
+use crate::generators::aes_ctr::{AesCtrGenerator, AesKey, ChildrenIterator};
+use crate::generators::implem::aesni::block_cipher::AesniBlockCipher;
+use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError, RandomGenerator};
+use crate::seeders::Seed;
+
+/// A random number generator using the `aesni` instructions.
+pub struct AesniRandomGenerator(pub(super) AesCtrGenerator<AesniBlockCipher>);
+
+/// The children iterator used by [`AesniRandomGenerator`].
+///
+/// Outputs children generators one by one.
+pub struct AesniChildrenIterator(ChildrenIterator<AesniBlockCipher>);
+
+impl Iterator for AesniChildrenIterator {
+    type Item = AesniRandomGenerator;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next().map(AesniRandomGenerator)
+    }
+}
+
+impl RandomGenerator for AesniRandomGenerator {
+    type ChildrenIter = AesniChildrenIterator;
+    fn new(seed: Seed) -> Self {
+        AesniRandomGenerator(AesCtrGenerator::new(AesKey(seed.0), None, None))
+    }
+    fn remaining_bytes(&self) -> ByteCount {
+        self.0.remaining_bytes()
+    }
+    fn try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ChildrenIter, ForkError> {
+        self.0
+            .try_fork(n_children, n_bytes)
+            .map(AesniChildrenIterator)
+    }
+}
+
+impl Iterator for AesniRandomGenerator {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::generators::aes_ctr::aes_ctr_generic_test;
+    use crate::generators::implem::aesni::block_cipher::AesniBlockCipher;
+    use crate::generators::{generator_generic_test, AesniRandomGenerator};
+
+    #[test]
+    fn prop_fork_first_state_table_index() {
+        aes_ctr_generic_test::prop_fork_first_state_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_last_bound_table_index() {
+        aes_ctr_generic_test::prop_fork_last_bound_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_bound_table_index() {
+        aes_ctr_generic_test::prop_fork_parent_bound_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_state_table_index() {
+        aes_ctr_generic_test::prop_fork_parent_state_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork() {
+        aes_ctr_generic_test::prop_fork::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_children_remaining_bytes() {
+        aes_ctr_generic_test::prop_fork_children_remaining_bytes::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_remaining_bytes() {
+        aes_ctr_generic_test::prop_fork_parent_remaining_bytes::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn test_roughly_uniform() {
+        generator_generic_test::test_roughly_uniform::<AesniRandomGenerator>();
+    }
+
+    #[test]
+    fn test_generator_determinism() {
+        generator_generic_test::test_generator_determinism::<AesniRandomGenerator>();
+    }
+
+    #[test]
+    fn test_fork() {
+        generator_generic_test::test_fork_children::<AesniRandomGenerator>();
+    }
+
+    #[test]
+    #[should_panic(expected = "expected test panic")]
+    fn test_bounded_panic() {
+        generator_generic_test::test_bounded_none_should_panic::<AesniRandomGenerator>();
+    }
+}
--- a/concrete-csprng/src/generators/implem/aesni/mod.rs
+++ b/concrete-csprng/src/generators/implem/aesni/mod.rs
@@ -0,0 +1,15 @@
+//! A module implementing a random number generator, using the x86_64 `aesni` instructions.
+//!
+//! This module implements a cryptographically secure pseudorandom number generator
+//! (CS-PRNG), using a fast block cipher. The implementation is based on the
+//! [intel aesni white paper 323641-001 revision 3.0](https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf).
+
+mod block_cipher;
+
+mod generator;
+pub use generator::*;
+
+#[cfg(feature = "parallel")]
+mod parallel;
+#[cfg(feature = "parallel")]
+pub use parallel::*;
--- a/concrete-csprng/src/generators/implem/aesni/parallel.rs
+++ b/concrete-csprng/src/generators/implem/aesni/parallel.rs
@@ -0,0 +1,95 @@
+use super::*;
+use crate::generators::aes_ctr::{AesCtrGenerator, ParallelChildrenIterator};
+use crate::generators::implem::aesni::block_cipher::AesniBlockCipher;
+use crate::generators::{BytesPerChild, ChildrenCount, ForkError, ParallelRandomGenerator};
+use rayon::iter::plumbing::{Consumer, ProducerCallback, UnindexedConsumer};
+use rayon::prelude::*;
+
+/// The parallel children iterator used by [`AesniRandomGenerator`].
+///
+/// Outputs the children generators one by one.
+#[allow(clippy::type_complexity)]
+pub struct ParallelAesniChildrenIterator(
+    rayon::iter::Map<
+        ParallelChildrenIterator<AesniBlockCipher>,
+        fn(AesCtrGenerator<AesniBlockCipher>) -> AesniRandomGenerator,
+    >,
+);
+
+impl ParallelIterator for ParallelAesniChildrenIterator {
+    type Item = AesniRandomGenerator;
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.0.drive_unindexed(consumer)
+    }
+}
+
+impl IndexedParallelIterator for ParallelAesniChildrenIterator {
+    fn len(&self) -> usize {
+        self.0.len()
+    }
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        self.0.drive(consumer)
+    }
+    fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output {
+        self.0.with_producer(callback)
+    }
+}
+
+impl ParallelRandomGenerator for AesniRandomGenerator {
+    type ParChildrenIter = ParallelAesniChildrenIterator;
+
+    fn par_try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ParChildrenIter, ForkError> {
+        self.0
+            .par_try_fork(n_children, n_bytes)
+            .map(|iterator| ParallelAesniChildrenIterator(iterator.map(AesniRandomGenerator)))
+    }
+}
+
+#[cfg(test)]
+
+mod test {
+    use crate::generators::aes_ctr::aes_ctr_parallel_generic_tests;
+    use crate::generators::implem::aesni::block_cipher::AesniBlockCipher;
+
+    #[test]
+    fn prop_fork_first_state_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_first_state_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_last_bound_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_last_bound_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_bound_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_bound_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_state_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_state_table_index::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork() {
+        aes_ctr_parallel_generic_tests::prop_fork::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_children_remaining_bytes() {
+        aes_ctr_parallel_generic_tests::prop_fork_children_remaining_bytes::<AesniBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_remaining_bytes() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_remaining_bytes::<AesniBlockCipher>();
+    }
+}
--- a/concrete-csprng/src/generators/implem/mod.rs
+++ b/concrete-csprng/src/generators/implem/mod.rs
@@ -0,0 +1,14 @@
+#[cfg(feature = "generator_x86_64_aesni")]
+mod aesni;
+#[cfg(feature = "generator_x86_64_aesni")]
+pub use aesni::*;
+
+#[cfg(feature = "generator_aarch64_aes")]
+mod aarch64;
+#[cfg(feature = "generator_aarch64_aes")]
+pub use aarch64::*;
+
+#[cfg(feature = "generator_fallback")]
+mod soft;
+#[cfg(feature = "generator_fallback")]
+pub use soft::*;
--- a/concrete-csprng/src/generators/implem/soft/block_cipher.rs
+++ b/concrete-csprng/src/generators/implem/soft/block_cipher.rs
@@ -0,0 +1,114 @@
+use crate::generators::aes_ctr::{
+    AesBlockCipher, AesIndex, AesKey, AES_CALLS_PER_BATCH, BYTES_PER_AES_CALL, BYTES_PER_BATCH,
+};
+use aes::cipher::generic_array::GenericArray;
+use aes::cipher::{BlockEncrypt, KeyInit};
+use aes::Aes128;
+
+#[derive(Clone)]
+pub struct SoftwareBlockCipher {
+    // Aes structure
+    aes: Aes128,
+}
+
+impl AesBlockCipher for SoftwareBlockCipher {
+    fn new(key: AesKey) -> SoftwareBlockCipher {
+        let key: [u8; BYTES_PER_AES_CALL] = key.0.to_ne_bytes();
+        let key = GenericArray::clone_from_slice(&key[..]);
+        let aes = Aes128::new(&key);
+        SoftwareBlockCipher { aes }
+    }
+
+    fn generate_batch(&mut self, AesIndex(aes_ctr): AesIndex) -> [u8; BYTES_PER_BATCH] {
+        aes_encrypt_many(
+            aes_ctr,
+            aes_ctr + 1,
+            aes_ctr + 2,
+            aes_ctr + 3,
+            aes_ctr + 4,
+            aes_ctr + 5,
+            aes_ctr + 6,
+            aes_ctr + 7,
+            &self.aes,
+        )
+    }
+}
+
+// Encrypts eight 128-bit messages with `cipher`, returning the ciphertexts concatenated in order.
+// Batching is reported to give a substantial speedup (around 30%) over the naive approach.
+#[allow(clippy::too_many_arguments)]
+fn aes_encrypt_many(
+    message_1: u128,
+    message_2: u128,
+    message_3: u128,
+    message_4: u128,
+    message_5: u128,
+    message_6: u128,
+    message_7: u128,
+    message_8: u128,
+    cipher: &Aes128,
+) -> [u8; BYTES_PER_BATCH] {
+    let mut b1 = GenericArray::clone_from_slice(&message_1.to_ne_bytes()[..]);
+    let mut b2 = GenericArray::clone_from_slice(&message_2.to_ne_bytes()[..]);
+    let mut b3 = GenericArray::clone_from_slice(&message_3.to_ne_bytes()[..]);
+    let mut b4 = GenericArray::clone_from_slice(&message_4.to_ne_bytes()[..]);
+    let mut b5 = GenericArray::clone_from_slice(&message_5.to_ne_bytes()[..]);
+    let mut b6 = GenericArray::clone_from_slice(&message_6.to_ne_bytes()[..]);
+    let mut b7 = GenericArray::clone_from_slice(&message_7.to_ne_bytes()[..]);
+    let mut b8 = GenericArray::clone_from_slice(&message_8.to_ne_bytes()[..]);
+
+    cipher.encrypt_block(&mut b1);
+    cipher.encrypt_block(&mut b2);
+    cipher.encrypt_block(&mut b3);
+    cipher.encrypt_block(&mut b4);
+    cipher.encrypt_block(&mut b5);
+    cipher.encrypt_block(&mut b6);
+    cipher.encrypt_block(&mut b7);
+    cipher.encrypt_block(&mut b8);
+
+    let output_array: [[u8; BYTES_PER_AES_CALL]; AES_CALLS_PER_BATCH] = [
+        b1.into(),
+        b2.into(),
+        b3.into(),
+        b4.into(),
+        b5.into(),
+        b6.into(),
+        b7.into(),
+        b8.into(),
+    ];
+    // SAFETY: assumes BYTES_PER_BATCH == BYTES_PER_AES_CALL * AES_CALLS_PER_BATCH (TODO confirm).
+    unsafe { *{ output_array.as_ptr() as *const [u8; BYTES_PER_BATCH] } }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::convert::TryInto;
+
+    // Test vector for aes128, from the FIPS publication 197
+    const CIPHER_KEY: u128 = u128::from_be(0x000102030405060708090a0b0c0d0e0f);
+    const PLAINTEXT: u128 = u128::from_be(0x00112233445566778899aabbccddeeff);
+    const CIPHERTEXT: u128 = u128::from_be(0x69c4e0d86a7b0430d8cdb78070b4c55a);
+
+    #[test]
+    fn test_encrypt_many_messages() {
+        // Checks that encrypting many plaintexts at the same time gives the correct output.
+        let key: [u8; BYTES_PER_AES_CALL] = CIPHER_KEY.to_ne_bytes();
+        let aes = Aes128::new(&GenericArray::from(key));
+        let ciphertexts = aes_encrypt_many(
+            PLAINTEXT, PLAINTEXT, PLAINTEXT, PLAINTEXT, PLAINTEXT, PLAINTEXT, PLAINTEXT, PLAINTEXT,
+            &aes,
+        );
+        let ciphertexts: [u8; BYTES_PER_BATCH] = ciphertexts[..].try_into().unwrap();
+        for i in 0..8 {
+            assert_eq!(
+                u128::from_ne_bytes(
+                    ciphertexts[BYTES_PER_AES_CALL * i..BYTES_PER_AES_CALL * (i + 1)]
+                        .try_into()
+                        .unwrap()
+                ),
+                CIPHERTEXT
+            );
+        }
+    }
+}
--- a/concrete-csprng/src/generators/implem/soft/generator.rs
+++ b/concrete-csprng/src/generators/implem/soft/generator.rs
@@ -0,0 +1,110 @@
+use crate::generators::aes_ctr::{AesCtrGenerator, AesKey, ChildrenIterator};
+use crate::generators::implem::soft::block_cipher::SoftwareBlockCipher;
+use crate::generators::{ByteCount, BytesPerChild, ChildrenCount, ForkError, RandomGenerator};
+use crate::seeders::Seed;
+
+/// A random number generator using a software implementation.
+pub struct SoftwareRandomGenerator(pub(super) AesCtrGenerator<SoftwareBlockCipher>);
+
+/// The children iterator used by [`SoftwareRandomGenerator`].
+///
+/// Outputs children generators one by one.
+pub struct SoftwareChildrenIterator(ChildrenIterator<SoftwareBlockCipher>);
+
+impl Iterator for SoftwareChildrenIterator {
+    type Item = SoftwareRandomGenerator;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next().map(SoftwareRandomGenerator)
+    }
+}
+
+impl RandomGenerator for SoftwareRandomGenerator {
+    type ChildrenIter = SoftwareChildrenIterator;
+    fn new(seed: Seed) -> Self {
+        SoftwareRandomGenerator(AesCtrGenerator::new(AesKey(seed.0), None, None))
+    }
+    fn remaining_bytes(&self) -> ByteCount {
+        self.0.remaining_bytes()
+    }
+    fn try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ChildrenIter, ForkError> {
+        self.0
+            .try_fork(n_children, n_bytes)
+            .map(SoftwareChildrenIterator)
+    }
+}
+
+impl Iterator for SoftwareRandomGenerator {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::generators::aes_ctr::aes_ctr_generic_test;
+    use crate::generators::generator_generic_test;
+
+    #[test]
+    fn prop_fork_first_state_table_index() {
+        aes_ctr_generic_test::prop_fork_first_state_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_last_bound_table_index() {
+        aes_ctr_generic_test::prop_fork_last_bound_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_bound_table_index() {
+        aes_ctr_generic_test::prop_fork_parent_bound_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_state_table_index() {
+        aes_ctr_generic_test::prop_fork_parent_state_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork() {
+        aes_ctr_generic_test::prop_fork::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_children_remaining_bytes() {
+        aes_ctr_generic_test::prop_fork_children_remaining_bytes::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_remaining_bytes() {
+        aes_ctr_generic_test::prop_fork_parent_remaining_bytes::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn test_roughly_uniform() {
+        generator_generic_test::test_roughly_uniform::<SoftwareRandomGenerator>();
+    }
+
+    #[test]
+    fn test_fork() {
+        generator_generic_test::test_fork_children::<SoftwareRandomGenerator>();
+    }
+
+    #[test]
+    fn test_generator_determinism() {
+        generator_generic_test::test_generator_determinism::<SoftwareRandomGenerator>();
+    }
+
+    #[test]
+    #[should_panic(expected = "expected test panic")]
+    fn test_bounded_panic() {
+        generator_generic_test::test_bounded_none_should_panic::<SoftwareRandomGenerator>();
+    }
+}
--- a/concrete-csprng/src/generators/implem/soft/mod.rs
+++ b/concrete-csprng/src/generators/implem/soft/mod.rs
@@ -0,0 +1,11 @@
+//! A module using a software fallback implementation of random number generator.
+
+mod block_cipher;
+
+mod generator;
+pub use generator::*;
+
+#[cfg(feature = "parallel")]
+mod parallel;
+#[cfg(feature = "parallel")]
+pub use parallel::*;
--- a/concrete-csprng/src/generators/implem/soft/parallel.rs
+++ b/concrete-csprng/src/generators/implem/soft/parallel.rs
@@ -0,0 +1,94 @@
+use super::*;
+use crate::generators::aes_ctr::{AesCtrGenerator, ParallelChildrenIterator};
+use crate::generators::implem::soft::block_cipher::SoftwareBlockCipher;
+use crate::generators::{BytesPerChild, ChildrenCount, ForkError, ParallelRandomGenerator};
+use rayon::iter::plumbing::{Consumer, ProducerCallback, UnindexedConsumer};
+use rayon::prelude::*;
+
+/// The parallel children iterator used by [`SoftwareRandomGenerator`].
+///
+/// Outputs the children generators one by one.
+#[allow(clippy::type_complexity)]
+pub struct ParallelSoftwareChildrenIterator(
+    rayon::iter::Map<
+        ParallelChildrenIterator<SoftwareBlockCipher>,
+        fn(AesCtrGenerator<SoftwareBlockCipher>) -> SoftwareRandomGenerator,
+    >,
+);
+
+impl ParallelIterator for ParallelSoftwareChildrenIterator {
+    type Item = SoftwareRandomGenerator;
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.0.drive_unindexed(consumer)
+    }
+}
+
+impl IndexedParallelIterator for ParallelSoftwareChildrenIterator {
+    fn len(&self) -> usize {
+        self.0.len()
+    }
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        self.0.drive(consumer)
+    }
+    fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output {
+        self.0.with_producer(callback)
+    }
+}
+
+impl ParallelRandomGenerator for SoftwareRandomGenerator {
+    type ParChildrenIter = ParallelSoftwareChildrenIterator;
+
+    fn par_try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ParChildrenIter, ForkError> {
+        self.0
+            .par_try_fork(n_children, n_bytes)
+            .map(|iterator| ParallelSoftwareChildrenIterator(iterator.map(SoftwareRandomGenerator)))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::generators::aes_ctr::aes_ctr_parallel_generic_tests;
+
+    #[test]
+    fn prop_fork_first_state_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_first_state_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_last_bound_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_last_bound_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_bound_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_bound_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_state_table_index() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_state_table_index::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork() {
+        aes_ctr_parallel_generic_tests::prop_fork::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_children_remaining_bytes() {
+        aes_ctr_parallel_generic_tests::prop_fork_children_remaining_bytes::<SoftwareBlockCipher>();
+    }
+
+    #[test]
+    fn prop_fork_parent_remaining_bytes() {
+        aes_ctr_parallel_generic_tests::prop_fork_parent_remaining_bytes::<SoftwareBlockCipher>();
+    }
+}
--- a/concrete-csprng/src/generators/mod.rs
+++ b/concrete-csprng/src/generators/mod.rs
@@ -0,0 +1,235 @@
+//! A module containing random generators objects.
+//!
+//! See [crate-level](`crate`) explanations.
+use crate::seeders::Seed;
+use std::error::Error;
+use std::fmt::{Display, Formatter};
+
+/// The number of children created when a generator is forked.
+#[derive(Debug, Copy, Clone)]
+pub struct ChildrenCount(pub usize);
+
+/// The number of bytes each child can generate, when a generator is forked.
+#[derive(Debug, Copy, Clone)]
+pub struct BytesPerChild(pub usize);
+
+/// A structure representing the number of bytes between two table indices.
+#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)]
+pub struct ByteCount(pub u128);
+
+/// An error occurring during a generator fork.
+#[derive(Debug)]
+pub enum ForkError {
+    ForkTooLarge,
+    ZeroChildrenCount,
+    ZeroBytesPerChild,
+}
+
+impl Display for ForkError {
+    /// Writes the fixed, human-readable explanation attached to the error variant.
+    ///
+    /// The text is emitted verbatim: no formatting arguments are interpolated.
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // Every variant carries a constant message: select it first, then emit it
+        // with a single write so that the formatter is touched in one place only.
+        let message = match self {
+            // The children would output bytes beyond the parent's bound.
+            ForkError::ForkTooLarge => {
+                "The children generators would output bytes after the parent bound. "
+            }
+            // The requested number of children was zero.
+            ForkError::ZeroChildrenCount => {
+                "The number of children in the fork must be greater than zero."
+            }
+            // The requested number of bytes per child was zero.
+            ForkError::ZeroBytesPerChild => {
+                "The number of bytes per child must be greater than zero."
+            }
+        };
+        f.write_str(message)
+    }
+}
+impl Error for ForkError {}
+
+/// A trait for cryptographically secure pseudo-random generators.
+///
+/// See the [crate-level](`crate`) documentation for details.
+pub trait RandomGenerator: Iterator<Item = u8> {
+    /// The iterator over children generators, returned by `try_fork` in case of success.
+    type ChildrenIter: Iterator<Item = Self>;
+
+    /// Creates a new generator from a seed.
+    ///
+    /// This operation is usually costly to perform, as the aes round keys need to be generated from
+    /// the seed.
+    fn new(seed: Seed) -> Self;
+
+    /// Returns the number of bytes that can still be outputted by the generator before reaching its
+    /// bound.
+    ///
+    /// Note:
+    /// -----
+    ///
+    /// A fresh generator can generate 2¹³² bytes. Unfortunately, no rust integer type is able
+    /// to encode such a large number. Consequently [`ByteCount`] uses the largest integer type
+    /// available to encode this value: the `u128` type. For this reason, this method does not
+    /// effectively return the number of remaining bytes, but instead
+    /// `min(2¹²⁸-1, remaining_bytes)`.
+    fn remaining_bytes(&self) -> ByteCount;
+
+    /// Returns the next byte of the stream, if the generator did not yet reach its bound.
+    fn next_byte(&mut self) -> Option<u8> {
+        self.next()
+    }
+
+    /// Tries to fork the generator into an iterator of `n_children` new generators, each able to
+    /// output `n_bytes` bytes.
+    ///
+    /// Note:
+    /// -----
+    ///
+    /// To be successful, the number of remaining bytes for the parent generator must be larger than
+    /// `n_children*n_bytes`.
+    fn try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ChildrenIter, ForkError>;
+}
+
+/// A trait extending [`RandomGenerator`] to the parallel iterators of `rayon`.
+#[cfg(feature = "parallel")]
+pub trait ParallelRandomGenerator: RandomGenerator + Send {
+    /// The iterator over children generators, returned by `par_try_fork` in case of success.
+    type ParChildrenIter: rayon::prelude::IndexedParallelIterator<Item = Self>;
+
+    /// Tries to fork the generator into a parallel iterator of `n_children` new generators, each
+    /// able to output `n_bytes` bytes.
+    ///
+    /// Note:
+    /// -----
+    ///
+    /// To be successful, the number of remaining bytes for the parent generator must be larger than
+    /// `n_children*n_bytes`.
+    fn par_try_fork(
+        &mut self,
+        n_children: ChildrenCount,
+        n_bytes: BytesPerChild,
+    ) -> Result<Self::ParChildrenIter, ForkError>;
+}
+
+mod aes_ctr;
+
+mod implem;
+pub use implem::*;
+
+#[cfg(test)]
+pub mod generator_generic_test {
+    #![allow(unused)] // to please clippy when tests are not activated
+    use super::*;
+    use rand::Rng;
+
+    const REPEATS: usize = 1_000;
+
+    fn any_seed() -> impl Iterator<Item = Seed> {
+        std::iter::repeat_with(|| Seed(rand::thread_rng().gen()))
+    }
+
+    fn some_children_count() -> impl Iterator<Item = ChildrenCount> {
+        std::iter::repeat_with(|| ChildrenCount(rand::thread_rng().gen::<usize>() % 16 + 1))
+    }
+
+    fn some_bytes_per_child() -> impl Iterator<Item = BytesPerChild> {
+        std::iter::repeat_with(|| BytesPerChild(rand::thread_rng().gen::<usize>() % 128 + 1))
+    }
+
+    /// Checks that the PRNG roughly generates uniform numbers.
+    ///
+    /// To do that, we build a histogram of the occurrences of each byte value, over a fixed
+    /// number of samples and check that the empirical probabilities of the bins are close to
+    /// the theoretical probabilities.
+    pub fn test_roughly_uniform<G: RandomGenerator>() {
+        // Number of bins to use for the histogram.
+        const N_BINS: usize = u8::MAX as usize + 1;
+        // Number of samples to use for the histogram.
+        let n_samples = 10_000_000_usize;
+        // Theoretical probability of each bin.
+        let expected_prob: f64 = 1. / N_BINS as f64;
+        // Absolute error allowed on the empirical probabilities.
+        // This value was tuned to make the test pass on an arguably correct state of
+        // implementation. 10^-3 precision is arguably pretty fine for this rough test, but it would
+        // be interesting to improve this test.
+        let precision = 10f64.powi(-3);
+
+        for _ in 0..REPEATS {
+            // We instantiate a new generator.
+            let seed = any_seed().next().unwrap();
+            let mut generator = G::new(seed);
+            // We create a new histogram
+            let mut counts = [0usize; N_BINS];
+            // We fill the histogram.
+            for _ in 0..n_samples {
+                counts[generator.next_byte().unwrap() as usize] += 1;
+            }
+            // We check that the empirical probabilities are close enough to the theoretical one.
+            counts
+                .iter()
+                .map(|a| (*a as f64) / (n_samples as f64))
+                .for_each(|a| assert!((a - expected_prob).abs() < precision))
+        }
+    }
+
+    /// Checks that given a state and a key, the PRNG is deterministic.
+    pub fn test_generator_determinism<G: RandomGenerator>() {
+        for _ in 0..REPEATS {
+            let seed = any_seed().next().unwrap();
+            let mut first_generator = G::new(seed);
+            let mut second_generator = G::new(seed);
+            for _ in 0..1024 {
+                assert_eq!(first_generator.next(), second_generator.next());
+            }
+        }
+    }
+
+    /// Checks that a fork returns a bounded child, and that the proper number of bytes can be
+    /// generated.
+    pub fn test_fork_children<G: RandomGenerator>() {
+        for _ in 0..REPEATS {
+            let ((seed, n_children), n_bytes) = any_seed()
+                .zip(some_children_count())
+                .zip(some_bytes_per_child())
+                .next()
+                .unwrap();
+            let mut gen = G::new(seed);
+            let mut bounded = gen.try_fork(n_children, n_bytes).unwrap().next().unwrap();
+            assert_eq!(bounded.remaining_bytes(), ByteCount(n_bytes.0 as u128));
+            for _ in 0..n_bytes.0 {
+                bounded.next().unwrap();
+            }
+
+            // Assert we are at the bound
+            assert!(bounded.next().is_none());
+        }
+    }
+
+    /// Checks that a bounded prng returns none when exceeding the allowed number of bytes.
+    ///
+    /// To properly check for panic use `#[should_panic(expected = "expected test panic")]` as an
+    /// attribute on the test function.
+    pub fn test_bounded_none_should_panic<G: RandomGenerator>() {
+        let ((seed, n_children), n_bytes) = any_seed()
+            .zip(some_children_count())
+            .zip(some_bytes_per_child())
+            .next()
+            .unwrap();
+        let mut gen = G::new(seed);
+        let mut bounded = gen.try_fork(n_children, n_bytes).unwrap().next().unwrap();
+        assert_eq!(bounded.remaining_bytes(), ByteCount(n_bytes.0 as u128));
+        for _ in 0..n_bytes.0 {
+            assert!(bounded.next().is_some());
+        }
+
+        // One call too many, should panic
+        bounded.next().ok_or("expected test panic").unwrap();
+    }
+}
--- a/concrete-csprng/src/lib.rs
+++ b/concrete-csprng/src/lib.rs
@@ -0,0 +1,114 @@
+#![deny(rustdoc::broken_intra_doc_links)]
+//! Cryptographically secure pseudo random number generator.
+//!
+//! Welcome to the `concrete-csprng` documentation.
+//!
+//! This crate provides a fast cryptographically secure pseudo-random number generator, suited to
+//! work in a multithreaded setting.
+//!
+//! Random Generators
+//! =================
+//!
+//! The central abstraction of this crate is the [`RandomGenerator`](generators::RandomGenerator)
+//! trait, which is implemented by different types, each supporting a different platform. In
+//! essence, a type implementing [`RandomGenerator`](generators::RandomGenerator) is a type that
+//! outputs a new pseudo-random byte at each call to
+//! [`next_byte`](generators::RandomGenerator::next_byte). Such a generator `g` can be seen as
+//! enclosing a growing index into an imaginary array of pseudo-random bytes:
+//! ```ascii
+//!   0 1 2 3 4 5 6 7 8 9     M-1     │
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓      │
+//!  ┃ │ │ │ │ │ │ │ │ │ │...│ ┃      │
+//!  ┗↥┷━┷━┷━┷━┷━┷━┷━┷━┷━┷━━━┷━┛      │
+//!   g                               │
+//!                                   │
+//!   g.next_byte()                   │
+//!                                   │
+//!   0 1 2 3 4 5 6 7 8 9     M-1     │
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓      │
+//!  ┃╳│ │ │ │ │ │ │ │ │ │...│ ┃      │
+//!  ┗━┷↥┷━┷━┷━┷━┷━┷━┷━┷━┷━━━┷━┛      │
+//!     g                             │
+//!                                   │
+//!   g.next_byte()                   │  legend:
+//!                                   │  -------
+//!   0 1 2 3 4 5 6 7 8 9     M-1     │   ↥ : next byte to be outputted by g
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓      │  │ │: byte not yet outputted by g
+//!  ┃╳│╳│ │ │ │ │ │ │ │ │...│ ┃      │  │╳│: byte already outputted by g
+//!  ┗━┷━┷↥┷━┷━┷━┷━┷━┷━┷━┷━━━┷━┛      │  
+//!       g                           🭭
+//! ```
+//!
+//! While being large, this imaginary array is still bounded to M = 2¹³² bytes. Consequently, a
+//! generator is always bounded to a maximal index. That is, there is always a max amount of
+//! elements of this array that can be outputted by the generator. By default, generators created
+//! via [`new`](generators::RandomGenerator::new) are always bounded to M-1.
+//!
+//! Tree partition of the pseudo-random stream
+//! ==========================================
+//!
+//! One particularity of this implementation is that you can use the
+//! [`try_fork`](generators::RandomGenerator::try_fork) method to create an arbitrary partition tree
+//! of a region of this array. Indeed, calling `try_fork(nc, nb)` outputs `nc` new generators, each
+//! able to output `nb` bytes. The `try_fork` method ensures that the states and bounds of the
+//! parent and children generators are set so as to prevent the same substream to be outputted
+//! twice:
+//! ```ascii
+//!   0 1 2 3 4 5 6 7 8 9     M   │   
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓  │  
+//!  ┃P│P│P│P│P│P│P│P│P│P│...│P┃  │  
+//!  ┗↥┷━┷━┷━┷━┷━┷━┷━┷━┷━┷━━━┷━┛  │  
+//!   p                           │  
+//!                               │  
+//!   (a,b) = p.fork(2,4)         │  
+//!                               │
+//!   0 1 2 3 4 5 6 7 8 9     M   │
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓  │
+//!  ┃A│A│A│A│B│B│B│B│P│P│...│P┃  │
+//!  ┗↥┷━┷━┷━┷↥┷━┷━┷━┷↥┷━┷━━━┷━┛  │
+//!   a       b       p           │
+//!                               │  legend:
+//!   (c,d) = b.fork(2, 1)        │  -------
+//!                               │   ↥ : next byte to be outputted by p
+//!   0 1 2 3 4 5 6 7 8 9     M   │  │P│: byte to be outputted by p
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓  │  │╳│: byte already outputted
+//!  ┃A│A│A│A│C│D│B│B│P│P│...│P┃  │  
+//!  ┗↥┷━┷━┷━┷↥┷↥┷↥┷━┷↥┷━┷━━━┷━┛  │
+//!   a       c d b   p           🭭
+//! ```
+//!
+//! This makes it possible to consume the stream at different places. This is particularly useful in
+//! a multithreaded setting, in which we want to use the same generator from different independent
+//! threads:
+//!
+//! ```ascii
+//!   0 1 2 3 4 5 6 7 8 9     M   │   
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓  │  
+//!  ┃A│A│A│A│C│D│B│B│P│P│...│P┃  │  
+//!  ┗↥┷━┷━┷━┷↥┷↥┷↥┷━┷↥┷━┷━━━┷━┛  │  
+//!   a       c d b   p           │  
+//!                               │  
+//!   a.next_byte()               │  
+//!                               │
+//!   0 1 2 3 4 5 6 7 8 9     M   │
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓  │
+//!  ┃╳│A│A│A│C│D│B│B│P│P│...│P┃  │
+//!  ┗━┷↥┷━┷━┷↥┷↥┷↥┷━┷↥┷━┷━━━┷━┛  │
+//!     a     c d b   p           │
+//!                               │  legend:
+//!   b.next_byte()               │  -------
+//!                               │   ↥ : next byte to be outputted by p
+//!   0 1 2 3 4 5 6 7 8 9     M   │  │P│: byte to be outputted by p  
+//!  ┏━┯━┯━┯━┯━┯━┯━┯━┯━┯━┯━━━┯━┓  │  │╳│: byte already outputted
+//!  ┃╳│A│A│A│C│D│╳│B│P│P│...│P┃  │  
+//!  ┗━┷↥┷━┷━┷↥┷↥┷━┷↥┷↥┷━┷━━━┷━┛  │
+//!     a     c d   b p           🭭
+//! ```
+//!
+//! Implementation
+//! ==============
+//!
+//! The implementation is based on the AES blockcipher used in counter (CTR) mode, as presented
+//! in the ISO/IEC 18033-4 document.
+pub mod generators;
+pub mod seeders;
--- a/concrete-csprng/src/seeders/implem/apple_secure_enclave_seeder.rs
+++ b/concrete-csprng/src/seeders/implem/apple_secure_enclave_seeder.rs
@@ -0,0 +1,141 @@
+use crate::seeders::{Seed, Seeder};
+use libc;
+use std::cmp::Ordering;
+
+/// There is no `rdseed` equivalent in the ARM specification until `ARMv8.5-A`.
+/// However, it seems that these instructions are not exposed in `core::arch::aarch64`.
+///
+/// Our primary interest in supporting aarch64 targets is Apple Silicon support,
+/// and the M1 Macs currently available are based on the `ARMv8.4-A` instruction set.
+///
+/// So we fall back to using a function from Apple's API which
+/// uses the [Secure Enclave] to generate cryptographically secure random bytes.
+///
+/// [Secure Enclave]: https://support.apple.com/fr-fr/guide/security/sec59b0b31ff/web
+mod secure_enclave {
+    pub enum __SecRandom {}
+    pub type SecRandomRef = *const __SecRandom;
+    use libc::{c_int, c_void};
+
+    #[link(name = "Security", kind = "framework")]
+    extern "C" {
+        pub static kSecRandomDefault: SecRandomRef;
+
+        pub fn SecRandomCopyBytes(rnd: SecRandomRef, count: usize, bytes: *mut c_void) -> c_int;
+    }
+    /// Fills `bytes` via `SecRandomCopyBytes`; a non-zero status yields the last OS error.
+    pub fn generate_random_bytes(bytes: &mut [u8]) -> std::io::Result<()> {
+        // As per Apple's documentation:
+        // - https://developer.apple.com/documentation/security/randomization_services?language=objc
+        // - https://developer.apple.com/documentation/security/1399291-secrandomcopybytes?language=objc
+        //
+        // There, `SecRandomCopyBytes` is listed under "Generate cryptographically secure random numbers"
+        unsafe {
+            let res = SecRandomCopyBytes(
+                kSecRandomDefault,
+                bytes.len(),
+                bytes.as_mut_ptr() as *mut c_void,
+            );
+            if res != 0 {
+                Err(std::io::Error::last_os_error())
+            } else {
+                Ok(())
+            }
+        }
+    }
+}
+
+/// A seeder which uses the `SecRandomCopyBytes` function from Apple's `Security` framework.
+///
+/// <https://developer.apple.com/documentation/security/1399291-secrandomcopybytes?language=objc>
+pub struct AppleSecureEnclaveSeeder;
+
+impl Seeder for AppleSecureEnclaveSeeder {
+    fn seed(&mut self) -> Seed {
+        // 16 bytes == 128 bits
+        let mut bytes = [0u8; 16];
+        // `expect` appends the error's `Debug` output itself: no placeholder in the message.
+        secure_enclave::generate_random_bytes(&mut bytes)
+            .expect("Failure while using Apple secure enclave");
+        Seed(u128::from_le_bytes(bytes))
+    }
+
+    fn is_available() -> bool {
+        let os_version_sysctl_name = match std::ffi::CString::new("kern.osproductversion") {
+            Ok(c_str) => c_str,
+            _ => return false,
+        };
+
+        // Big enough buffer to get a version output as an ASCII string
+        const OUTPUT_BUFFER_SIZE: usize = 64;
+        let mut output_buffer_size = OUTPUT_BUFFER_SIZE;
+        let mut output_buffer = [0u8; OUTPUT_BUFFER_SIZE];
+        let res = unsafe {
+            libc::sysctlbyname(
+                os_version_sysctl_name.as_ptr() as *const _ as *const _,
+                &mut output_buffer as *mut _ as *mut _,
+                &mut output_buffer_size as *mut _ as *mut _,
+                std::ptr::null_mut(),
+                0,
+            )
+        };
+
+        if res != 0 {
+            return false;
+        }
+
+        let result_c_str =
+            match std::ffi::CStr::from_bytes_with_nul(&output_buffer[..output_buffer_size]) {
+                Ok(c_str) => c_str,
+                _ => return false,
+            };
+
+        let result_string = match result_c_str.to_str() {
+            Ok(str) => str,
+            _ => return false,
+        };
+
+        // Normally we get a major and a minor version, separated by dots
+        let split_string: Vec<&str> = result_string.split('.').collect();
+        // -1 stands for a version component that is absent from the version string
+        let mut major = -1;
+        let mut minor = -1;
+
+        // Major part of the version string
+        if !split_string.is_empty() {
+            major = match split_string[0].parse() {
+                Ok(major_from_str) => major_from_str,
+                _ => return false,
+            };
+        }
+
+        // SecRandomCopyBytes is available starting with macOS 10.7
+        // https://developer.apple.com/documentation/security/1399291-secrandomcopybytes?language=objc
+        // This match pattern is recommended by clippy, so we oblige here
+        match major.cmp(&10) {
+            Ordering::Greater => true,
+            Ordering::Equal => {
+                // Minor part of the version string
+                if split_string.len() >= 2 {
+                    minor = match split_string[1].parse() {
+                        Ok(minor_from_str) => minor_from_str,
+                        _ => return false,
+                    };
+                }
+                minor >= 7
+            }
+            Ordering::Less => false,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::seeders::generic_tests::check_seeder_fixed_sequences_different;
+
+    #[test]
+    fn check_bounded_sequence_difference() {
+        check_seeder_fixed_sequences_different(|_| AppleSecureEnclaveSeeder);
+    }
+}
--- a/concrete-csprng/src/seeders/implem/mod.rs
+++ b/concrete-csprng/src/seeders/implem/mod.rs
@@ -0,0 +1,14 @@
+#[cfg(target_os = "macos")]
+mod apple_secure_enclave_seeder;
+#[cfg(target_os = "macos")]
+pub use apple_secure_enclave_seeder::AppleSecureEnclaveSeeder;
+
+#[cfg(feature = "seeder_x86_64_rdseed")]
+mod rdseed;
+#[cfg(feature = "seeder_x86_64_rdseed")]
+pub use rdseed::RdseedSeeder;
+
+#[cfg(feature = "seeder_unix")]
+mod unix;
+#[cfg(feature = "seeder_unix")]
+pub use unix::UnixSeeder;
--- a/concrete-csprng/src/seeders/implem/rdseed.rs
+++ b/concrete-csprng/src/seeders/implem/rdseed.rs
@@ -0,0 +1,51 @@
+use crate::seeders::{Seed, Seeder};
+
+/// A seeder which uses the `rdseed` x86_64 instruction.
+///
+/// The `rdseed` instruction delivers seeds from a hardware source of entropy, see
+/// <https://www.felixcloutier.com/x86/rdseed>.
+pub struct RdseedSeeder;
+
+impl Seeder for RdseedSeeder {
+    fn seed(&mut self) -> Seed {
+        Seed(unsafe { rdseed_random_m128() })
+    }
+
+    fn is_available() -> bool {
+        is_x86_feature_detected!("rdseed")
+    }
+}
+
+// Generates a random 128 bits value from two successive 64 bits `rdseed` outputs.
+//
+// Safety: the caller must ensure the CPU supports the `rdseed` instruction.
+#[target_feature(enable = "rdseed")]
+unsafe fn rdseed_random_m128() -> u128 {
+    let mut halves = [0u64; 2];
+    for half in halves.iter_mut() {
+        // `_rdseed64_step` returns 1 when a value was written and 0 when the hardware had
+        // no entropy available yet, in which case the request is simply retried.
+        while unsafe { core::arch::x86_64::_rdseed64_step(half) } != 1 {
+            // Hint the CPU that we are busy-waiting between two attempts.
+            core::hint::spin_loop();
+        }
+    }
+
+    // Native-endian on both sides: the first output fills bytes 0..8 of the result in
+    // memory and the second output fills bytes 8..16.
+    let mut output_bytes = [0u8; 16];
+    output_bytes[0..8].copy_from_slice(&halves[0].to_ne_bytes());
+    output_bytes[8..16].copy_from_slice(&halves[1].to_ne_bytes());
+    u128::from_ne_bytes(output_bytes)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::seeders::generic_tests::check_seeder_fixed_sequences_different;
+
+    #[test]
+    fn check_bounded_sequence_difference() {
+        check_seeder_fixed_sequences_different(|_| RdseedSeeder);
+    }
+}
--- a/concrete-csprng/src/seeders/implem/unix.rs
+++ b/concrete-csprng/src/seeders/implem/unix.rs
@@ -0,0 +1,72 @@
+use crate::seeders::{Seed, Seeder};
+use std::fs::File;
+use std::io::Read;
+
+/// A seeder which uses the `/dev/random` source on unix-like systems.
+pub struct UnixSeeder {
+    counter: u128,
+    secret: u128,
+    file: File,
+}
+
+impl UnixSeeder {
+    /// Creates a new seeder from a user defined secret.
+    ///
+    /// Important:
+    /// ----------
+    ///
+    /// This secret is used to ensure the quality of the seed in scenarios where `/dev/random` may
+    /// be compromised.
+    ///
+    /// The attack hypotheses are as follows:
+    /// - `/dev/random` output can be predicted by a process running on the machine by just
+    ///   observing various states of the machine
+    /// - The attacker cannot read data from the process where `concrete-csprng` is running
+    ///
+    /// Using a secret in `concrete-csprng` makes it possible to generate values that the attacker
+    /// cannot predict, making this seeder secure on systems where `/dev/random` outputs can be
+    /// predicted.
+    pub fn new(secret: u128) -> UnixSeeder {
+        let file = std::fs::File::open("/dev/random").expect("Failed to open /dev/random .");
+        let counter = std::time::UNIX_EPOCH
+            .elapsed()
+            .expect("Failed to initialize unix seeder.")
+            .as_nanos();
+        UnixSeeder {
+            secret,
+            counter,
+            file,
+        }
+    }
+}
+
+impl Seeder for UnixSeeder {
+    fn seed(&mut self) -> Seed {
+        let output = self.secret ^ self.counter ^ dev_random(&mut self.file);
+        self.counter = self.counter.wrapping_add(1);
+        Seed(output)
+    }
+
+    fn is_available() -> bool {
+        cfg!(target_family = "unix")
+    }
+}
+
+fn dev_random(random: &mut File) -> u128 {
+    let mut buf = [0u8; 16];
+    random
+        .read_exact(&mut buf[..])
+        .expect("Failed to read from /dev/random .");
+    u128::from_ne_bytes(buf)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::seeders::generic_tests::check_seeder_fixed_sequences_different;
+
+    #[test]
+    fn check_bounded_sequence_difference() {
+        check_seeder_fixed_sequences_different(UnixSeeder::new);
+    }
+}
--- a/concrete-csprng/src/seeders/mod.rs
+++ b/concrete-csprng/src/seeders/mod.rs
@@ -0,0 +1,47 @@
+//! A module containing seeders objects.
+//!
+//! When initializing a generator, one needs to provide a [`Seed`], which is then used as key to the
+//! AES blockcipher. As a consequence, the quality of the outputs of the generator is directly
+//! conditioned by the quality of this seed. This module proposes different mechanisms to deliver
+//! seeds that can accommodate varying scenarios.
+
+/// A seed value, used to initialize a generator.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub struct Seed(pub u128);
+
+/// A trait representing a seeding strategy.
+pub trait Seeder {
+    /// Generates a new seed.
+    fn seed(&mut self) -> Seed;
+
+    /// Checks whether the seeder can be used on the current machine. This function may check if
+    /// some required CPU features or some OS features are available, for example.
+    fn is_available() -> bool
+    where
+        Self: Sized;
+}
+
+mod implem;
+pub use implem::*;
+
+#[cfg(test)]
+mod generic_tests {
+    use crate::seeders::Seeder;
+
+    /// Naively verifies that two fixed-size sequences generated by repeatedly calling the seeder
+    /// are different.
+    #[allow(unused)] // to please clippy when tests are not activated
+    pub fn check_seeder_fixed_sequences_different<S: Seeder, F: Fn(u128) -> S>(
+        construct_seeder: F,
+    ) {
+        const SEQUENCE_SIZE: usize = 500;
+        const REPEATS: usize = 10_000;
+        for i in 0..REPEATS {
+            let mut seeder = construct_seeder(i as u128);
+            let orig_seed = seeder.seed();
+            for _ in 0..SEQUENCE_SIZE {
+                assert_ne!(seeder.seed(), orig_seed);
+            }
+        }
+    }
+}
--- a/scripts/dieharder_test.sh
+++ b/scripts/dieharder_test.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+# dieharder does not support running a subset of its tests, so we'll check which ones are not good
+# and ignore the output from those tests in the final log
+
+set -e
+
+DIEHARDER_RUN_LOG_FILE="dieharder_run.log"
+
+bad_tests="$(dieharder -l | \
+# select lines with the -d
+grep -w '\-d' | \
+# forget about the good tests
+grep -v -i 'good' | \
+# get the test id
+cut -d ' ' -f 4 | \
+# nice formatting
+xargs)"
+
+# build a regex alternation (name1|name2|...) out of the names of the unreliable tests
+bad_test_filter=""
+for bad_test in ${bad_tests}; do
+    bad_test_filter="${bad_test_filter:+${bad_test_filter}|}$(dieharder -d "${bad_test}" -t 1 -p 1 -D test_name | xargs)"
+done
+
+echo "The following tests will be ignored as they are marked as either 'suspect' or 'do not use': "
+echo ""
+echo "${bad_test_filter}"
+echo ""
+
+# pv may not be installed: by default just forward the input with cat
+pv_cmd=(cat)
+if command -v pv > /dev/null 2>&1; then
+    pv_cmd=(pv -t -a -b)
+fi
+
+rm -f "${DIEHARDER_RUN_LOG_FILE}"
+
+# ignore potential errors and parse the log afterwards
+set +e
+
+# We are writing in both cases
+# shellcheck disable=SC2094
+./target/release/examples/generate 2>"${DIEHARDER_RUN_LOG_FILE}" | \
+"${pv_cmd[@]}" | \
+# -a: all tests
+# -g 200: get random bytes from input
+# -Y 1: disambiguate results, i.e. if a weak result appear check if it's a random failure/weakness
+# -k 2: better maths formulas to determine some test statistics
+dieharder -a -g 200 -Y 1 -k 2 | \
+tee -a "${DIEHARDER_RUN_LOG_FILE}"
+set -e
+
+printf "\n\n"
+
+cat "${DIEHARDER_RUN_LOG_FILE}"
+
+if ! grep -q -i "failed" < "${DIEHARDER_RUN_LOG_FILE}"; then
+    echo "All tests passed!"
+    exit 0
+fi
+
+printf "\n\n"
+# an empty filter would match (and hide) every failure: fall back to '^$' which hides nothing
+failed_tests="$(grep -i "failed" < "${DIEHARDER_RUN_LOG_FILE}")"
+true_failed_test="$(grep -i "failed" < "${DIEHARDER_RUN_LOG_FILE}" | { grep -v -E "${bad_test_filter:-^\$}" || true; } | sed -z '$ s/\n$//')"
+
+if [[ "${true_failed_test}" == "" ]]; then
+    echo "There were test failures, but the tests were either marked as 'suspect' or 'do not use'"
+    echo "${failed_tests}"
+    exit 0
+fi
+
+echo "The following tests failed:"
+echo "${true_failed_test}"
+
+exit 1
--- a/scripts/integer-tests.sh
+++ b/scripts/integer-tests.sh
@@ -142,7 +142,7 @@ and not test(/.*default_add_sequence_multi_thread_param_message_3_carry_3_ks_pbs
    fi

    num_cpu_threads="$(${nproc_bin})"
-    num_threads=$((num_cpu_threads * 2 / 3))
+    num_threads=$((num_cpu_threads * 1 / 2))
    cargo "${RUST_TOOLCHAIN}" nextest run \
        --tests \
        --cargo-profile "${cargo_profile}" \
--- a/tfhe/Cargo.toml
+++ b/tfhe/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tfhe"
-version = "0.3.0"
+version = "0.4.0"
 edition = "2021"
 readme = "../README.md"
 keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
@@ -17,7 +17,7 @@ exclude = [
    "/js_on_wasm_tests/",
    "/web_wasm_parallel_tests/",
 ]
-rust-version = "1.67"
+rust-version = "1.72"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

@@ -29,24 +29,27 @@ lazy_static = { version = "1.4.0" }
 criterion = "0.4.0"
 doc-comment = "0.3.3"
 serde_json = "1.0.94"
-clap = { version = "4.2.7", features = ["derive"] }
+# clap has to be pinned as its minimum supported rust version
+# changes often between minor releases, which breaks our CI
+clap = { version = "=4.2.7", features = ["derive"] }
 # Used in user documentation
 bincode = "1.3.3"
 fs2 = { version = "0.4.3" }
 itertools = "0.10.5"
-num_cpus = "1.15"
 # For erf and normality test
 libm = "0.2.6"
+# Begin regex-engine deps
 test-case = "3.1.0"
 combine = "4.6.6"
 env_logger = "0.10.0"
 log = "0.4.19"
+# End regex-engine deps

 [build-dependencies]
 cbindgen = { version = "0.24.3", optional = true }

 [dependencies]
-concrete-csprng = { version = "0.3.0", features = [
+concrete-csprng = { version = "0.4.0", path= "../concrete-csprng", features = [
    "generator_fallback",
    "parallel",
 ] }
@@ -54,12 +57,12 @@ lazy_static = { version = "1.4.0", optional = true }
 serde = { version = "1.0", features = ["derive"] }
 rayon = { version = "1.5.0" }
 bincode = { version = "1.3.3", optional = true }
-concrete-fft = { version = "0.2.1", features = ["serde", "fft128"] }
-pulp = "0.11"
+concrete-fft = { version = "0.3.0", features = ["serde", "fft128"] }
+pulp = "0.13"
+concrete-ntt = "0.1.0"
 aligned-vec = { version = "0.5", features = ["serde"] }
 dyn-stack = { version = "0.9" }
-once_cell = "1.13"
-paste = "1.0.7"
+paste = { version = "1.0.7", optional = true }
 fs2 = { version = "0.4.3", optional = true }
 # While we wait for repeat_n in rust standard library
 itertools = "0.10.5"
@@ -76,17 +79,18 @@ getrandom = { version = "0.2.8", optional = true }
 bytemuck = "1.13.1"

 [features]
-boolean = []
-shortint = []
-integer = ["shortint"]
-internal-keycache = ["lazy_static", "fs2", "bincode"]
+# paste is used by the HL API
+boolean = ["dep:paste"]
+shortint = ["dep:paste"]
+integer = ["shortint", "dep:paste"]
+internal-keycache = ["lazy_static", "dep:fs2", "bincode", "dep:paste"]

 # Experimental section
 experimental = []
 experimental-force_fft_algo_dif4 = []
 # End experimental section

-__c_api = ["cbindgen", "bincode"]
+__c_api = ["cbindgen", "bincode", "dep:paste"]
 boolean-c-api = ["boolean", "__c_api"]
 shortint-c-api = ["shortint", "__c_api"]
 high-level-c-api = ["boolean-c-api", "shortint-c-api", "integer", "__c_api"]
@@ -118,6 +122,7 @@ generator_aarch64_aes = ["concrete-csprng/generator_aarch64_aes"]

 # Private features
 __profiling = []
+__coverage = []

 seeder_unix = ["concrete-csprng/seeder_unix"]
 seeder_x86_64_rdseed = ["concrete-csprng/seeder_x86_64_rdseed"]
@@ -160,6 +165,11 @@ name = "pbs128-bench"
 path = "benches/core_crypto/pbs128_bench.rs"
 harness = false

+[[bench]]
+name = "pbs-crt-bench"
+path = "benches/core_crypto/pbs_crt_bench.rs"
+harness = false
+
 [[bench]]
 name = "boolean-bench"
 path = "benches/boolean/bench.rs"
@@ -222,6 +232,11 @@ name = "micro_bench_and"
 path = "examples/utilities/micro_bench_and.rs"
 required-features = ["boolean"]

+[[example]]
+name = "write_params_to_file"
+path = "examples/utilities/params_to_file.rs"
+required-features = ["boolean", "shortint", "internal-keycache"]
+
 # Real use-case examples

 [[example]]
--- a/tfhe/benches/boolean/bench.rs
+++ b/tfhe/benches/boolean/bench.rs
@@ -4,14 +4,21 @@ use crate::utilities::{write_to_json, CryptoParametersRecord, OperatorType};

 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use tfhe::boolean::client_key::ClientKey;
-use tfhe::boolean::parameters::{BooleanParameters, DEFAULT_PARAMETERS, TFHE_LIB_PARAMETERS};
+use tfhe::boolean::parameters::{
+    BooleanParameters, DEFAULT_PARAMETERS, DEFAULT_PARAMETERS_KS_PBS,
+    PARAMETERS_ERROR_PROB_2_POW_MINUS_165, PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS,
+    TFHE_LIB_PARAMETERS,
+};
 use tfhe::boolean::prelude::BinaryBooleanGates;
 use tfhe::boolean::server_key::ServerKey;

 criterion_group!(
    gates_benches,
    bench_default_parameters,
-    bench_tfhe_lib_parameters
+    bench_default_parameters_ks_pbs,
+    bench_low_prob_parameters,
+    bench_low_prob_parameters_ks_pbs,
+    bench_tfhe_lib_parameters,
 );

 criterion_main!(gates_benches);
@@ -79,6 +86,26 @@ fn bench_default_parameters(c: &mut Criterion) {
    benchs(c, DEFAULT_PARAMETERS, "DEFAULT_PARAMETERS");
 }

-fn bench_tfhe_lib_parameters(c: &mut Criterion) {
-    benchs(c, TFHE_LIB_PARAMETERS, "TFHE_LIB_PARAMETERS");
+fn bench_default_parameters_ks_pbs(c: &mut Criterion) {
+    benchs(c, DEFAULT_PARAMETERS_KS_PBS, "DEFAULT_PARAMETERS_KS_PBS");
+}
+
+fn bench_low_prob_parameters(c: &mut Criterion) {
+    benchs(
+        c,
+        PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
+        "PARAMETERS_ERROR_PROB_2_POW_MINUS_165",
+    );
+}
+
+fn bench_low_prob_parameters_ks_pbs(c: &mut Criterion) {
+    benchs(
+        c,
+        PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS,
+        "PARAMETERS_ERROR_PROB_2_POW_MINUS_165_KS_PBS",
+    );
+}
+
+fn bench_tfhe_lib_parameters(c: &mut Criterion) {
+    benchs(c, TFHE_LIB_PARAMETERS, "TFHE_LIB_PARAMETERS");
 }
--- a/tfhe/benches/core_crypto/pbs_bench.rs
+++ b/tfhe/benches/core_crypto/pbs_bench.rs
@@ -5,13 +5,15 @@ use rayon::prelude::*;

 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use serde::Serialize;
-use tfhe::boolean::parameters::{BooleanParameters, DEFAULT_PARAMETERS, TFHE_LIB_PARAMETERS};
+use tfhe::boolean::parameters::{
+    BooleanParameters, DEFAULT_PARAMETERS, PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
+};
 use tfhe::core_crypto::prelude::*;
-use tfhe::shortint::keycache::NamedParam;
+use tfhe::keycache::NamedParam;
 use tfhe::shortint::parameters::*;
 use tfhe::shortint::ClassicPBSParameters;

-const SHORTINT_BENCH_PARAMS: [ClassicPBSParameters; 15] = [
+const SHORTINT_BENCH_PARAMS: [ClassicPBSParameters; 19] = [
    PARAM_MESSAGE_1_CARRY_0_KS_PBS,
    PARAM_MESSAGE_1_CARRY_1_KS_PBS,
    PARAM_MESSAGE_2_CARRY_0_KS_PBS,
@@ -27,11 +29,18 @@ const SHORTINT_BENCH_PARAMS: [ClassicPBSParameters; 15] = [
    PARAM_MESSAGE_6_CARRY_0_KS_PBS,
    PARAM_MESSAGE_7_CARRY_0_KS_PBS,
    PARAM_MESSAGE_8_CARRY_0_KS_PBS,
+    PARAM_MESSAGE_1_CARRY_1_PBS_KS,
+    PARAM_MESSAGE_2_CARRY_2_PBS_KS,
+    PARAM_MESSAGE_3_CARRY_3_PBS_KS,
+    PARAM_MESSAGE_4_CARRY_4_PBS_KS,
 ];

 const BOOLEAN_BENCH_PARAMS: [(&str, BooleanParameters); 2] = [
    ("BOOLEAN_DEFAULT_PARAMS", DEFAULT_PARAMETERS),
-    ("BOOLEAN_TFHE_LIB_PARAMS", TFHE_LIB_PARAMETERS),
+    (
+        "BOOLEAN_TFHE_LIB_PARAMS",
+        PARAMETERS_ERROR_PROB_2_POW_MINUS_165,
+    ),
 ];

 criterion_group!(
@@ -57,8 +66,8 @@ criterion_group!(

 criterion_main!(pbs_group, multi_bit_pbs_group, pbs_throughput_group);

-fn benchmark_parameters<Scalar: UnsignedInteger>(
-) -> Vec<(&'static str, CryptoParametersRecord<Scalar>)> {
+fn benchmark_parameters<Scalar: UnsignedInteger>() -> Vec<(String, CryptoParametersRecord<Scalar>)>
+{
    if Scalar::BITS == 64 {
        SHORTINT_BENCH_PARAMS
            .iter()
@@ -74,7 +83,7 @@ fn benchmark_parameters<Scalar: UnsignedInteger>(
    } else if Scalar::BITS == 32 {
        BOOLEAN_BENCH_PARAMS
            .iter()
-            .map(|(name, params)| (*name, params.to_owned().into()))
+            .map(|(name, params)| (name.to_string(), params.to_owned().into()))
            .collect()
    } else {
        vec![]
@@ -82,7 +91,7 @@ fn benchmark_parameters<Scalar: UnsignedInteger>(
 }

 fn throughput_benchmark_parameters<Scalar: UnsignedInteger>(
-) -> Vec<(&'static str, CryptoParametersRecord<Scalar>)> {
+) -> Vec<(String, CryptoParametersRecord<Scalar>)> {
    if Scalar::BITS == 64 {
        vec![
            PARAM_MESSAGE_1_CARRY_1_KS_PBS,
@@ -102,18 +111,15 @@ fn throughput_benchmark_parameters<Scalar: UnsignedInteger>(
    } else if Scalar::BITS == 32 {
        BOOLEAN_BENCH_PARAMS
            .iter()
-            .map(|(name, params)| (*name, params.to_owned().into()))
+            .map(|(name, params)| (name.to_string(), params.to_owned().into()))
            .collect()
    } else {
        vec![]
    }
 }

-fn multi_bit_benchmark_parameters<Scalar: UnsignedInteger + Default>() -> Vec<(
-    &'static str,
-    CryptoParametersRecord<Scalar>,
-    LweBskGroupingFactor,
-)> {
+fn multi_bit_benchmark_parameters<Scalar: UnsignedInteger + Default>(
+) -> Vec<(String, CryptoParametersRecord<Scalar>, LweBskGroupingFactor)> {
    if Scalar::BITS == 64 {
        vec![
            PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_2_KS_PBS,
@@ -234,7 +240,7 @@ fn mem_optimized_pbs<Scalar: UnsignedTorus + CastInto<usize> + Serialize>(c: &mu
        write_to_json(
            &id,
            *params,
-            *name,
+            name,
            "pbs",
            &OperatorType::Atomic,
            bit_size,
@@ -323,7 +329,7 @@ fn multi_bit_pbs<
        write_to_json(
            &id,
            *params,
-            *name,
+            name,
            "pbs",
            &OperatorType::Atomic,
            bit_size,
@@ -412,7 +418,7 @@ fn multi_bit_deterministic_pbs<
        write_to_json(
            &id,
            *params,
-            *name,
+            name,
            "pbs",
            &OperatorType::Atomic,
            bit_size,
@@ -469,8 +475,6 @@ fn pbs_throughput<Scalar: UnsignedTorus + CastInto<usize> + Sync + Send + Serial
            params.ciphertext_modulus.unwrap(),
        );

-        let lwe_vec = lwe_vec;
-
        let fft = Fft::new(params.polynomial_size.unwrap());
        let fft = fft.as_view();

@@ -534,7 +538,7 @@ fn pbs_throughput<Scalar: UnsignedTorus + CastInto<usize> + Sync + Send + Serial
            write_to_json(
                &id,
                *params,
-                *name,
+                name,
                "pbs",
                &OperatorType::Atomic,
                bit_size,
--- a/tfhe/benches/core_crypto/pbs_crt_bench.rs
+++ b/tfhe/benches/core_crypto/pbs_crt_bench.rs
@@ -0,0 +1,116 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+use dyn_stack::PodStack;
+
+fn criterion_bench(c: &mut Criterion) {
+    {
+        use tfhe::core_crypto::fft_impl::crt_ntt::crypto::bootstrap::{
+            bootstrap_scratch, CrtNttLweBootstrapKey,
+        };
+        use tfhe::core_crypto::fft_impl::crt_ntt::math::ntt::CrtNtt64;
+        use tfhe::core_crypto::prelude::*;
+        type Scalar = u64;
+
+        let small_lwe_dimension = LweDimension(742);
+        let glwe_dimension = GlweDimension(1);
+        let polynomial_size = PolynomialSize(2048);
+        let lwe_modular_std_dev = StandardDev(0.000007069849454709433);
+        let pbs_base_log = DecompositionBaseLog(23);
+        let pbs_level = DecompositionLevelCount(1);
+
+        // Request the best seeder possible, starting with hardware entropy sources and falling back
+        // to /dev/random on Unix systems if enabled via cargo features
+        let mut boxed_seeder = new_seeder();
+        // Get a mutable reference to the seeder as a trait object from the Box returned by
+        // new_seeder
+        let seeder = boxed_seeder.as_mut();
+
+        // Create a generator which uses a CSPRNG to generate secret keys
+        let mut secret_generator =
+            SecretRandomGenerator::<ActivatedRandomGenerator>::new(seeder.seed());
+
+        // Create a generator which uses two CSPRNGs to generate public masks and secret encryption
+        // noise
+        let mut encryption_generator =
+            EncryptionRandomGenerator::<ActivatedRandomGenerator>::new(seeder.seed(), seeder);
+
+        // Generate an LweSecretKey with binary coefficients
+        let small_lwe_sk =
+            LweSecretKey::generate_new_binary(small_lwe_dimension, &mut secret_generator);
+
+        // Generate a GlweSecretKey with binary coefficients
+        let glwe_sk = GlweSecretKey::<Vec<Scalar>>::generate_new_binary(
+            glwe_dimension,
+            polynomial_size,
+            &mut secret_generator,
+        );
+
+        // Create a copy of the GlweSecretKey re-interpreted as an LweSecretKey
+        let big_lwe_sk = glwe_sk.into_lwe_secret_key();
+
+        // Allocate the bootstrapping key in the CRT-NTT domain; it is used as allocated below
+        let ntt_bsk = CrtNttLweBootstrapKey::new(
+            small_lwe_dimension,
+            polynomial_size,
+            glwe_dimension.to_glwe_size(),
+            pbs_base_log,
+            pbs_level,
+        );
+
+        let fft = CrtNtt64::new(polynomial_size);
+        let fft = fft.as_view();
+
+        // NOTE(review): `fft` is built from `CrtNtt64`, presumably an NTT plan (not an FFT) — confirm
+
+        // Our 4 bits message space
+        let message_modulus: Scalar = 1 << 4;
+
+        // Our input message
+        let input_message: Scalar = 3;
+
+        // Delta used to encode 4 bits of message + a bit of padding on Scalar
+        let delta: Scalar = (1 << (Scalar::BITS - 1)) / message_modulus;
+
+        // Apply our encoding
+        let plaintext = Plaintext(input_message * delta);
+
+        // Allocate a new LweCiphertext and encrypt our plaintext
+        let lwe_ciphertext_in: LweCiphertextOwned<Scalar> = allocate_and_encrypt_new_lwe_ciphertext(
+            &small_lwe_sk,
+            plaintext,
+            lwe_modular_std_dev,
+            &mut encryption_generator,
+        );
+
+        let accumulator: GlweCiphertextOwned<Scalar> =
+            GlweCiphertextOwned::new(Scalar::ONE, glwe_dimension.to_glwe_size(), polynomial_size);
+
+        // Allocate the LweCiphertext to store the result of the PBS
+        let mut pbs_multiplication_ct: LweCiphertext<Vec<Scalar>> =
+            LweCiphertext::new(0, big_lwe_sk.lwe_dimension().to_lwe_size());
+
+        let mut buf = vec![
+            0u8;
+            bootstrap_scratch::<u32, 5, Scalar>(
+                ntt_bsk.glwe_size(),
+                ntt_bsk.polynomial_size(),
+            )
+            .unwrap()
+            .unaligned_bytes_required()
+        ];
+
+        c.bench_function("pbs-crt-u64-u32x5", |b| {
+            b.iter(|| {
+                ntt_bsk.bootstrap(
+                    &mut pbs_multiplication_ct,
+                    &lwe_ciphertext_in,
+                    &accumulator,
+                    fft,
+                    PodStack::new(&mut buf),
+                )
+            });
+        });
+    }
+}
+
+criterion_group!(benches, criterion_bench);
+criterion_main!(benches);
--- a/tfhe/benches/integer/bench.rs
+++ b/tfhe/benches/integer/bench.rs
@@ -13,7 +13,9 @@ use rand::Rng;
 use std::vec::IntoIter;
 use tfhe::integer::keycache::KEY_CACHE;
 use tfhe::integer::{RadixCiphertext, ServerKey};
-use tfhe::shortint::keycache::NamedParam;
+use tfhe::keycache::NamedParam;
+
+use tfhe::integer::U256;

 #[allow(unused_imports)]
 use tfhe::shortint::parameters::{
@@ -21,6 +23,20 @@ use tfhe::shortint::parameters::{
    PARAM_MESSAGE_4_CARRY_4_KS_PBS, PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS,
 };

+/// The type used to hold scalar values
+/// It must be as big as the largest bit size tested
+type ScalarType = U256;
+
+const FAST_BENCH_BIT_SIZES: [usize; 1] = [32];
+const BENCH_BIT_SIZES: [usize; 7] = [8, 16, 32, 40, 64, 128, 256];
+
+fn gen_random_u256(rng: &mut ThreadRng) -> U256 {
+    // The low half is drawn first, then the high half, so the RNG is consumed in that order.
+    let low_half: u128 = rng.gen();
+    let high_half: u128 = rng.gen();
+    U256::from((low_half, high_half))
+}
+
 /// An iterator that yields a succession of combinations
 /// of parameters and a num_block to achieve a certain bit_size ciphertext
 /// in radix decomposition
@@ -41,15 +57,14 @@ impl Default for ParamsAndNumBlocksIter {
            Err(_) => false,
        };

+        let bit_sizes = if is_fast_bench {
+            FAST_BENCH_BIT_SIZES.to_vec()
+        } else {
+            BENCH_BIT_SIZES.to_vec()
+        };
+
        if is_multi_bit {
            let params = vec![PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS.into()];
-
-            let bit_sizes = if is_fast_bench {
-                vec![32]
-            } else {
-                vec![8, 16, 32, 40, 64]
-            };
-
            let params_and_bit_sizes = iproduct!(params, bit_sizes);
            Self {
                params_and_bit_sizes,
@@ -62,13 +77,6 @@ impl Default for ParamsAndNumBlocksIter {
                // PARAM_MESSAGE_3_CARRY_3_KS_PBS.into(),
                // PARAM_MESSAGE_4_CARRY_4_KS_PBS.into(),
            ];
-
-            let bit_sizes = if is_fast_bench {
-                vec![32]
-            } else {
-                vec![8, 16, 32, 40, 64, 128, 256]
-            };
-
            let params_and_bit_sizes = iproduct!(params, bit_sizes);
            Self {
                params_and_bit_sizes,
@@ -113,23 +121,17 @@ fn bench_server_key_binary_function_dirty_inputs<F>(
            let (cks, sks) = KEY_CACHE.get_from_params(param);

            let encrypt_two_values = || {
-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-                let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
+                let clear_0 = gen_random_u256(&mut rng);
                let mut ct_0 = cks.encrypt_radix(clear_0, num_block);

-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-                let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh));
+                let clear_1 = gen_random_u256(&mut rng);
                let mut ct_1 = cks.encrypt_radix(clear_1, num_block);

                // Raise the degree, so as to ensure worst case path in operations
                let mut carry_mod = param.carry_modulus().0;
                while carry_mod > 0 {
                    // Raise the degree, so as to ensure worst case path in operations
-                    let clearlow = rng.gen::<u128>();
-                    let clearhigh = rng.gen::<u128>();
-                    let clear_2 = tfhe::integer::U256::from((clearlow, clearhigh));
+                    let clear_2 = gen_random_u256(&mut rng);
                    let ct_2 = cks.encrypt_radix(clear_2, num_block);
                    sks.unchecked_add_assign(&mut ct_0, &ct_2);
                    sks.unchecked_add_assign(&mut ct_1, &ct_2);
@@ -187,14 +189,10 @@ fn bench_server_key_binary_function_clean_inputs<F>(
            let (cks, sks) = KEY_CACHE.get_from_params(param);

            let encrypt_two_values = || {
-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-                let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
+                let clear_0 = gen_random_u256(&mut rng);
                let ct_0 = cks.encrypt_radix(clear_0, num_block);

-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-                let clear_1 = tfhe::integer::U256::from((clearlow, clearhigh));
+                let clear_1 = gen_random_u256(&mut rng);
                let ct_1 = cks.encrypt_radix(clear_1, num_block);

                (ct_0, ct_1)
@@ -248,20 +246,14 @@ fn bench_server_key_unary_function_dirty_inputs<F>(
            let (cks, sks) = KEY_CACHE.get_from_params(param);

            let encrypt_one_value = || {
-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-
-                let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
-
+                let clear_0 = gen_random_u256(&mut rng);
                let mut ct_0 = cks.encrypt_radix(clear_0, num_block);

                // Raise the degree, so as to ensure worst case path in operations
                let mut carry_mod = param.carry_modulus().0;
                while carry_mod > 0 {
                    // Raise the degree, so as to ensure worst case path in operations
-                    let clearlow = rng.gen::<u128>();
-                    let clearhigh = rng.gen::<u128>();
-                    let clear_2 = tfhe::integer::U256::from((clearlow, clearhigh));
+                    let clear_2 = gen_random_u256(&mut rng);
                    let ct_2 = cks.encrypt_radix(clear_2, num_block);
                    sks.unchecked_add_assign(&mut ct_0, &ct_2);

@@ -319,10 +311,7 @@ fn bench_server_key_unary_function_clean_inputs<F>(
            let (cks, sks) = KEY_CACHE.get_from_params(param);

            let encrypt_one_value = || {
-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-
-                let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
+                let clear_0 = gen_random_u256(&mut rng);

                cks.encrypt_radix(clear_0, num_block)
            };
@@ -350,13 +339,15 @@ fn bench_server_key_unary_function_clean_inputs<F>(
    bench_group.finish()
 }

-fn bench_server_key_binary_scalar_function_dirty_inputs<F>(
+fn bench_server_key_binary_scalar_function_dirty_inputs<F, G>(
    c: &mut Criterion,
    bench_name: &str,
    display_name: &str,
    binary_op: F,
+    rng_func: G,
 ) where
-    F: Fn(&ServerKey, &mut RadixCiphertext, u64),
+    F: Fn(&ServerKey, &mut RadixCiphertext, ScalarType),
+    G: Fn(&mut ThreadRng, usize) -> ScalarType,
 {
    let mut bench_group = c.benchmark_group(bench_name);
    bench_group
@@ -367,15 +358,14 @@ fn bench_server_key_binary_scalar_function_dirty_inputs<F>(
    for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
        let param_name = param.name();

+        let max_value_for_bit_size = ScalarType::MAX >> (ScalarType::BITS as usize - bit_size);
+
        let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
        bench_group.bench_function(&bench_id, |b| {
            let (cks, sks) = KEY_CACHE.get_from_params(param);

            let encrypt_one_value = || {
-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-
-                let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
+                let clear_0 = gen_random_u256(&mut rng);
                let mut ct_0 = cks.encrypt_radix(clear_0, num_block);

                // Raise the degree, so as to ensure worst case path in operations
@@ -391,7 +381,7 @@ fn bench_server_key_binary_scalar_function_dirty_inputs<F>(
                    carry_mod -= 1;
                }

-                let clear_1 = rng.gen::<u64>();
+                let clear_1 = rng_func(&mut rng, bit_size) & max_value_for_bit_size;

                (ct_0, clear_1)
            };
@@ -426,8 +416,8 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
    binary_op: F,
    rng_func: G,
 ) where
-    F: Fn(&ServerKey, &mut RadixCiphertext, u64),
-    G: Fn(&mut ThreadRng, usize) -> u64,
+    F: Fn(&ServerKey, &mut RadixCiphertext, ScalarType),
+    G: Fn(&mut ThreadRng, usize) -> ScalarType,
 {
    let mut bench_group = c.benchmark_group(bench_name);
    bench_group
@@ -436,24 +426,22 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
    let mut rng = rand::thread_rng();

    for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
-        if bit_size > 64 {
+        if bit_size > ScalarType::BITS as usize {
            break;
        }
        let param_name = param.name();

+        let max_value_for_bit_size = ScalarType::MAX >> (ScalarType::BITS as usize - bit_size);
+
        let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits_scalar_{bit_size}");
        bench_group.bench_function(&bench_id, |b| {
            let (cks, sks) = KEY_CACHE.get_from_params(param);

            let encrypt_one_value = || {
-                let clearlow = rng.gen::<u128>();
-                let clearhigh = rng.gen::<u128>();
-
-                let clear_0 = tfhe::integer::U256::from((clearlow, clearhigh));
+                let clear_0 = gen_random_u256(&mut rng);
                let ct_0 = cks.encrypt_radix(clear_0, num_block);

-                // Avoid overflow issues for u64 where we would take values mod 1
-                let clear_1 = (rng_func(&mut rng, bit_size) as u128 % (1u128 << bit_size)) as u64;
+                let clear_1 = rng_func(&mut rng, bit_size) & max_value_for_bit_size;

                (ct_0, clear_1)
            };
@@ -482,18 +470,18 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
 }

 // Functions used to apply different way of selecting a scalar based on the context.
-fn default_scalar(rng: &mut ThreadRng, _clear_bit_size: usize) -> u64 {
-    rng.gen::<u64>()
+fn default_scalar(rng: &mut ThreadRng, _clear_bit_size: usize) -> ScalarType {
+    gen_random_u256(rng)
 }

-fn shift_scalar(_rng: &mut ThreadRng, _clear_bit_size: usize) -> u64 {
+fn shift_scalar(_rng: &mut ThreadRng, _clear_bit_size: usize) -> ScalarType {
    // Shifting by one is the worst case scenario.
-    1
+    ScalarType::ONE
 }

-fn mul_scalar(rng: &mut ThreadRng, _clear_bit_size: usize) -> u64 {
+fn mul_scalar(rng: &mut ThreadRng, _clear_bit_size: usize) -> ScalarType {
    loop {
-        let scalar = rng.gen_range(3u64..=u64::MAX);
+        let scalar = gen_random_u256(rng);
        // If scalar is power of two, it is just a shit, which is an happy path.
        if !scalar.is_power_of_two() {
            return scalar;
@@ -501,16 +489,69 @@ fn mul_scalar(rng: &mut ThreadRng, _clear_bit_size: usize) -> u64 {
    }
 }

-fn div_scalar(rng: &mut ThreadRng, clear_bit_size: usize) -> u64 {
+fn div_scalar(rng: &mut ThreadRng, clear_bit_size: usize) -> ScalarType {
    loop {
-        let scalar = rng.gen_range(1..=u64::MAX);
-        // Avoid overflow issues for u64 where we would take values mod 1
-        if (scalar as u128 % (1u128 << clear_bit_size)) != 0 {
+        let scalar = gen_random_u256(rng);
+        let max_for_bit_size = ScalarType::MAX >> (ScalarType::BITS as usize - clear_bit_size);
+        let scalar = scalar & max_for_bit_size;
+        if scalar != ScalarType::ZERO {
            return scalar;
        }
    }
 }

+/// Benchmarks the server key's `if_then_else_parallelized` for every
+/// (parameters, block count, bit size) triple from `ParamsAndNumBlocksIter::default()`,
+/// then hands each benchmark id and its parameters to `write_to_json`.
+fn if_then_else_parallelized(c: &mut Criterion) {
+    let bench_name = "integer::if_then_else_parallelized";
+    let display_name = "if_then_else";
+
+    let mut bench_group = c.benchmark_group(bench_name);
+    bench_group
+        .sample_size(15)
+        .measurement_time(std::time::Duration::from_secs(60));
+    let mut rng = rand::thread_rng();
+
+    for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() {
+        let param_name = param.name();
+        let bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits");
+
+        bench_group.bench_function(&bench_id, |bencher| {
+            let (cks, sks) = KEY_CACHE.get_from_params(param);
+
+            // Setup: two encrypted random values, then a random 0/1 selector
+            // wrapped with `create_trivial_radix`.
+            let make_inputs = || {
+                let true_ct = cks.encrypt_radix(gen_random_u256(&mut rng), num_block);
+                let false_ct = cks.encrypt_radix(gen_random_u256(&mut rng), num_block);
+                let selector = u64::from(rng.gen_bool(0.5));
+                (sks.create_trivial_radix(selector, num_block), true_ct, false_ct)
+            };
+
+            bencher.iter_batched(
+                make_inputs,
+                |(condition, true_ct, false_ct)| {
+                    sks.if_then_else_parallelized(&condition, &true_ct, &false_ct)
+                },
+                criterion::BatchSize::SmallInput,
+            )
+        });
+
+        write_to_json::<u64, _>(
+            &bench_id,
+            param,
+            param.name(),
+            display_name,
+            &OperatorType::Atomic,
+            bit_size as u32,
+            vec![param.message_modulus().0.ilog2(); num_block],
+        );
+    }
+
+    bench_group.finish()
+}
+
 macro_rules! define_server_key_bench_unary_fn (
    (method_name: $server_key_method:ident, display_name:$name:ident) => {
        fn $server_key_method(c: &mut Criterion) {
@@ -568,7 +609,7 @@ macro_rules! define_server_key_bench_default_fn (
  );

 macro_rules! define_server_key_bench_scalar_fn (
-  (method_name: $server_key_method:ident, display_name:$name:ident) => {
+    (method_name: $server_key_method:ident, display_name:$name:ident, rng_func:$($rng_fn:tt)*) => {
      fn $server_key_method(c: &mut Criterion) {
          bench_server_key_binary_scalar_function_dirty_inputs(
              c,
@@ -576,7 +617,9 @@ macro_rules! define_server_key_bench_scalar_fn (
              stringify!($name),
              |server_key, lhs, rhs| {
                server_key.$server_key_method(lhs, rhs);
-          })
+              },
+              $($rng_fn)*
+          )
      }
  }
 );
@@ -590,7 +633,9 @@ macro_rules! define_server_key_bench_scalar_default_fn (
                stringify!($name),
                |server_key, lhs, rhs| {
                  server_key.$server_key_method(lhs, rhs);
-            }, $($rng_fn)*)
+                },
+                $($rng_fn)*
+            )
        }
    }
  );
@@ -612,47 +657,64 @@ define_server_key_bench_fn!(method_name: smart_bitor_parallelized, display_name:
 define_server_key_bench_default_fn!(method_name: add_parallelized, display_name: add);
 define_server_key_bench_default_fn!(method_name: sub_parallelized, display_name: sub);
 define_server_key_bench_default_fn!(method_name: mul_parallelized, display_name: mul);
+define_server_key_bench_default_fn!(method_name: div_parallelized, display_name: div);
+define_server_key_bench_default_fn!(method_name: rem_parallelized, display_name: modulo);
 define_server_key_bench_default_fn!(method_name: bitand_parallelized, display_name: bitand);
 define_server_key_bench_default_fn!(method_name: bitxor_parallelized, display_name: bitxor);
 define_server_key_bench_default_fn!(method_name: bitor_parallelized, display_name: bitor);
 define_server_key_bench_unary_default_fn!(method_name: bitnot_parallelized, display_name: bitnot);

-define_server_key_bench_fn!(method_name: unchecked_add, display_name: add);
-define_server_key_bench_fn!(method_name: unchecked_sub, display_name: sub);
-define_server_key_bench_fn!(method_name: unchecked_mul, display_name: mul);
-define_server_key_bench_fn!(method_name: unchecked_bitand, display_name: bitand);
-define_server_key_bench_fn!(method_name: unchecked_bitor, display_name: bitor);
-define_server_key_bench_fn!(method_name: unchecked_bitxor, display_name: bitxor);
+define_server_key_bench_default_fn!(method_name: unchecked_add, display_name: add);
+define_server_key_bench_default_fn!(method_name: unchecked_sub, display_name: sub);
+define_server_key_bench_default_fn!(method_name: unchecked_mul, display_name: mul);
+define_server_key_bench_default_fn!(method_name: unchecked_bitand, display_name: bitand);
+define_server_key_bench_default_fn!(method_name: unchecked_bitor, display_name: bitor);
+define_server_key_bench_default_fn!(method_name: unchecked_bitxor, display_name: bitxor);

-define_server_key_bench_fn!(method_name: unchecked_mul_parallelized, display_name: mul);
-define_server_key_bench_fn!(
+define_server_key_bench_default_fn!(method_name: unchecked_mul_parallelized, display_name: mul);
+define_server_key_bench_default_fn!(
    method_name: unchecked_bitand_parallelized,
    display_name: bitand
 );
-define_server_key_bench_fn!(
+define_server_key_bench_default_fn!(
    method_name: unchecked_bitor_parallelized,
    display_name: bitor
 );
-define_server_key_bench_fn!(
+define_server_key_bench_default_fn!(
    method_name: unchecked_bitxor_parallelized,
    display_name: bitxor
 );

-define_server_key_bench_scalar_fn!(method_name: smart_scalar_add, display_name: add);
-define_server_key_bench_scalar_fn!(method_name: smart_scalar_sub, display_name: sub);
-define_server_key_bench_scalar_fn!(method_name: smart_scalar_mul, display_name: mul);
+define_server_key_bench_scalar_fn!(
+    method_name: smart_scalar_add,
+    display_name: add,
+    rng_func: default_scalar
+);
+define_server_key_bench_scalar_fn!(
+    method_name: smart_scalar_sub,
+    display_name: sub,
+    rng_func: default_scalar
+);
+define_server_key_bench_scalar_fn!(
+    method_name: smart_scalar_mul,
+    display_name: mul,
+    rng_func: mul_scalar
+);

 define_server_key_bench_scalar_fn!(
    method_name: smart_scalar_add_parallelized,
-    display_name: add
+    display_name: add,
+    rng_func: default_scalar
 );
 define_server_key_bench_scalar_fn!(
    method_name: smart_scalar_sub_parallelized,
-    display_name: sub
+    display_name: sub,
+    rng_func: default_scalar,
 );
 define_server_key_bench_scalar_fn!(
    method_name: smart_scalar_mul_parallelized,
-    display_name: mul
+    display_name: mul,
+    rng_func: mul_scalar
 );

 define_server_key_bench_scalar_default_fn!(
@@ -702,48 +764,60 @@ define_server_key_bench_scalar_default_fn!(
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_eq_parallelized,
-    display_name: scalar_equal,
+    display_name: equal,
    rng_func: default_scalar
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_ne_parallelized,
-    display_name: scalar_not_equal,
+    display_name: not_equal,
    rng_func: default_scalar
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_le_parallelized,
-    display_name: scalar_less_or_equal,
+    display_name: less_or_equal,
    rng_func: default_scalar
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_lt_parallelized,
-    display_name: scalar_less_than,
+    display_name: less_than,
    rng_func: default_scalar
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_ge_parallelized,
-    display_name: scalar_greater_or_equal,
+    display_name: greater_or_equal,
    rng_func: default_scalar
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_gt_parallelized,
-    display_name: scalar_greater_than,
+    display_name: greater_than,
    rng_func: default_scalar
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_max_parallelized,
-    display_name: scalar_max,
+    display_name: max,
    rng_func: default_scalar
 );
 define_server_key_bench_scalar_default_fn!(
    method_name: scalar_min_parallelized,
-    display_name: scalar_min,
+    display_name: min,
    rng_func: default_scalar
 );

-define_server_key_bench_scalar_fn!(method_name: unchecked_scalar_add, display_name: add);
-define_server_key_bench_scalar_fn!(method_name: unchecked_scalar_sub, display_name: sub);
-define_server_key_bench_scalar_fn!(method_name: unchecked_small_scalar_mul, display_name: mul);
+define_server_key_bench_scalar_default_fn!(
+    method_name: unchecked_scalar_add,
+    display_name: add,
+    rng_func: default_scalar
+);
+define_server_key_bench_scalar_default_fn!(
+    method_name: unchecked_scalar_sub,
+    display_name: sub,
+    rng_func: default_scalar
+);
+define_server_key_bench_scalar_default_fn!(
+    method_name: unchecked_scalar_mul_parallelized,
+    display_name: mul,
+    rng_func: mul_scalar
+);

 define_server_key_bench_unary_fn!(method_name: smart_neg, display_name: negation);
 define_server_key_bench_unary_fn!(method_name: smart_neg_parallelized, display_name: negation);
@@ -755,30 +829,30 @@ define_server_key_bench_unary_fn!(
    display_name: carry_propagation
 );

-define_server_key_bench_fn!(method_name: unchecked_max, display_name: max);
-define_server_key_bench_fn!(method_name: unchecked_min, display_name: min);
-define_server_key_bench_fn!(method_name: unchecked_eq, display_name: equal);
-define_server_key_bench_fn!(method_name: unchecked_lt, display_name: less_than);
-define_server_key_bench_fn!(method_name: unchecked_le, display_name: less_or_equal);
-define_server_key_bench_fn!(method_name: unchecked_gt, display_name: greater_than);
-define_server_key_bench_fn!(method_name: unchecked_ge, display_name: greater_or_equal);
+define_server_key_bench_default_fn!(method_name: unchecked_max, display_name: max);
+define_server_key_bench_default_fn!(method_name: unchecked_min, display_name: min);
+define_server_key_bench_default_fn!(method_name: unchecked_eq, display_name: equal);
+define_server_key_bench_default_fn!(method_name: unchecked_lt, display_name: less_than);
+define_server_key_bench_default_fn!(method_name: unchecked_le, display_name: less_or_equal);
+define_server_key_bench_default_fn!(method_name: unchecked_gt, display_name: greater_than);
+define_server_key_bench_default_fn!(method_name: unchecked_ge, display_name: greater_or_equal);

-define_server_key_bench_fn!(method_name: unchecked_max_parallelized, display_name: max);
-define_server_key_bench_fn!(method_name: unchecked_min_parallelized, display_name: min);
-define_server_key_bench_fn!(method_name: unchecked_eq_parallelized, display_name: equal);
-define_server_key_bench_fn!(
+define_server_key_bench_default_fn!(method_name: unchecked_max_parallelized, display_name: max);
+define_server_key_bench_default_fn!(method_name: unchecked_min_parallelized, display_name: min);
+define_server_key_bench_default_fn!(method_name: unchecked_eq_parallelized, display_name: equal);
+define_server_key_bench_default_fn!(
    method_name: unchecked_lt_parallelized,
    display_name: less_than
 );
-define_server_key_bench_fn!(
+define_server_key_bench_default_fn!(
    method_name: unchecked_le_parallelized,
    display_name: less_or_equal
 );
-define_server_key_bench_fn!(
+define_server_key_bench_default_fn!(
    method_name: unchecked_gt_parallelized,
    display_name: greater_than
 );
-define_server_key_bench_fn!(
+define_server_key_bench_default_fn!(
    method_name: unchecked_ge_parallelized,
    display_name: greater_or_equal
 );
@@ -842,6 +916,10 @@ criterion_group!(
    smart_bitand,
    smart_bitor,
    smart_bitxor,
+);
+
+criterion_group!(
+    smart_ops_comp,
    smart_max,
    smart_min,
    smart_eq,
@@ -859,6 +937,10 @@ criterion_group!(
    smart_bitand_parallelized,
    smart_bitor_parallelized,
    smart_bitxor_parallelized,
+);
+
+criterion_group!(
+    smart_parallelized_ops_comp,
    smart_max_parallelized,
    smart_min_parallelized,
    smart_eq_parallelized,
@@ -873,11 +955,21 @@ criterion_group!(
    add_parallelized,
    sub_parallelized,
    mul_parallelized,
+    div_parallelized,
+    rem_parallelized,
    neg_parallelized,
    bitand_parallelized,
    bitnot_parallelized,
    bitor_parallelized,
    bitxor_parallelized,
+    left_shift_parallelized,
+    right_shift_parallelized,
+    rotate_left_parallelized,
+    rotate_right_parallelized,
+);
+
+criterion_group!(
+    default_parallelized_ops_comp,
    max_parallelized,
    min_parallelized,
    eq_parallelized,
@@ -886,10 +978,7 @@ criterion_group!(
    le_parallelized,
    gt_parallelized,
    ge_parallelized,
-    left_shift_parallelized,
-    right_shift_parallelized,
-    rotate_left_parallelized,
-    rotate_right_parallelized,
+    if_then_else_parallelized,
 );

 criterion_group!(
@@ -915,6 +1004,12 @@ criterion_group!(
    scalar_rem_parallelized,
    scalar_left_shift_parallelized,
    scalar_right_shift_parallelized,
+    scalar_rotate_left_parallelized,
+    scalar_rotate_right_parallelized,
+);
+
+criterion_group!(
+    default_scalar_parallelized_ops_comp,
    scalar_eq_parallelized,
    scalar_ne_parallelized,
    scalar_lt_parallelized,
@@ -923,8 +1018,6 @@ criterion_group!(
    scalar_ge_parallelized,
    scalar_min_parallelized,
    scalar_max_parallelized,
-    scalar_rotate_left_parallelized,
-    scalar_rotate_right_parallelized,
 );

 criterion_group!(
@@ -935,6 +1028,10 @@ criterion_group!(
    unchecked_bitand,
    unchecked_bitor,
    unchecked_bitxor,
+);
+
+criterion_group!(
+    unchecked_ops_comp,
    unchecked_max,
    unchecked_min,
    unchecked_eq,
@@ -948,7 +1045,14 @@ criterion_group!(
    unchecked_scalar_ops,
    unchecked_scalar_add,
    unchecked_scalar_sub,
-    unchecked_small_scalar_mul,
+    unchecked_scalar_mul_parallelized,
+    unchecked_bitand_parallelized,
+    unchecked_bitor_parallelized,
+    unchecked_bitxor_parallelized,
+);
+
+criterion_group!(
+    unchecked_scalar_ops_comp,
    unchecked_max_parallelized,
    unchecked_min_parallelized,
    unchecked_eq_parallelized,
@@ -956,9 +1060,6 @@ criterion_group!(
    unchecked_le_parallelized,
    unchecked_gt_parallelized,
    unchecked_ge_parallelized,
-    unchecked_bitand_parallelized,
-    unchecked_bitor_parallelized,
-    unchecked_bitxor_parallelized,
 );

 criterion_group!(misc, full_propagate, full_propagate_parallelized);
@@ -968,13 +1069,19 @@ fn main() {
        Ok(val) => {
            match val.to_lowercase().as_str() {
                "default" => default_parallelized_ops(),
+                "default_comp" => default_parallelized_ops_comp(),
                "default_scalar" => default_scalar_parallelized_ops(),
+                "default_scalar_comp" => default_scalar_parallelized_ops_comp(),
                "smart" => smart_ops(),
+                "smart_comp" => smart_ops_comp(),
                "smart_scalar" => smart_scalar_ops(),
                "smart_parallelized" => smart_parallelized_ops(),
+                "smart_parallelized_comp" => smart_parallelized_ops_comp(),
                "smart_scalar_parallelized" => smart_scalar_parallelized_ops(),
                "unchecked" => unchecked_ops(),
+                "unchecked_comp" => unchecked_ops_comp(),
                "unchecked_scalar" => unchecked_scalar_ops(),
+                "unchecked_scalar_comp" => unchecked_scalar_ops_comp(),
                "misc" => misc(),
                _ => panic!("unknown benchmark operations flavor"),
            };
--- a/tfhe/benches/shortint/bench.rs
+++ b/tfhe/benches/shortint/bench.rs
@@ -5,14 +5,15 @@ use crate::utilities::{write_to_json, OperatorType};
 use std::env;

 use criterion::{criterion_group, Criterion};
-use tfhe::shortint::keycache::NamedParam;
+use tfhe::keycache::NamedParam;
 use tfhe::shortint::parameters::*;
-use tfhe::shortint::{Ciphertext, ClassicPBSParameters, ServerKey, ShortintParameterSet};
+use tfhe::shortint::{
+    Ciphertext, ClassicPBSParameters, CompressedServerKey, ServerKey, ShortintParameterSet,
+};

 use rand::Rng;
-use tfhe::shortint::keycache::KEY_CACHE;
+use tfhe::shortint::keycache::{KEY_CACHE, KEY_CACHE_WOPBS};

-use tfhe::shortint::keycache::KEY_CACHE_WOPBS;
 use tfhe::shortint::parameters::parameters_wopbs::WOPBS_PARAM_MESSAGE_4_NORM2_6_KS_PBS;

 const SERVER_KEY_BENCH_PARAMS: [ClassicPBSParameters; 4] = [
@@ -40,20 +41,59 @@ const SERVER_KEY_BENCH_PARAMS_EXTENDED: [ClassicPBSParameters; 15] = [
    PARAM_MESSAGE_8_CARRY_0_KS_PBS,
 ];

+const SERVER_KEY_MULTI_BIT_BENCH_PARAMS: [MultiBitPBSParameters; 2] = [
+    PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS,
+    PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS,
+];
+
+const SERVER_KEY_MULTI_BIT_BENCH_PARAMS_EXTENDED: [MultiBitPBSParameters; 6] = [
+    PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_2_KS_PBS,
+    PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS,
+    PARAM_MULTI_BIT_MESSAGE_3_CARRY_3_GROUP_2_KS_PBS,
+    PARAM_MULTI_BIT_MESSAGE_1_CARRY_1_GROUP_3_KS_PBS,
+    PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS,
+    PARAM_MULTI_BIT_MESSAGE_3_CARRY_3_GROUP_3_KS_PBS,
+];
+
+enum BenchParamsSet {
+    Standard, // selects SERVER_KEY_BENCH_PARAMS or SERVER_KEY_MULTI_BIT_BENCH_PARAMS
+    Extended, // selects the corresponding *_EXTENDED constant
+}
+
+/// Picks the parameter list for `params_set`: multi-bit when the environment variable
+/// `__TFHE_RS_BENCH_TYPE` equals "multi_bit" (any case), classic otherwise or when unset.
+fn benchmark_parameters(params_set: BenchParamsSet) -> Vec<PBSParameters> {
+    let multi_bit_requested = env::var("__TFHE_RS_BENCH_TYPE")
+        .map(|bench_type| bench_type.to_lowercase() == "multi_bit")
+        .unwrap_or(false);
+
+    if !multi_bit_requested {
+        let selected: &[ClassicPBSParameters] = match params_set {
+            BenchParamsSet::Standard => &SERVER_KEY_BENCH_PARAMS,
+            BenchParamsSet::Extended => &SERVER_KEY_BENCH_PARAMS_EXTENDED,
+        };
+        return selected.iter().map(|classic| (*classic).into()).collect();
+    }
+    let selected: &[MultiBitPBSParameters] = match params_set {
+        BenchParamsSet::Standard => &SERVER_KEY_MULTI_BIT_BENCH_PARAMS,
+        BenchParamsSet::Extended => &SERVER_KEY_MULTI_BIT_BENCH_PARAMS_EXTENDED,
+    };
+    selected.iter().map(|multi_bit| (*multi_bit).into()).collect()
+}
+
 fn bench_server_key_unary_function<F>(
    c: &mut Criterion,
    bench_name: &str,
    display_name: &str,
    unary_op: F,
-    params: &[ClassicPBSParameters],
+    params_set: BenchParamsSet,
 ) where
    F: Fn(&ServerKey, &mut Ciphertext),
 {
    let mut bench_group = c.benchmark_group(bench_name);

-    for param in params.iter() {
-        let param: PBSParameters = (*param).into();
-        let keys = KEY_CACHE.get_from_param(param);
+    for param in benchmark_parameters(params_set).iter() {
+        let keys = KEY_CACHE.get_from_param(*param);
        let (cks, sks) = (keys.client_key(), keys.server_key());

        let mut rng = rand::thread_rng();
@@ -73,7 +113,7 @@ fn bench_server_key_unary_function<F>(

        write_to_json::<u64, _>(
            &bench_id,
-            param,
+            *param,
            param.name(),
            display_name,
            &OperatorType::Atomic,
@@ -90,15 +130,14 @@ fn bench_server_key_binary_function<F>(
    bench_name: &str,
    display_name: &str,
    binary_op: F,
-    params: &[ClassicPBSParameters],
+    params_set: BenchParamsSet,
 ) where
    F: Fn(&ServerKey, &mut Ciphertext, &mut Ciphertext),
 {
    let mut bench_group = c.benchmark_group(bench_name);

-    for param in params.iter() {
-        let param: PBSParameters = (*param).into();
-        let keys = KEY_CACHE.get_from_param(param);
+    for param in benchmark_parameters(params_set).iter() {
+        let keys = KEY_CACHE.get_from_param(*param);
        let (cks, sks) = (keys.client_key(), keys.server_key());

        let mut rng = rand::thread_rng();
@@ -120,7 +159,7 @@ fn bench_server_key_binary_function<F>(

        write_to_json::<u64, _>(
            &bench_id,
-            param,
+            *param,
            param.name(),
            display_name,
            &OperatorType::Atomic,
@@ -137,15 +176,14 @@ fn bench_server_key_binary_scalar_function<F>(
    bench_name: &str,
    display_name: &str,
    binary_op: F,
-    params: &[ClassicPBSParameters],
+    params_set: BenchParamsSet,
 ) where
    F: Fn(&ServerKey, &mut Ciphertext, u8),
 {
    let mut bench_group = c.benchmark_group(bench_name);

-    for param in params {
-        let param: PBSParameters = (*param).into();
-        let keys = KEY_CACHE.get_from_param(param);
+    for param in benchmark_parameters(params_set).iter() {
+        let keys = KEY_CACHE.get_from_param(*param);
        let (cks, sks) = (keys.client_key(), keys.server_key());

        let mut rng = rand::thread_rng();
@@ -166,7 +204,7 @@ fn bench_server_key_binary_scalar_function<F>(

        write_to_json::<u64, _>(
            &bench_id,
-            param,
+            *param,
            param.name(),
            display_name,
            &OperatorType::Atomic,
@@ -183,15 +221,14 @@ fn bench_server_key_binary_scalar_division_function<F>(
    bench_name: &str,
    display_name: &str,
    binary_op: F,
-    params: &[ClassicPBSParameters],
+    params_set: BenchParamsSet,
 ) where
    F: Fn(&ServerKey, &mut Ciphertext, u8),
 {
    let mut bench_group = c.benchmark_group(bench_name);

-    for param in params {
-        let param: PBSParameters = (*param).into();
-        let keys = KEY_CACHE.get_from_param(param);
+    for param in benchmark_parameters(params_set).iter() {
+        let keys = KEY_CACHE.get_from_param(*param);
        let (cks, sks) = (keys.client_key(), keys.server_key());

        let mut rng = rand::thread_rng();
@@ -216,7 +253,7 @@ fn bench_server_key_binary_scalar_division_function<F>(

        write_to_json::<u64, _>(
            &bench_id,
-            param,
+            *param,
            param.name(),
            display_name,
            &OperatorType::Atomic,
@@ -228,12 +265,11 @@ fn bench_server_key_binary_scalar_division_function<F>(
    bench_group.finish()
 }

-fn carry_extract(c: &mut Criterion) {
+fn carry_extract_bench(c: &mut Criterion, params_set: BenchParamsSet) {
    let mut bench_group = c.benchmark_group("carry_extract");

-    for param in SERVER_KEY_BENCH_PARAMS {
-        let param: PBSParameters = param.into();
-        let keys = KEY_CACHE.get_from_param(param);
+    for param in benchmark_parameters(params_set).iter() {
+        let keys = KEY_CACHE.get_from_param(*param);
        let (cks, sks) = (keys.client_key(), keys.server_key());

        let mut rng = rand::thread_rng();
@@ -253,7 +289,7 @@ fn carry_extract(c: &mut Criterion) {

        write_to_json::<u64, _>(
            &bench_id,
-            param,
+            *param,
            param.name(),
            "carry_extract",
            &OperatorType::Atomic,
@@ -265,12 +301,11 @@ fn carry_extract(c: &mut Criterion) {
    bench_group.finish()
 }

-fn programmable_bootstrapping(c: &mut Criterion) {
+fn programmable_bootstrapping_bench(c: &mut Criterion, params_set: BenchParamsSet) {
    let mut bench_group = c.benchmark_group("programmable_bootstrap");

-    for param in SERVER_KEY_BENCH_PARAMS {
-        let param: PBSParameters = param.into();
-        let keys = KEY_CACHE.get_from_param(param);
+    for param in benchmark_parameters(params_set).iter() {
+        let keys = KEY_CACHE.get_from_param(*param);
        let (cks, sks) = (keys.client_key(), keys.server_key());

        let mut rng = rand::thread_rng();
@@ -293,7 +328,7 @@ fn programmable_bootstrapping(c: &mut Criterion) {

        write_to_json::<u64, _>(
            &bench_id,
-            param,
+            *param,
            param.name(),
            "pbs",
            &OperatorType::Atomic,
@@ -305,6 +340,54 @@ fn programmable_bootstrapping(c: &mut Criterion) {
    bench_group.finish();
 }

+/// Benchmarks the conversion of a `CompressedServerKey` into a `ServerKey`.
+///
+/// One benchmark is registered in the `uncompress_key` Criterion group for every parameter set
+/// of `SERVER_KEY_BENCH_PARAMS_EXTENDED` and `SERVER_KEY_MULTI_BIT_BENCH_PARAMS_EXTENDED`, and
+/// each one is also recorded through `write_to_json`.
+fn server_key_from_compressed_key(c: &mut Criterion) {
+    let mut bench_group = c.benchmark_group("uncompress_key");
+    // 10 samples over a 60 s measurement window.
+    // NOTE(review): presumably chosen because a single conversion is slow; confirm.
+    bench_group
+        .sample_size(10)
+        .measurement_time(std::time::Duration::from_secs(60));
+
+    // Convert both parameter lists to `PBSParameters` and merge them so that a single loop
+    // covers the classic and the multi-bit parameter sets.
+    let mut params = SERVER_KEY_BENCH_PARAMS_EXTENDED
+        .iter()
+        .map(|p| (*p).into())
+        .collect::<Vec<PBSParameters>>();
+    let multi_bit_params = SERVER_KEY_MULTI_BIT_BENCH_PARAMS_EXTENDED
+        .iter()
+        .map(|p| (*p).into())
+        .collect::<Vec<PBSParameters>>();
+    params.extend(&multi_bit_params);
+
+    for param in params.iter() {
+        // The compressed key is built once per parameter set, outside the measured closure.
+        let keys = KEY_CACHE.get_from_param(*param);
+        let sks_compressed = CompressedServerKey::new(keys.client_key());
+
+        let bench_id = format!("shortint::uncompress_key::{}", param.name());
+
+        bench_group.bench_function(&bench_id, |b| {
+            // Setup closure: `ServerKey::from` receives the compressed key by value, so every
+            // iteration is handed its own clone.
+            let clone_compressed_key = || sks_compressed.clone();
+
+            b.iter_batched(
+                clone_compressed_key,
+                |sks_cloned| {
+                    // The resulting server key is not used; it is discarded immediately.
+                    let _ = ServerKey::from(sks_cloned);
+                },
+                // One setup call per measured iteration (see Criterion's `BatchSize` docs).
+                criterion::BatchSize::PerIteration,
+            )
+        });
+
+        // NOTE(review): the last two arguments are both log2 of the message modulus; their
+        // meaning is defined by `write_to_json`, which is not visible here.
+        write_to_json::<u64, _>(
+            &bench_id,
+            *param,
+            param.name(),
+            "uncompress_key",
+            &OperatorType::Atomic,
+            param.message_modulus().0.ilog2(),
+            vec![param.message_modulus().0.ilog2()],
+        );
+    }
+
+    bench_group.finish();
+}
+
 // TODO: remove?
 fn _bench_wopbs_param_message_8_norm2_5(c: &mut Criterion) {
    let mut bench_group = c.benchmark_group("programmable_bootstrap");
@@ -334,7 +417,7 @@ fn _bench_wopbs_param_message_8_norm2_5(c: &mut Criterion) {
 }

 macro_rules! define_server_key_unary_bench_fn (
-  (method_name:$server_key_method:ident, display_name:$name:ident, $params:expr) => {
+  (method_name:$server_key_method:ident, display_name:$name:ident, $params_set:expr) => {
      fn $server_key_method(c: &mut Criterion) {
          bench_server_key_unary_function(
              c,
@@ -342,13 +425,13 @@ macro_rules! define_server_key_unary_bench_fn (
              stringify!($name),
              |server_key, lhs| {
                let _ = server_key.$server_key_method(lhs);},
-              $params)
+              $params_set)
      }
  }
 );

 macro_rules! define_server_key_bench_fn (
-  (method_name:$server_key_method:ident, display_name:$name:ident, $params:expr) => {
+  (method_name:$server_key_method:ident, display_name:$name:ident, $params_set:expr) => {
      fn $server_key_method(c: &mut Criterion) {
          bench_server_key_binary_function(
              c,
@@ -356,13 +439,13 @@ macro_rules! define_server_key_bench_fn (
              stringify!($name),
              |server_key, lhs, rhs| {
                let _ = server_key.$server_key_method(lhs, rhs);},
-              $params)
+              $params_set)
      }
  }
 );

 macro_rules! define_server_key_scalar_bench_fn (
-  (method_name:$server_key_method:ident, display_name:$name:ident, $params:expr) => {
+  (method_name:$server_key_method:ident, display_name:$name:ident, $params_set:expr) => {
      fn $server_key_method(c: &mut Criterion) {
          bench_server_key_binary_scalar_function(
              c,
@@ -370,13 +453,13 @@ macro_rules! define_server_key_scalar_bench_fn (
              stringify!($name),
              |server_key, lhs, rhs| {
                let _ = server_key.$server_key_method(lhs, rhs);},
-              $params)
+              $params_set)
      }
  }
 );

 macro_rules! define_server_key_scalar_div_bench_fn (
-  (method_name:$server_key_method:ident, display_name:$name:ident, $params:expr) => {
+  (method_name:$server_key_method:ident, display_name:$name:ident, $params_set:expr) => {
      fn $server_key_method(c: &mut Criterion) {
          bench_server_key_binary_scalar_division_function(
              c,
@@ -384,7 +467,19 @@ macro_rules! define_server_key_scalar_div_bench_fn (
              stringify!($name),
              |server_key, lhs, rhs| {
                let _ = server_key.$server_key_method(lhs, rhs);},
-              $params)
+              $params_set)
+      }
+  }
+);
+
+// Defines `fn <function_name>(c: &mut Criterion)` as a thin wrapper that forwards `c` and the
+// given parameter set to the function named `<function_name>_bench`. The callee identifier is
+// assembled with `paste` by appending `_bench` to `function_name`, e.g. `carry_extract` calls
+// `carry_extract_bench`.
+macro_rules! define_custom_bench_fn (
+  (function_name:$function:ident, $params_set:expr) => {
+      fn $function(c: &mut Criterion) {
+          ::paste::paste! {
+              [<$function _bench>](
+                  c,
+                  $params_set)
+          }
      }
  }
 );
@@ -392,251 +487,258 @@ macro_rules! define_server_key_scalar_div_bench_fn (
 define_server_key_unary_bench_fn!(
    method_name: unchecked_neg,
    display_name: negation,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );

 define_server_key_bench_fn!(
    method_name: unchecked_add,
    display_name: add,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_bench_fn!(
    method_name: unchecked_sub,
    display_name: sub,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_bench_fn!(
    method_name: unchecked_mul_lsb,
    display_name: mul,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_bench_fn!(
    method_name: unchecked_mul_msb,
    display_name: mul,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: unchecked_div,
    display_name: div,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_bench_fn!(
    method_name: smart_bitand,
    display_name: bitand,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: smart_bitor,
    display_name: bitor,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: smart_bitxor,
    display_name: bitxor,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: smart_add,
    display_name: add,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: smart_sub,
    display_name: sub,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: smart_mul_lsb,
    display_name: mul,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: bitand,
    display_name: bitand,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: bitor,
    display_name: bitor,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: bitxor,
    display_name: bitxor,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: add,
    display_name: add,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: sub,
    display_name: sub,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: mul,
    display_name: mul,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: div,
    display_name: div,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: greater,
    display_name: greater,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: greater_or_equal,
    display_name: greater_or_equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: less,
    display_name: less,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: less_or_equal,
    display_name: less_or_equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: equal,
    display_name: equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: not_equal,
    display_name: not_equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_unary_bench_fn!(
    method_name: neg,
    display_name: negation,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: unchecked_greater,
    display_name: greater_than,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: unchecked_less,
    display_name: less_than,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_bench_fn!(
    method_name: unchecked_equal,
    display_name: equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );

 define_server_key_scalar_bench_fn!(
    method_name: unchecked_scalar_add,
    display_name: add,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_scalar_bench_fn!(
    method_name: unchecked_scalar_sub,
    display_name: sub,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_scalar_bench_fn!(
    method_name: unchecked_scalar_mul,
    display_name: mul,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_scalar_bench_fn!(
    method_name: unchecked_scalar_left_shift,
    display_name: left_shift,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: unchecked_scalar_right_shift,
    display_name: right_shift,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );

 define_server_key_scalar_div_bench_fn!(
    method_name: unchecked_scalar_div,
    display_name: div,
-    &SERVER_KEY_BENCH_PARAMS_EXTENDED
+    BenchParamsSet::Extended
 );
 define_server_key_scalar_div_bench_fn!(
    method_name: unchecked_scalar_mod,
    display_name: modulo,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_add,
    display_name: add,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_sub,
    display_name: sub,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_mul,
    display_name: mul,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_left_shift,
    display_name: left_shift,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_right_shift,
    display_name: right_shift,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );

 define_server_key_scalar_div_bench_fn!(
    method_name: scalar_div,
    display_name: div,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_div_bench_fn!(
    method_name: scalar_mod,
    display_name: modulo,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_greater,
    display_name: greater,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_greater_or_equal,
    display_name: greater_or_equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_less,
    display_name: less,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_bench_fn!(
    method_name: scalar_less_or_equal,
    display_name: less_or_equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_div_bench_fn!(
    method_name: scalar_equal,
    display_name: equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
 );
 define_server_key_scalar_div_bench_fn!(
    method_name: scalar_not_equal,
    display_name: not_equal,
-    &SERVER_KEY_BENCH_PARAMS
+    BenchParamsSet::Standard
+);
+
+define_custom_bench_fn!(function_name: carry_extract, BenchParamsSet::Standard);
+
+define_custom_bench_fn!(
+    function_name: programmable_bootstrapping,
+    BenchParamsSet::Standard
 );

 criterion_group!(
@@ -710,6 +812,8 @@ criterion_group!(
    scalar_not_equal
 );

+criterion_group!(misc, server_key_from_compressed_key);
+
 mod casting;
 criterion_group!(
    casting,
@@ -723,6 +827,7 @@ fn main() {
        casting();
        default_ops();
        default_scalar_ops();
+        misc();
    }

    match env::var("__TFHE_RS_BENCH_OP_FLAVOR") {
--- a/tfhe/docs/SUMMARY.md
+++ b/tfhe/docs/SUMMARY.md
@@ -9,9 +9,9 @@
 * [Benchmarks](getting_started/benchmarks.md)
 * [Security and Cryptography](getting_started/security_and_cryptography.md)

-## Tutorials 
+## Tutorials
 * [Homomorphic Parity Bit](tutorials/parity_bit.md)
-* [Homomorphic Case Changing on Latin String](tutorials/latin_fhe_string.md)
+* [Homomorphic Case Changing on Ascii String](tutorials/ascii_fhe_string.md)

 ## How To
 * [Configure Rust](how_to/rust_configuration.md)
@@ -19,23 +19,24 @@
 * [Compress Ciphertexts/Keys](how_to/compress.md)
 * [Use Public Key Encryption](how_to/public_key.md)
 * [Use Trivial Ciphertext](how_to/trivial_ciphertext.md)
+* [Generic Function Bounds](how_to/trait_bounds.md)
 * [Use Parallelized PBS](how_to/parallelized_pbs.md)
 * [Use the C API](how_to/c_api.md)
 * [Use the JS on WASM API](how_to/js_on_wasm_api.md)

 ## Fine-grained APIs
 * [Quick Start](fine_grained_api/quick_start.md)
-* [Boolean](fine_grained_api/Boolean/tutorial.md)
+* [Boolean](fine_grained_api/Boolean/readme.md)
    * [Operations](fine_grained_api/Boolean/operations.md)
    * [Cryptographic Parameters](fine_grained_api/Boolean/parameters.md)
    * [Serialization/Deserialization](fine_grained_api/Boolean/serialization.md)

-* [Shortint](fine_grained_api/shortint/tutorial.md)
+* [Shortint](fine_grained_api/shortint/readme.md)
    * [Operations](fine_grained_api/shortint/operations.md)
    * [Cryptographic Parameters](fine_grained_api/shortint/parameters.md)
    * [Serialization/Deserialization](fine_grained_api/shortint/serialization.md)

-* [Integer](fine_grained_api/integer/tutorial.md)
+* [Integer](fine_grained_api/integer/readme.md)
    * [Operations](fine_grained_api/integer/operations.md)
    * [Cryptographic Parameters](fine_grained_api/integer/parameters.md)
    * [Serialization/Deserialization](fine_grained_api/integer/serialization.md)
--- a/tfhe/docs/_static/carry.png
+++ b/tfhe/docs/_static/carry.png
--- a/tfhe/docs/_static/ciphertext-representation.png
+++ b/tfhe/docs/_static/ciphertext-representation.png
--- a/tfhe/docs/_static/fig6.png
+++ b/tfhe/docs/_static/fig6.png
--- a/tfhe/docs/_static/fig7.png
+++ b/tfhe/docs/_static/fig7.png
--- a/tfhe/docs/_static/fig8.png
+++ b/tfhe/docs/_static/fig8.png
--- a/tfhe/docs/_static/integer-ciphertext.png
+++ b/tfhe/docs/_static/integer-ciphertext.png
--- a/tfhe/docs/_static/lwe.png
+++ b/tfhe/docs/_static/lwe.png
--- a/tfhe/docs/_static/multisum.png
+++ b/tfhe/docs/_static/multisum.png
--- a/tfhe/docs/_static/overflow.png
+++ b/tfhe/docs/_static/overflow.png
--- a/tfhe/docs/_static/sha256.png
+++ b/tfhe/docs/_static/sha256.png
--- a/tfhe/docs/application_tutorials/dark_market.md
+++ b/tfhe/docs/application_tutorials/dark_market.md
@@ -102,25 +102,23 @@ for buy_order in buy_orders.iter_mut() {
 #### The complete algorithm in plain Rust:

 ```rust
-fn volume_match_plain(sell_orders: &mut Vec<u16>, buy_orders: &mut Vec<u16>) {
+fn fill_orders(orders: &mut [u16], total_volume: u16) {
+    let mut volume_left_to_transact = total_volume;
+    for order in orders {
+        let filled_amount = std::cmp::min(volume_left_to_transact, *order);
+        *order = filled_amount;
+        volume_left_to_transact -= filled_amount;
+    }
+}
+
+pub fn volume_match(sell_orders: &mut [u16], buy_orders: &mut [u16]) {
    let total_sell_volume: u16 = sell_orders.iter().sum();
    let total_buy_volume: u16 = buy_orders.iter().sum();

    let total_volume = std::cmp::min(total_buy_volume, total_sell_volume);

-    let mut volume_left_to_transact = total_volume;
-    for sell_order in sell_orders.iter_mut() {
-        let filled_amount = std::cmp::min(volume_left_to_transact, *sell_order);
-        *sell_order = filled_amount;
-        volume_left_to_transact -= filled_amount;
-    }
-
-    let mut volume_left_to_transact = total_volume;
-    for buy_order in buy_orders.iter_mut() {
-        let filled_amount = std::cmp::min(volume_left_to_transact, *buy_order);
-        *buy_order = filled_amount;
-        volume_left_to_transact -= filled_amount;
-    }
+    fill_orders(sell_orders, total_volume);
+    fill_orders(buy_orders, total_volume);
 }
 ```

@@ -155,15 +153,17 @@ Now, we can start implementing the algorithm with FHE:
 1. Calculate the total sell volume and the total buy volume.

 ```rust
-let mut total_sell_volume = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
-for sell_order in sell_orders.iter_mut() {
-    server_key.smart_add_assign(&mut total_sell_volume, sell_order);
+fn vector_sum(server_key: &ServerKey, orders: &mut [RadixCiphertext]) -> RadixCiphertext {
+    let mut total_volume = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
+    for order in orders {
+        server_key.smart_add_assign(&mut total_volume, order);
+    }
+    total_volume
 }

-let mut total_buy_volume = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
-for buy_order in buy_orders.iter_mut() {
-    server_key.smart_add_assign(&mut total_buy_volume, buy_order);
-}
+let mut total_sell_volume = vector_sum(server_key, sell_orders);
+let mut total_buy_volume = vector_sum(server_key, buy_orders);
+
 ```

 2. Find the total volume that will be transacted by taking the minimum of the total sell volume and the total buy
@@ -177,17 +177,21 @@ let total_volume = server_key.smart_min(&mut total_sell_volume, &mut total_buy_v
-reduce code duplication since the code for filling buy orders and sell orders are the same.
+reduce code duplication since the code for filling buy orders and sell orders is the same.

 ```rust
-let fill_orders = |orders: &mut [RadixCiphertext]| {
-    let mut volume_left_to_transact = total_volume.clone();
-    for mut order in orders.iter_mut() {
-        let mut filled_amount = server_key.smart_min(&mut volume_left_to_transact, &mut order);
+fn fill_orders(
+    server_key: &ServerKey,
+    orders: &mut [RadixCiphertext],
+    total_volume: RadixCiphertext,
+) {
+    let mut volume_left_to_transact = total_volume;
+    for order in orders {
+        let mut filled_amount = server_key.smart_min(&mut volume_left_to_transact, order);
        server_key.smart_sub_assign(&mut volume_left_to_transact, &mut filled_amount);
        *order = filled_amount;
    }
-};
+}

-fill_orders(sell_orders);
-fill_orders(buy_orders);
+fill_orders(server_key, sell_orders, total_volume.clone());
+fill_orders(server_key, buy_orders, total_volume);
 ```

 #### The complete algorithm in TFHE-rs:
@@ -195,36 +199,40 @@ fill_orders(buy_orders);
 ```rust
 const NUMBER_OF_BLOCKS: usize = 8;

-fn volume_match_fhe(
+fn vector_sum(server_key: &ServerKey, orders: &mut [RadixCiphertext]) -> RadixCiphertext {
+    let mut total_volume = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
+    for order in orders {
+        server_key.smart_add_assign(&mut total_volume, order);
+    }
+    total_volume
+}
+
+fn fill_orders(
+    server_key: &ServerKey,
+    orders: &mut [RadixCiphertext],
+    total_volume: RadixCiphertext,
+) {
+    let mut volume_left_to_transact = total_volume;
+    for order in orders {
+        let mut filled_amount = server_key.smart_min(&mut volume_left_to_transact, order);
+        server_key.smart_sub_assign(&mut volume_left_to_transact, &mut filled_amount);
+        *order = filled_amount;
+    }
+}
+
+pub fn volume_match(
    sell_orders: &mut [RadixCiphertext],
    buy_orders: &mut [RadixCiphertext],
    server_key: &ServerKey,
 ) {
-    let mut total_sell_volume = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
-    for sell_order in sell_orders.iter_mut() {
-        server_key.smart_add_assign(&mut total_sell_volume, sell_order);
-    }
-
-    let mut total_buy_volume = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
-    for buy_order in buy_orders.iter_mut() {
-        server_key.smart_add_assign(&mut total_buy_volume, buy_order);
-    }
+    let mut total_sell_volume = vector_sum(server_key, sell_orders);
+    let mut total_buy_volume = vector_sum(server_key, buy_orders);

    let total_volume = server_key.smart_min(&mut total_sell_volume, &mut total_buy_volume);

-    let fill_orders = |orders: &mut [RadixCiphertext]| {
-        let mut volume_left_to_transact = total_volume.clone();
-        for mut order in orders.iter_mut() {
-            let mut filled_amount = server_key.smart_min(&mut volume_left_to_transact, &mut order);
-            server_key.smart_sub_assign(&mut volume_left_to_transact, &mut filled_amount);
-            *order = filled_amount;
-        }
-    };
-
-    fill_orders(sell_orders);
-    fill_orders(buy_orders);
+    fill_orders(server_key, sell_orders, total_volume.clone());
+    fill_orders(server_key, buy_orders, total_volume);
 }
-
 ```

 ### Optimizing the implementation
@@ -235,63 +243,73 @@ fn volume_match_fhe(

 * We can parallelize vector sum with Rayon and `reduce` operation.
 ```rust
-let parallel_vector_sum = |vec: &mut [RadixCiphertext]| {
-    vec.to_vec().into_par_iter().reduce(
+fn vector_sum(server_key: &ServerKey, orders: Vec<RadixCiphertext>) -> RadixCiphertext {
+    orders.into_par_iter().reduce(
        || server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS),
-        |mut acc: RadixCiphertext, mut ele: RadixCiphertext| { 
+        |mut acc: RadixCiphertext, mut ele: RadixCiphertext| {
            server_key.smart_add_parallelized(&mut acc, &mut ele)
        },
    )
-};
+}
 ```

 * We can run vector summation on `buy_orders` and `sell_orders` in parallel since these operations do not depend on each other.
 ```rust
-let (mut total_sell_volume, mut total_buy_volume) =
-    rayon::join(|| vector_sum(sell_orders), || vector_sum(buy_orders));
+let (mut total_sell_volume, mut total_buy_volume) = rayon::join(
+    || vector_sum(server_key, sell_orders.to_owned()),
+    || vector_sum(server_key, buy_orders.to_owned()),
+);
 ```

 * We can match sell and buy orders in parallel since the matching does not depend on each other.
 ```rust
-rayon::join(|| fill_orders(sell_orders), || fill_orders(buy_orders));
+rayon::join(
+    || fill_orders(server_key, sell_orders, total_volume.clone()),
+    || fill_orders(server_key, buy_orders, total_volume.clone()),
+);
 ```

 #### Optimized algorithm
 ```rust
-fn volume_match_fhe_parallelized(
+fn vector_sum(server_key: &ServerKey, orders: Vec<RadixCiphertext>) -> RadixCiphertext {
+    orders.into_par_iter().reduce(
+        || server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS),
+        |mut acc: RadixCiphertext, mut ele: RadixCiphertext| {
+            server_key.smart_add_parallelized(&mut acc, &mut ele)
+        },
+    )
+}
+
+fn fill_orders(
+    server_key: &ServerKey,
+    orders: &mut [RadixCiphertext],
+    total_volume: RadixCiphertext,
+) {
+    let mut volume_left_to_transact = total_volume;
+    for order in orders {
+        let mut filled_amount =
+            server_key.smart_min_parallelized(&mut volume_left_to_transact, order);
+        server_key.smart_sub_assign_parallelized(&mut volume_left_to_transact, &mut filled_amount);
+        *order = filled_amount;
+    }
+}
+
+pub fn volume_match(
    sell_orders: &mut [RadixCiphertext],
    buy_orders: &mut [RadixCiphertext],
    server_key: &ServerKey,
 ) {
-    let parallel_vector_sum = |vec: &mut [RadixCiphertext]| {
-        vec.to_vec().into_par_iter().reduce(
-            || server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS),
-            |mut acc: RadixCiphertext, mut ele: RadixCiphertext| {
-                server_key.smart_add_parallelized(&mut acc, &mut ele)
-            },
-        )
-    };
-
    let (mut total_sell_volume, mut total_buy_volume) = rayon::join(
-        || parallel_vector_sum(sell_orders),
-        || parallel_vector_sum(buy_orders),
+        || vector_sum(server_key, sell_orders.to_owned()),
+        || vector_sum(server_key, buy_orders.to_owned()),
    );

    let total_volume =
        server_key.smart_min_parallelized(&mut total_sell_volume, &mut total_buy_volume);
-
-    let fill_orders = |orders: &mut [RadixCiphertext]| {
-        let mut volume_left_to_transact = total_volume.clone();
-        for mut order in orders.iter_mut() {
-            let mut filled_amount =
-                server_key.smart_min_parallelized(&mut volume_left_to_transact, &mut order);
-            server_key
-                .smart_sub_assign_parallelized(&mut volume_left_to_transact, &mut filled_amount);
-            *order = filled_amount;
-        }
-    };
-    
-    rayon::join(|| fill_orders(sell_orders), || fill_orders(buy_orders));
+    rayon::join(
+        || fill_orders(server_key, sell_orders, total_volume.clone()),
+        || fill_orders(server_key, buy_orders, total_volume.clone()),
+    );
 }
 ```

@@ -312,14 +330,19 @@ We will call the new list the "prefix sum" of the array.

 The new version for the plain `fill_orders` is as follows:
 ```rust
-let fill_orders = |orders: &mut [u64], prefix_sum: &[u64], total_orders: u64|{
+fn fill_orders(total_orders: u16, orders: &mut [u16], prefix_sum_arr: &[u16]) {
-    orders.iter().for_each(|order : &mut u64| {
+    for (i, order) in orders.iter_mut().enumerate() {
-        if (total_orders >= prefix_sum[i]) {
-            continue;
-        } else if total_orders >= prefix_sum.get(i-1).unwrap_or(0) {
-            *order = total_orders - prefix_sum.get(i-1).unwrap_or(0);
-        } else {
+        let previous_prefix_sum = if i == 0 { 0 } else { prefix_sum_arr[i - 1] };
+
+        let diff = total_orders as i64 - previous_prefix_sum as i64;
+
+        if diff < 0 {
            *order = 0;
+        } else if diff < i64::from(*order) {
+            *order = diff as u16;
+        } else {
+            // Enough volume is left: the order is filled completely and stays unchanged.
+            continue;
        }
-    });
+    }
-};
+}
@@ -328,11 +351,15 @@ let fill_orders = |orders: &mut [u64], prefix_sum: &[u64], total_orders: u64|{
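-To write this new function we need transform the conditional code into a mathematical expression since FHE does not support conditional operations.
+To write this new function we need to transform the conditional code into a mathematical expression, since FHE does not support conditional operations.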
 ```rust

-let fill_orders = |orders: &mut [u64], prefix_sum: &[u64], total_orders: u64| {
-    orders.iter().for_each(|order| : &mut){
-        *order = *order + ((total_orders >= prefix_sum - std::cmp::min(total_orders, prefix_sum.get(i - 1).unwrap_or(&0).clone()) - *order);
+fn fill_orders(total_orders: u16, orders: &mut [u16], prefix_sum_arr: &[u16]) {
+    for (i, order) in orders.iter_mut().enumerate() {
+        let previous_prefix_sum = if i == 0 { 0 } else { prefix_sum_arr[i - 1] };
+
+        *order = (total_orders as i64 - previous_prefix_sum as i64)
+            .max(0)
+            .min(*order as i64) as u16;
    }
-};
+}
 ```

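-New `fill_order` function requires a prefix sum array. We are going to calculate this prefix sum array in parallel 
+The new `fill_orders` function requires a prefix sum array. We are going to calculate this prefix sum array in parallel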
@@ -345,108 +372,129 @@ So we modify how the algorithm is implemented, but we don't change the algorithm

 Here is the modified version of the algorithm in TFHE-rs:
 ```rust
-fn volume_match_fhe_modified(
+/// Computes the inclusive prefix sums of `arr`: element `i` of the result is the encrypted sum
+/// `arr[0] + ... + arr[i]`. An empty input yields an empty vector.
+fn compute_prefix_sum(server_key: &ServerKey, arr: &[RadixCiphertext]) -> Vec<RadixCiphertext> {
+    if arr.is_empty() {
+        return arr.to_vec();
+    }
+    // Copy the input and pad it with trivial zeros up to the next power of two, so that every
+    // sweep below can split the vector into chunks of exactly `2^(d + 1)` elements.
+    let mut prefix_sum: Vec<RadixCiphertext> = (0..arr.len().next_power_of_two())
+        .into_par_iter()
+        .map(|i| {
+            if i < arr.len() {
+                arr[i].clone()
+            } else {
+                server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS)
+            }
+        })
+        .collect();
+    // Up sweep: in every chunk, add the last element of the left half to the chunk's last
+    // element. Afterwards the final element of the vector holds the sum of all elements.
+    for d in 0..prefix_sum.len().ilog2() {
+        prefix_sum
+            .par_chunks_exact_mut(2_usize.pow(d + 1))
+            .for_each(move |chunk| {
+                let length = chunk.len();
+                let mut left = chunk.get((length - 1) / 2).unwrap().clone();
+                server_key.smart_add_assign_parallelized(chunk.last_mut().unwrap(), &mut left)
+            });
+    }
+    // Down sweep: save the total, reset the last element to zero, then walk the chunk sizes
+    // from largest to smallest; in every chunk the last element of the left half receives the
+    // chunk's old last value, and the chunk's last element is increased by the old left value.
+    let last = prefix_sum.last().unwrap().clone();
+    *prefix_sum.last_mut().unwrap() = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
+    for d in (0..prefix_sum.len().ilog2()).rev() {
+        prefix_sum
+            .par_chunks_exact_mut(2_usize.pow(d + 1))
+            .for_each(move |chunk| {
+                let length = chunk.len();
+                let temp = chunk.last().unwrap().clone();
+                let mut mid = chunk.get((length - 1) / 2).unwrap().clone();
+                server_key.smart_add_assign_parallelized(chunk.last_mut().unwrap(), &mut mid);
+                chunk[(length - 1) / 2] = temp;
+            });
+    }
+    // Element `i` now holds the sum of the elements before `i`. Appending the saved total and
+    // skipping the leading zero shifts this to inclusive sums; the slice also drops the padding.
+    prefix_sum.push(last);
+    prefix_sum[1..=arr.len()].to_vec()
+}
+
+/// Overwrites every order with the amount of it that gets filled.
+///
+/// Order `i` becomes `min(max(total_orders - prefix_sum_arr[i - 1], 0), orders[i])`, where the
+/// previous prefix sum is taken as 0 for the first order. `prefix_sum_arr` is expected to hold
+/// the prefix sums of `orders`. The orders are processed in parallel.
+fn fill_orders(
+    server_key: &ServerKey,
+    total_orders: &RadixCiphertext,
+    orders: &mut [RadixCiphertext],
+    prefix_sum_arr: &[RadixCiphertext],
+) {
+    orders
+        .into_par_iter()
+        .enumerate()
+        .for_each(move |(i, order)| {
+            // (total_orders - previous_prefix_sum).max(0)
+            let mut diff = if i == 0 {
+                // No order precedes the first one, so the whole volume is still available.
+                total_orders.clone()
+            } else {
+                let previous_prefix_sum = &prefix_sum_arr[i - 1];
+
+                // total_orders - previous_prefix_sum
+                // The `smart_*` calls take `&mut` operands while the inputs are shared
+                // references, so clones are passed.
+                let mut diff = server_key.smart_sub_parallelized(
+                    &mut total_orders.clone(),
+                    &mut previous_prefix_sum.clone(),
+                );
+
+                // total_orders > previous_prefix_sum
+                let mut cond = server_key.smart_gt_parallelized(
+                    &mut total_orders.clone(),
+                    &mut previous_prefix_sum.clone(),
+                );
+
+                // (total_orders - previous_prefix_sum) * (total_orders > previous_prefix_sum)
+                // = (total_orders - previous_prefix_sum).max(0)
+                server_key.smart_mul_parallelized(&mut cond, &mut diff)
+            };
+
+            // (total_orders - previous_prefix_sum).max(0).min(*order);
+            *order = server_key.smart_min_parallelized(&mut diff, order);
+        });
+}
+
+/// FHE implementation of the volume matching algorithm.
+///
+/// In this function, the implemented algorithm is modified to utilize more concurrency.
+///
+/// Matches the given encrypted [sell_orders] with encrypted [buy_orders] using the given
+/// [server_key]. The amount of the orders that are successfully filled is written over the original
+/// order count.
+pub fn volume_match(
    sell_orders: &mut [RadixCiphertext],
    buy_orders: &mut [RadixCiphertext],
    server_key: &ServerKey,
 ) {
-    let compute_prefix_sum = |arr: &[RadixCiphertext]| {
-        if arr.is_empty() {
-            return arr.to_vec();
-        }
-        let mut prefix_sum: Vec<RadixCiphertext> = (0..arr.len().next_power_of_two())
-            .into_par_iter()
-            .map(|i| {
-                if i < arr.len() {
-                    arr[i].clone()
-                } else {
-                    server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS)
-                }
-            })
-            .collect();
-        // Up sweep
-        for d in 0..(prefix_sum.len().ilog2() as u32) {
-            prefix_sum
-                .par_chunks_exact_mut(2_usize.pow(d + 1))
-                .for_each(move |chunk| {
-                    let length = chunk.len();
-                    let mut left = chunk.get((length - 1) / 2).unwrap().clone();
-                    server_key.smart_add_assign_parallelized(chunk.last_mut().unwrap(), &mut left)
-                });
-        }
-        // Down sweep
-        let last = prefix_sum.last().unwrap().clone();
-        *prefix_sum.last_mut().unwrap() = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);
-        for d in (0..(prefix_sum.len().ilog2() as u32)).rev() {
-            prefix_sum
-                .par_chunks_exact_mut(2_usize.pow(d + 1))
-                .for_each(move |chunk| {
-                    let length = chunk.len();
-                    let t = chunk.last().unwrap().clone();
-                    let mut left = chunk.get((length - 1) / 2).unwrap().clone();
-                    server_key.smart_add_assign_parallelized(chunk.last_mut().unwrap(), &mut left);
-                    chunk[(length - 1) / 2] = t;
-                });
-        }
-        prefix_sum.push(last);
-        prefix_sum[1..=arr.len()].to_vec()
-    };
-
    println!("Creating prefix sum arrays...");
    let time = Instant::now();
    let (prefix_sum_sell_orders, prefix_sum_buy_orders) = rayon::join(
-        || compute_prefix_sum(sell_orders),
-        || compute_prefix_sum(buy_orders),
+        || compute_prefix_sum(server_key, sell_orders),
+        || compute_prefix_sum(server_key, buy_orders),
    );
    println!("Created prefix sum arrays in {:?}", time.elapsed());

-    let fill_orders = |total_orders: &RadixCiphertext,
-                        orders: &mut [RadixCiphertext],
-                        prefix_sum_arr: &[RadixCiphertext]| {
-        orders
-            .into_par_iter()
-            .enumerate()
-            .for_each(move |(i, order)| {
-                server_key.smart_add_assign_parallelized(
-                    order,
-                    &mut server_key.smart_mul_parallelized(
-                        &mut server_key
-                            .smart_ge_parallelized(&mut order.clone(), &mut total_orders.clone()),
-                        &mut server_key.smart_sub_parallelized(
-                            &mut server_key.smart_sub_parallelized(
-                                &mut total_orders.clone(),
-                                &mut server_key.smart_min_parallelized(
-                                    &mut total_orders.clone(),
-                                    &mut prefix_sum_arr
-                                        .get(i - 1)
-                                        .unwrap_or(
-                                            &server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS),
-                                        )
-                                        .clone(),
-                                ),
-                            ),
-                            &mut order.clone(),
-                        ),
-                    ),
-                );
-            });
-    };
+    let zero = server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS);

-    let total_buy_orders = &mut prefix_sum_buy_orders
-        .last()
-        .unwrap_or(&server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS))
-        .clone();
+    let total_buy_orders = prefix_sum_buy_orders.last().unwrap_or(&zero);

-    let total_sell_orders = &mut prefix_sum_sell_orders
-        .last()
-        .unwrap_or(&server_key.create_trivial_zero_radix(NUMBER_OF_BLOCKS))
-        .clone();
+    let total_sell_orders = prefix_sum_sell_orders.last().unwrap_or(&zero);

    println!("Matching orders...");
    let time = Instant::now();
    rayon::join(
-        || fill_orders(total_sell_orders, buy_orders, &prefix_sum_buy_orders),
-        || fill_orders(total_buy_orders, sell_orders, &prefix_sum_sell_orders),
+        || {
+            fill_orders(
+                server_key,
+                total_sell_orders,
+                buy_orders,
+                &prefix_sum_buy_orders,
+            )
+        },
+        || {
+            fill_orders(
+                server_key,
+                total_buy_orders,
+                sell_orders,
+                &prefix_sum_sell_orders,
+            )
+        },
    );
    println!("Matched orders in {:?}", time.elapsed());
 }
--- a/tfhe/docs/application_tutorials/sha256_bool.md
+++ b/tfhe/docs/application_tutorials/sha256_bool.md
@@ -17,11 +17,8 @@ The sha256 function processes the input data in blocks or chunks of 512 bits. Be

 Or visually:

-```
-0                                   L   L+1                              L+1+k                  L+1+k+64
-|-----------------------------------|---|--------------------------------|----------------------|
-    Original input (L bits)        "1" bit          "0" bits             Encoding of the number L
-```
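+![Layout of the padded input: the original input (L bits), a single "1" bit, k "0" bits, and the 64-bit encoding of the number L](../_static/sha256.png)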
+
 Where the numbers on the top represent the length of the padded input at each position, and L+1+k+64 is a multiple of 512 (the length of the padded input).

 #### Operations and functions
@@ -63,7 +60,7 @@ Note that all these operations can be evaluated homomorphically. ROTR and SHR ca

 #### Sha256 computation

-As we have mentioned, the sha256 function works with chunks of 512 bits. For each chunk, we will compute 64 32-bit words. 16 will come from the 512 bits and the rest will be computed using the previous functions. After computing the 64 words, and still within the same chunk iteration, a compression loop will compute a hash value (8 32-bit words), again using the previous functions and some constants to mix everything up. When we finish the last chunk iteration, the resulting hash values will be the output of the sha256 function. 
+As we have mentioned, the sha256 function works with chunks of 512 bits. For each chunk, we will compute 64 32-bit words. 16 will come from the 512 bits and the rest will be computed using the previous functions. After computing the 64 words, and still within the same chunk iteration, a compression loop will compute a hash value (8 32-bit words), again using the previous functions and some constants to mix everything up. When we finish the last chunk iteration, the resulting hash values will be the output of the sha256 function.

 Here is how this function looks like using arrays of 32 bools to represent words:

@@ -139,7 +136,11 @@ fn sha256(padded_input: Vec<bool>) -> [bool; 256] {

 The key idea is that we can replace each bit of ```padded_input``` with a Fully Homomorphic Encryption of the same bit value, and operate over the encrypted values using homomorphic operations. To achieve this we need to change the function signatures and deal with the borrowing rules of the Ciphertext type (which represents an encrypted bit) but the structure of the sha256 function remains the same. The part of the code that requires more consideration is the implementation of the sha256 operations, since they will use homomorphic boolean operations internally.

-Homomorphic operations are really expensive, so we have to remove their unnecessary use and maximize parallelization in order to speed up the program. To simplify our code we use the Rayon crate which provides parallel iterators and efficiently manages threads. Let's now take a look at each sha256 operation!
+Homomorphic operations are really expensive, so we have to avoid using them unnecessarily and maximize parallelization in order to speed up the program. To simplify our code, we use the Rayon crate, which provides parallel iterators and efficiently manages threads.
+
+The final code is available at [tfhe/examples/sha256_bool](https://github.com/zama-ai/tfhe-rs/tree/main/tfhe/examples/sha256_bool).
+
+Let's now take a look at each sha256 operation!

 #### Rotate Right and Shift Right

@@ -317,6 +318,6 @@ By using ```stdin``` we can supply the data to hash using a file instead of the

 Our implementation also accepts hexadecimal inputs. To be considered as such, the input must start with "0x" and contain only valid hex digits (otherwise it's interpreted as text).

-Finally see that padding is executed on the client side. This has the advantage of hiding the exact length of the input to the server, who already doesn't know anything about the contents of it but may extract information from the length. 
+Finally, note that padding is executed on the client side. This has the advantage of hiding the exact length of the input from the server, which already knows nothing about its contents but could otherwise extract information from its length.

 Another option would be to perform padding on the server side. The padding function would receive the encrypted input and pad it with trivial bit encryptions. We could then integrate the padding function inside the ```sha256_fhe``` function computed by the server.
--- a/tfhe/docs/core_crypto/tutorial.md
+++ b/tfhe/docs/core_crypto/tutorial.md
@@ -9,7 +9,7 @@ Welcome to this tutorial about `TFHE-rs` `core_crypto` module.
 To use `TFHE-rs`, it first has to be added as a dependency in the `Cargo.toml`:

 ```toml
-tfhe = { version = "0.3.0", features = [ "x86_64-unix" ] }
+tfhe = { version = "0.4.0", features = [ "x86_64-unix" ] }
 ```

 This enables the `x86_64-unix` feature to have efficient implementations of various algorithms for `x86_64` CPUs on a Unix-like system. The 'unix' suffix indicates that the `UnixSeeder`, which uses `/dev/random` to generate random numbers, is activated as a fallback if no hardware number generator is available (like `rdseed` on `x86_64` or if the [`Randomization Services`](https://developer.apple.com/documentation/security/1399291-secrandomcopybytes?language=objc) on Apple platforms are not available). To avoid having the `UnixSeeder` as a potential fallback or to run on non-Unix systems (e.g., Windows), the `x86_64` feature is sufficient.
@@ -19,19 +19,19 @@ For Apple Silicon, the `aarch64-unix` or `aarch64` feature should be enabled. `a
 In short: For `x86_64`-based machines running Unix-like OSes:

 ```toml
-tfhe = { version = "0.3.0", features = ["x86_64-unix"] }
+tfhe = { version = "0.4.0", features = ["x86_64-unix"] }
 ```

 For Apple Silicon or aarch64-based machines running Unix-like OSes:

 ```toml
-tfhe = { version = "0.3.0", features = ["aarch64-unix"] }
+tfhe = { version = "0.4.0", features = ["aarch64-unix"] }
 ```

 For `x86_64`-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:

 ```toml
-tfhe = { version = "0.3.0", features = ["x86_64"] }
+tfhe = { version = "0.4.0", features = ["x86_64"] }
 ```

 ### Commented code to double a 2-bit message in a leveled fashion and using a PBS with the `core_crypto` module.
--- a/tfhe/docs/fine_grained_api/Boolean/parameters.md
+++ b/tfhe/docs/fine_grained_api/Boolean/parameters.md
@@ -38,6 +38,7 @@ fn main() {
            DecompositionLevelCount(2),
            DecompositionBaseLog(2),
            DecompositionLevelCount(5),
+            EncryptionKeyChoice::Small,
        )
    };
 }
--- a/Show More
+++ b/Show More