Compare commits


1 Commit

Author          SHA1         Message                 Date
Enzo Di Maria   cfb1d1340e   refactor(gpu): AES 64   2025-10-28 10:42:13 +01:00
273 changed files with 4553 additions and 358699 deletions


@@ -7,5 +7,3 @@ updates:
# Check for updates to GitHub Actions every sunday
interval: "weekly"
day: "sunday"
cooldown:
default-days: 7


@@ -9,14 +9,12 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] this workflow needs to react to any event in a pull-request
jobs:
trigger-tests:
name: approve_label/trigger-tests
runs-on: ubuntu-latest
permissions:
pull-requests: write # Needed to apply or remove label
pull-requests: write
steps:
- name: Get current labels
uses: snnaplab/get-labels-action@f426df40304808ace3b5282d4f036515f7609576


@@ -29,8 +29,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: aws_tfhe_backward_compat_tests/setup-instance
@@ -60,10 +58,10 @@ jobs:
backward-compat-tests:
name: aws_tfhe_backward_compat_tests/backward-compat-tests (bpr)
needs: [ setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
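Note on the relocated concurrency block above: the group key appends the commit SHA only when the run is on main, so pushes to main never share a group (and are never cancelled), while non-main runs reuse one group and cancel each other. A stand-alone shell sketch of how that expression resolves; REF, SHA and WORKFLOW_REF are illustrative stand-ins for the corresponding github.* context values, not real workflow context:

    REF="refs/heads/main"
    SHA="cfb1d1340e"
    WORKFLOW_REF="zama-ai/tfhe-rs/.github/workflows/aws_tfhe_backward_compat_tests.yml@refs/heads/main"
    # Mirrors: group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
    if [ "${REF}" = "refs/heads/main" ]; then
      GROUP="${WORKFLOW_REF}${SHA}"
      CANCEL_IN_PROGRESS="false"
    else
      GROUP="${WORKFLOW_REF}"
      CANCEL_IN_PROGRESS="true"
    fi
    echo "group=${GROUP} cancel-in-progress=${CANCEL_IN_PROGRESS}"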


@@ -27,14 +27,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: aws_tfhe_fast_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}


@@ -33,8 +33,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: aws_tfhe_integer_tests/should-run
@@ -44,7 +42,7 @@ jobs:
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
steps.changed-files.outputs.integer_any_changed }}


@@ -23,8 +23,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
setup-instance:
name: aws_tfhe_noise_checks/setup-instance


@@ -33,8 +33,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: aws_tfhe_signed_integer_tests/should-run
@@ -45,7 +43,7 @@ jobs:
github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
integer_test: ${{ github.event_name == 'workflow_dispatch' ||
steps.changed-files.outputs.integer_any_changed }}


@@ -30,8 +30,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: aws_tfhe_tests/should-run
@@ -39,7 +37,7 @@ jobs:
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}


@@ -26,8 +26,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: aws_tfhe_wasm_tests/setup-instance
@@ -59,7 +57,7 @@ jobs:
name: aws_tfhe_wasm_tests/wasm-tests
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}_${{github.event_name}}
group: ${{ github.workflow_ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:

.github/workflows/benchmark_boolean.yml (new file, 156 lines)

@@ -0,0 +1,156 @@
# Run boolean benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_boolean
on:
workflow_dispatch:
schedule:
# Weekly benchmarks will be triggered each Saturday at 1a.m.
- cron: '0 1 * * 6'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
jobs:
setup-instance:
name: benchmark_boolean/setup-instance
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
boolean-benchmarks:
name: benchmark_boolean/boolean-benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Run benchmarks with AVX512
run: |
make bench_boolean
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpc7a.96xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512
env:
REF_NAME: ${{ github.ref_name }}
- name: Measure key sizes
run: |
make measure_boolean_key_sizes
- name: Parse key sizes results
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/boolean_key_sizes.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_boolean
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Boolean benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_boolean/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, boolean-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (boolean-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"


@@ -0,0 +1,234 @@
# Run core crypto benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_core_crypto
on:
workflow_dispatch:
inputs:
param_type:
description: "Parameters type"
type: choice
default: classical
options:
- classical
- multi_bit
- classical + multi_bit
- classical_documentation
- multi_bit_documentation
- classical_documentation + multi_bit_documentation
bench_type:
description: "Benchmarks type"
type: choice
default: latency
options:
- latency
- throughput
- both
schedule:
# Weekly benchmarks will be triggered each Saturday at 5a.m.
- cron: '0 5 * * 6'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
jobs:
prepare-matrix:
name: benchmark_core_crypto/prepare-matrix
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
param_type: ${{ steps.set_param_type.outputs.param_type }}
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
steps:
- name: Set parameters types
if: github.event_name == 'workflow_dispatch'
run: |
if [[ "${INPUTS_PARAM_TYPE}" == "classical + multi_bit" ]]; then
echo "PARAM_TYPE=[\"classical\", \"multi_bit\"]" >> "${GITHUB_ENV}"
elif [[ "${INPUTS_PARAM_TYPE}" == "classical_documentation + multi_bit_documentation" ]]; then
echo "PARAM_TYPE=[\"classical_documentation\", \"multi_bit_documentation\"]" >> "${GITHUB_ENV}"
else
echo "PARAM_TYPE=[\"${INPUTS_PARAM_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_PARAM_TYPE: ${{ inputs.param_type }}
- name: Default parameters type
if: github.event_name != 'workflow_dispatch'
run: |
echo "PARAM_TYPE=[\"classical\"]" >> "${GITHUB_ENV}"
- name: Set benchmark types
if: github.event_name == 'workflow_dispatch'
run: |
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
if [[ "${INPUTS_BENCH_TYPE}" == "both" ]]; then
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
else
echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
- name: Default benchmark type
if: github.event_name != 'workflow_dispatch'
run: |
echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
- name: Set parameters types output
id: set_param_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "param_type=${{ toJSON(env.PARAM_TYPE) }}" >> "${GITHUB_OUTPUT}"
- name: Set benchmark types output
id: set_bench_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_core_crypto/setup-instance
needs: prepare-matrix
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
core-crypto-benchmarks:
name: benchmark_core_crypto/core-crypto-benchmarks
needs: [ prepare-matrix, setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
timeout-minutes: 1440 # 24 hours
strategy:
max-parallel: 1
matrix:
param_type: ${{ fromJSON(needs.prepare-matrix.outputs.param_type) }}
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Run benchmarks with AVX512
run: |
make bench_ks_pbs
make bench_pbs
make bench_pbs128
make bench_ks
env:
BENCH_PARAM_TYPE: ${{ matrix.param_type }}
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpc7a.96xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--name-suffix avx512 \
--walk-subdirs
env:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_core_crypto_${{ matrix.param_type }}_pbs
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "PBS benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_core_crypto/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, core-crypto-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (core-crypto-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"


@@ -1,87 +0,0 @@
# Run benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_cpu
on:
workflow_dispatch:
inputs:
command:
description: "Benchmark command to run"
type: choice
options:
- integer
- signed_integer
- integer_compression
- integer_zk
- shortint
- shortint_oprf
- hlapi
- hlapi_erc20
- hlapi_dex
- hlapi_noise_squash
- tfhe_zk_pok
- boolean
- pbs
- pbs128
- ks
- ks_pbs
op_flavor:
description: "Operations set to run"
type: choice
default: default
options:
- default
- fast_default
- smart
- unchecked
- misc
precisions_set:
description: "Bit precisions set"
type: choice
default: fast
options:
- fast
- all
- documentation
bench_type:
description: "Benchmarks type"
type: choice
default: latency
options:
- latency
- throughput
- both
params_type:
description: "Parameters type"
type: choice
default: classical
options:
- classical
- multi_bit
- classical + multi_bit
- classical_documentation
- multi_bit_documentation
- classical_documentation + multi_bit_documentation
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
run-benchmarks:
name: benchmark_cpu/run-benchmarks
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: ${{ inputs.command }}
op_flavor: ${{ inputs.op_flavor }}
bench_type: ${{ inputs.bench_type }}
params_type: ${{ inputs.params_type }}
precisions_set: ${{ inputs.precisions_set }}
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}


@@ -1,222 +0,0 @@
# Run CPU latencies benchmarks AWS VMs and return parsed results to Slab CI bot.
name: benchmark_cpu_weekly
on:
schedule:
# Weekly schedules are separated in two groups to avoid spawning too many the machines at once thus risking resource shortages.
# Group 1
# -------
# Weekly benchmarks will be triggered each Saturday at 1a.m.
- cron: '0 1 * * 6'
# Group 2
# -------
# Weekly benchmarks will be triggered each Sunday at 3a.m.
- cron: '0 3 * * 0'
# Quarterly benchmarks will be triggered right before the end of the quarter, the 25th of the current month at 4a.m.
# These benchmarks are far longer to execute, hence the reason to run them only four times a year.
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
permissions: {}
# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow
jobs:
prepare-inputs:
name: benchmark_cpu_weekly/prepare-inputs
runs-on: ubuntu-latest
outputs:
is_weekly_bench_group_1: ${{ steps.check_bench_group_1.outputs.is_weekly_bench_group_1 }}
is_weekly_bench_group_2: ${{ steps.check_bench_group_2.outputs.is_weekly_bench_group_2 }}
is_quarterly_bench: ${{ steps.check_quarterly_bench.outputs.is_quarterly_bench }}
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
precisions_set: ${{ steps.set_precisions_set.outputs.precisions_set }}
steps:
- name: Check is weekly bench group 1
id: check_bench_group_1
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "is_weekly_bench_group_1=${{ github.event.schedule == '0 1 * * 6' }}" >> "${GITHUB_OUTPUT}"
- name: Check is weekly bench group 2
id: check_bench_group_2
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "is_weekly_bench_group_2=${{ github.event.schedule == '0 3 * * 0' }}" >> "${GITHUB_OUTPUT}"
- name: Check is quarterly bench
id: check_quarterly_bench
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "is_quarterly_bench=${{ github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *' }}" >> "${GITHUB_OUTPUT}"
- name: Weekly benchmarks
if: steps.check_bench_group_1.outputs.is_weekly_bench_group_1 || steps.check_bench_group_2.outputs.is_weekly_bench_group_2
run: |
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
echo "PRECISIONS_SET=false" >> "${GITHUB_ENV}"
- name: Quarterly benchmarks
if: steps.check_quarterly_bench.outputs.is_quarterly_bench
run: |
echo "OP_FLAVOR=[\"default\", \"unchecked\"]" >> "${GITHUB_ENV}"
echo "PRECISIONS_SET=true" >> "${GITHUB_ENV}"
- name: Set operation flavor output
id: set_op_flavor
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
- name: Set bit precisions output
id: set_precisions_set
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "precisions_set=${{ toJSON(env.PRECISIONS_SET) }}" >> "${GITHUB_OUTPUT}"
run-benchmarks-integer:
name: benchmark_gpu_weekly/run-benchmarks-integer
if: github.repository == 'zama-ai/tfhe-rs'
&& (needs.prepare-inputs.outputs.is_weekly_bench_group_1 || needs.prepare-inputs.outputs.is_quarterly_bench)
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: integer,signed_integer, integer_compression
op_flavor: ${{ needs.prepare-inputs.outputs.op_flavor }}
precisions_set: ${{ needs.prepare-inputs.outputs.precisions_set }}
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-integer-zk-pke:
name: benchmark_gpu_weekly/run-benchmarks-integer-zk-pke
if: github.repository == 'zama-ai/tfhe-rs'
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: integer_zk
additional_file_to_parse: pke_zk_crs_sizes.csv
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-hlapi-erc20:
name: benchmark_gpu_weekly/run-benchmarks-hlapi-erc20
if: github.repository == 'zama-ai/tfhe-rs'
&& needs.prepare-inputs.outputs.is_weekly_bench_group_2
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: hlapi_erc20
additional_file_to_parse: erc20_pbs_count.csv
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-hlapi-dex:
name: benchmark_gpu_weekly/run-benchmarks-hlapi-dex
if: github.repository == 'zama-ai/tfhe-rs'
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: hlapi_dex
additional_file_to_parse: dex_swap_request_update_dex_balance_pbs_count.csv,dex_swap_request_finalize_pbs_count.csv,dex_swap_claim_prepare_pbs_count.csv,dex_swap_claim_update_dex_balance_pbs_count.csv
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-core-crypto:
name: benchmark_gpu_weekly/run-benchmarks-core-crypto
if: github.repository == 'zama-ai/tfhe-rs'
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: ks,pbs,pbs128,ks_pbs
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-shortint:
name: benchmark_gpu_weekly/run-benchmarks-shortint
if: github.repository == 'zama-ai/tfhe-rs'
&& (needs.prepare-inputs.outputs.is_weekly_bench_group_2 || needs.prepare-inputs.outputs.is_quarterly_bench)
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
op_flavor: ${{ needs.prepare-inputs.outputs.op_flavor }}
command: shortint
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-boolean:
name: benchmark_gpu_weekly/run-benchmarks-boolean
if: github.repository == 'zama-ai/tfhe-rs'
&& needs.prepare-inputs.outputs.is_weekly_bench_group_2
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: boolean
additional_recipe: measure_boolean_key_sizes
additional_file_to_parse: boolean_key_sizes.csv
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-tfhe-zk-pok:
name: benchmark_gpu_weekly/run-benchmarks-tfhe-zk-pok
if: github.repository == 'zama-ai/tfhe-rs'
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1
needs: prepare-inputs
uses: ./.github/workflows/benchmark_cpu_common.yml
with:
command: tfhe_zk_pok
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
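For reference, the deleted benchmark_cpu_weekly workflow picked its benchmark group by comparing github.event.schedule against its three cron expressions. A condensed sketch of that dispatch logic, with SCHEDULE standing in for the event payload value:

    SCHEDULE='0 4 25 MAR,JUN,SEP,DEC *'
    case "${SCHEDULE}" in
      '0 1 * * 6')                echo "weekly group 1 (Saturday 1 a.m.)" ;;
      '0 3 * * 0')                echo "weekly group 2 (Sunday 3 a.m.)" ;;
      '0 4 25 MAR,JUN,SEP,DEC *') echo "quarterly bench (25th of Mar/Jun/Sep/Dec, 4 a.m.)" ;;
    esac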


@@ -20,8 +20,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members and GitHub can trigger this workflow
jobs:
setup-instance:
name: Setup instance (sizes-benchmarks)

.github/workflows/benchmark_dex.yml (new file, 170 lines)

@@ -0,0 +1,170 @@
# Run all DEX benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_dex
on:
workflow_dispatch:
schedule:
# Weekly benchmarks will be triggered each Saturday at 5a.m.
- cron: '0 5 * * 6'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
jobs:
setup-instance:
name: benchmark_dex/setup-instance
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
dex-benchmarks:
name: benchmark_dex/dex-benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
timeout-minutes: 720 # 12 hours
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Run benchmarks
run: |
make bench_hlapi_dex
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpc7a.96xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512
env:
REF_NAME: ${{ github.ref_name }}
- name: Parse swap request update PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_request_update_dex_balance_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Parse swap request finalize PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_request_finalize_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Parse swap claim prepare PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_claim_prepare_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Parse swap claim update PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_claim_update_dex_balance_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_dex
path: ${{ env.RESULTS_FILENAME }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "DEX benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_dex/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, dex-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (dex-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"


@@ -1,209 +0,0 @@
# Run all benchmarks displayed in the public documentation.
name: benchmark_documentation
on:
workflow_dispatch:
inputs:
run-cpu-benchmarks:
description: "Run CPU benchmarks"
type: boolean
default: true
run-gpu-benchmarks:
description: "Run GPU benchmarks"
type: boolean
default: true
run-hpu-benchmarks:
description: "Run HPU benchmarks"
type: boolean
default: true
generate-svgs:
description: "Generate SVG tables"
type: boolean
default: true
open-pr:
description: "Open a PR with the benchmark results"
type: boolean
default: false
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
run-benchmarks-cpu-integer:
name: benchmark_documentation/run-benchmarks-cpu-integer
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: integer
op_flavor: fast_default
# bench_type: both
bench_type: latency
precisions_set: documentation
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu-integer:
name: benchmark_documentation/run-benchmarks-gpu-integer
uses: ./.github/workflows/benchmark_gpu_common.yml
if: inputs.run-gpu-benchmarks
with:
profile: l40
hardware_name: n3-L40x1
command: integer_multi_bit
op_flavor: fast_default
# bench_type: both
bench_type: latency
precisions_set: documentation
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-hpu-integer:
name: benchmark_documentation/run-benchmarks-hpu-integer
uses: ./.github/workflows/benchmark_hpu_common.yml
if: inputs.run-hpu-benchmarks
with:
command: integer
op_flavor: default
bench_type: both
precisions_set: documentation
v80_pcie_dev: 24
v80_serial_number: XFL12NWY3ZKG
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
run-benchmarks-cpu-core-crypto:
name: benchmark_documentation/run-benchmarks-cpu-core-crypto
uses: ./.github/workflows/benchmark_cpu_common.yml
if: inputs.run-cpu-benchmarks
with:
command: pbs, ks_pbs
bench_type: latency
params_type: classical_documentation + multi_bit_documentation
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-gpu-core-crypto:
name: benchmark_documentation/run-benchmarks-gpu-core-crypto
uses: ./.github/workflows/benchmark_gpu_common.yml
if: inputs.run-gpu-benchmarks
with:
profile: l40
hardware_name: n3-L40x1
command: pbs, ks_pbs
bench_type: latency
params_type: classical_documentation + multi_bit_documentation
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
generate-svgs-with-benchmarks-run:
name: benchmark-documentation/generate-svgs-with-benchmarks-run
if: ${{ always() &&
(inputs.run-cpu-benchmarks || inputs.run-gpu-benchmarks ||inputs.run-hpu-benchmarks) &&
inputs.generate-svgs }}
needs: [
run-benchmarks-cpu-integer, run-benchmarks-gpu-integer, run-benchmarks-hpu-integer,
run-benchmarks-cpu-core-crypto, run-benchmarks-gpu-core-crypto
]
uses: ./.github/workflows/generate_svgs.yml
with:
time_span_days: 5
generate-cpu-svgs: ${{ inputs.run-cpu-benchmarks }}
generate-gpu-svgs: ${{ inputs.run-gpu-benchmarks }}
generate-hpu-svgs: ${{ inputs.run-hpu-benchmarks }}
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
generate-svgs-without-benchmarks-run:
name: benchmark-documentation/generate-svgs-without-benchmarks-run
if: ${{ !(inputs.run-cpu-benchmarks || inputs.run-gpu-benchmarks || inputs.run-hpu-benchmarks) &&
inputs.generate-svgs }}
uses: ./.github/workflows/generate_svgs.yml
with:
time_span_days: 60
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
open-pr:
name: benchmark-documentation/open-pr
needs: [ generate-svgs-with-benchmarks-run, generate-svgs-without-benchmarks-run ]
if: ${{ always() && inputs.open-pr &&
(needs.generate-svgs-with-benchmarks-run.result == 'success' || needs.generate-svgs-without-benchmarks-run.result == 'success') }}
runs-on: ubuntu-latest
permissions:
contents: write # Needed to create a commit
pull-requests: write # Needed to open a pull-request
env:
PATH_TO_DOC_ASSETS: tfhe/docs/.gitbook/assets
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
persist-credentials: 'false'
- name: Download SVG tables
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
path: svg_tables
merge-multiple: 'true'
- name: Copy SVG tables to documentation location
run: |
cp -f svg_tables/*integer-benchmark*.svg "${PATH_TO_DOC_ASSETS}"
cp -f svg_tables/*pbs-benchmark-tuniform*.svg "${PATH_TO_DOC_ASSETS}"
- name: Create pull-request
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
with:
sign-commits: true
#token: ${{ secrets.FHE_ACTIONS_TOKEN }} # Sign commit as Zama Bot
add-paths: ${{ env.PATH_TO_DOC_ASSETS }}/*.svg
commit-message: |
chore(docs): update benchmark results for all backends
Automated documentation update from tfhe-rs CI pipeline.
title: |
[CI] chore(docs): update benchmark results for all backends
body: |
Documentation update triggered by GitHub workflow.
labels: documentation

.github/workflows/benchmark_erc20.yml (new file, 153 lines)

@@ -0,0 +1,153 @@
# Run all ERC20 benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_erc20
on:
workflow_dispatch:
schedule:
# Weekly benchmarks will be triggered each Saturday at 5a.m.
- cron: '0 5 * * 6'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
jobs:
setup-instance:
name: benchmark_erc20/setup-instance
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
erc20-benchmarks:
name: benchmark_erc20/erc20-benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
timeout-minutes: 720 # 12 hours
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Run benchmarks
run: |
make bench_hlapi_erc20
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpc7a.96xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512
env:
REF_NAME: ${{ github.ref_name }}
- name: Parse PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/erc20_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_erc20
path: ${{ env.RESULTS_FILENAME }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "ERC20 benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_erc20/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, erc20-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"


@@ -17,7 +17,7 @@ on:
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
command:
description: "Benchmark command to run"
type: choice
@@ -40,14 +40,10 @@ on:
- default
- fast_default
- unchecked
precisions_set:
description: "Bit precisions set"
type: choice
default: fast
options:
- fast
- all
- documentation
all_precisions:
description: "Run all precisions"
type: boolean
default: false
bench_type:
description: "Benchmarks type"
type: choice
@@ -71,8 +67,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
parse-inputs:
name: benchmark_gpu/parse-inputs
@@ -110,7 +104,7 @@ jobs:
op_flavor: ${{ inputs.op_flavor }}
bench_type: ${{ inputs.bench_type }}
params_type: ${{ inputs.params_type }}
precisions_set: ${{ inputs.precisions_set }}
all_precisions: ${{ inputs.all_precisions }}
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}


@@ -11,7 +11,7 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
BIT_SIZES_SET: FAST
FAST_BENCH: TRUE
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -25,8 +25,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] each job manage its concurrency
jobs:
cuda-integer-benchmarks:
name: benchmark_gpu_4090/cuda-integer-benchmarks
@@ -53,6 +51,7 @@ jobs:
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
echo "FAST_BENCH=TRUE";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}


@@ -25,9 +25,9 @@ on:
params_type:
type: string
default: multi_bit
precisions_set:
type: string
default: fast
all_precisions:
type: boolean
default: false
secrets:
REPO_CHECKOUT_TOKEN:
required: true
@@ -56,55 +56,89 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
FAST_BENCH: TRUE
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
prepare-matrix:
name: benchmark_gpu_common/prepare-matrix
runs-on: ubuntu-latest
outputs:
command: ${{ steps.set_matrix_args.outputs.command }}
op_flavor: ${{ steps.set_matrix_args.outputs.op_flavor }}
bench_type: ${{ steps.set_matrix_args.outputs.bench_type }}
params_type: ${{ steps.set_matrix_args.outputs.params_type }}
command: ${{ steps.set_command.outputs.command }}
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
params_type: ${{ steps.set_params_type.outputs.params_type }}
env:
INPUTS_COMMAND: ${{ inputs.command }}
INPUTS_OP_FLAVOR: ${{ inputs.op_flavor }}
steps:
- name: Parse user inputs
shell: python
run: | # zizmor: ignore[template-injection] these env variables are safe
split_command = "${{ inputs.command }}".replace(" ", "").split(",")
split_op_flavor = "${{ inputs.op_flavor }}".replace(" ", "").split(",")
- name: Set single command
if: ${{ !contains(inputs.command, ',')}}
run: |
echo "COMMAND=[\"${INPUTS_COMMAND}\"]" >> "${GITHUB_ENV}"
if "${{ inputs.bench_type }}" == "both":
bench_type = ["latency", "throughput"]
else:
bench_type = ["${{ inputs.bench_type }}", ]
- name: Set multiple commands
if: ${{ contains(inputs.command, ',')}}
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
PARSED_COMMAND=$(echo "${INPUTS_COMMAND}" | sed 's/[[:space:]]*,[[:space:]]*/\", \"/g')
echo "COMMAND=[\"${PARSED_COMMAND}\"]" >> "${GITHUB_ENV}"
if "+" in "${{ inputs.params_type }}":
split_params_type= "${{ inputs.params_type }}".replace(" ", "").split("+")
else:
split_params_type = ["${{ inputs.params_type }}", ]
- name: Set single operations flavor
if: ${{ !contains(inputs.op_flavor, ',')}}
run: |
echo "OP_FLAVOR=[\"${INPUTS_OP_FLAVOR}\"]" >> "${GITHUB_ENV}"
with open("${{ github.env }}", "a") as f:
for env_name, values_to_join in [
("COMMAND", split_command),
("OP_FLAVOR", split_op_flavor),
("BENCH_TYPE", bench_type),
("PARAMS_TYPE", split_params_type),
]:
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
- name: Set multiple operations flavors
if: ${{ contains(inputs.op_flavor, ',')}}
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
PARSED_OP_FLAVOR=$(echo "${INPUTS_OP_FLAVOR}" | sed 's/[[:space:]]*,[[:space:]]*/", "/g')
echo "OP_FLAVOR=[\"${PARSED_OP_FLAVOR}\"]" >> "${GITHUB_ENV}"
- name: Set martix arguments outputs
id: set_matrix_args
run: | # zizmor: ignore[template-injection] these env variable are safe
{
echo "command=${{ toJSON(env.COMMAND) }}";
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}";
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}";
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}";
} >> "${GITHUB_OUTPUT}"
- name: Set benchmark types
run: |
if [[ "${INPUTS_BENCH_TYPE}" == "both" ]]; then
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
else
echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
- name: Set parameters types
run: |
if [[ "${INPUTS_PARAMS_TYPE}" == "both" ]]; then
echo "PARAMS_TYPE=[\"classical\", \"multi_bit\"]" >> "${GITHUB_ENV}"
else
echo "PARAMS_TYPE=[\"${INPUTS_PARAMS_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
- name: Set command output
id: set_command
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "command=${{ toJSON(env.COMMAND) }}" >> "${GITHUB_OUTPUT}"
- name: Set operation flavor output
id: set_op_flavor
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
- name: Set benchmark types output
id: set_bench_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
- name: Set parameters types output
id: set_params_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_gpu_common/setup-instance
@@ -239,15 +273,19 @@ jobs:
with:
toolchain: nightly
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
- name: Run benchmarks
run: |
make BIT_SIZES_SET="${PRECISIONS_SET}" BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_gpu
make BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_gpu
env:
OP_FLAVOR: ${{ matrix.op_flavor }}
BENCH_TYPE: ${{ matrix.bench_type }}
BENCH_PARAMS_TYPE: ${{ matrix.params_type }}
BENCH_COMMAND: ${{ matrix.command }}
PRECISIONS_SET: ${{ inputs.precisions_set }}
- name: Parse results
run: |

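The rewritten prepare-matrix in benchmark_gpu_common replaces the inline Python parser with plain shell: single values are wrapped directly into a one-element JSON array, while comma-separated inputs are split with sed. A stand-alone illustration of the sed transformation, with INPUTS_COMMAND standing in for the command input:

    INPUTS_COMMAND="pbs, ks_pbs"
    # shellcheck disable=SC2001
    PARSED_COMMAND=$(echo "${INPUTS_COMMAND}" | sed 's/[[:space:]]*,[[:space:]]*/", "/g')
    echo "COMMAND=[\"${PARSED_COMMAND}\"]"   # prints COMMAND=["pbs", "ks_pbs"]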

@@ -16,8 +16,8 @@ on:
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
- "multi-h100-sxm5_fallback (n3-H100-SXM5x8)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
- "multi-h100-sxm5_fallback (n3-H100x8-SXM5)"
schedule:
# Weekly tests @ 1AM
@@ -26,8 +26,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
@@ -35,7 +33,7 @@ env:
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100-SXM5x8)"
PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100x8-SXM5)"
PROFILE_MANUAL_RUN: ${{ inputs.profile }}
IS_MANUAL_RUN: ${{ github.event_name == 'workflow_dispatch' }}
BENCHMARK_TYPE: "ALL"
@@ -108,8 +106,8 @@ jobs:
continue-on-error: true
timeout-minutes: 720 # 12 hours
permissions:
contents: 'read' # Needed to read repositories contents
packages: 'read' # Needed to get fhevm packages
contents: 'read'
packages: 'read'
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options


@@ -17,12 +17,10 @@ on:
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
parse-inputs:
name: benchmark_gpu_dex/parse-inputs


@@ -45,8 +45,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
setup-instance:
name: benchmark_gpu_dex_common/setup-instance


@@ -8,8 +8,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow
jobs:
run-benchmarks-1-h100:
name: benchmark_gpu_dex_weekly/run-benchmarks-1-h100


@@ -17,13 +17,11 @@ on:
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100-SXM5x8)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
parse-inputs:
name: benchmark_gpu_erc20/parse-inputs


@@ -46,8 +46,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
setup-instance:
name: benchmark_gpu_erc20_common/setup-instance


@@ -9,8 +9,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow
jobs:
run-benchmarks-1-h100:
name: benchmark_gpu_erc20_weekly/run-benchmarks-1-h100


@@ -9,8 +9,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow
jobs:
run-benchmarks-8-h100-sxm5-integer:
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer
@@ -18,11 +16,11 @@ jobs:
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
hardware_name: n3-H100x8-SXM5
command: integer_multi_bit
op_flavor: default
bench_type: both
precisions_set: fast
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -39,11 +37,11 @@ jobs:
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
hardware_name: n3-H100x8-SXM5
command: integer_compression
op_flavor: default
bench_type: both
precisions_set: fast
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -60,11 +58,11 @@ jobs:
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
hardware_name: n3-H100x8-SXM5
command: integer_zk
op_flavor: default
bench_type: both
precisions_set: fast
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -81,11 +79,11 @@ jobs:
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100-SXM5x8
hardware_name: n3-H100x8-SXM5
command: hlapi_noise_squash
op_flavor: default
bench_type: both
precisions_set: fast
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}


@@ -1,69 +0,0 @@
# Run benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
name: benchmark_hpu
on:
workflow_dispatch:
inputs:
command:
description: "Benchmark command to run"
type: choice
default: integer
options:
- integer
- hlapi
- hlapi_erc20
op_flavor:
description: "Operations set to run"
type: choice
default: default
options:
- default
- fast_default
precisions_set:
description: "Bit precisions set"
type: choice
default: fast
options:
- fast
- all
- documentation
bench_type:
description: "Benchmarks type"
type: choice
default: latency
options:
- latency
- throughput
- both
v80_pcie_dev:
description: "V80 PCIe device number"
default: 24
v80_serial_number:
description: "V80 serial number"
default: XFL12NWY3ZKG
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
run-benchmarks:
name: benchmark_hpu/run-benchmarks
uses: ./.github/workflows/benchmark_hpu_common.yml
with:
command: ${{ inputs.command }}
op_flavor: ${{ inputs.op_flavor }}
bench_type: ${{ inputs.bench_type }}
precisions_set: ${{ inputs.precisions_set }}
v80_pcie_dev: ${{ inputs.v80_pcie_dev }}
v80_serial_number: ${{ inputs.v80_serial_number }}
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}

View File

@@ -0,0 +1,101 @@
# Run all integer benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
name: Hpu Hlapi Benchmarks
on:
workflow_dispatch:
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
permissions: {}
jobs:
hlapi-benchmarks-hpu:
name: Execute HLAPI benchmarks for HPU backend
runs-on: v80-marais
concurrency:
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
timeout-minutes: 1440 # 24 hours
steps:
# Needed as long as hw_regmap repository is private
- name: Configure SSH
uses: webfactory/ssh-agent@a6f90b1f127823b31d4d4a8d96047790581349bd # v0.9.1
with:
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
lfs: true
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Select HPU board
run: |
echo "V80_PCIE_DEV=24" >> "${GITHUB_ENV}"
echo "V80_SERIAL_NUMBER=XFL12NWY3ZKG" >> "${GITHUB_ENV}"
- name: Run benchmarks
run: |
make pull_hpu_files
make bench_hlapi_erc20_hpu
make bench_hlapi_hpu
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpu_x1" \
--backend hpu \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs
env:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_hlapi_benchmarks
path: ${{ env.RESULTS_FILENAME }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}

View File

@@ -1,46 +1,20 @@
# Run benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
name: benchmark_hpu_common
# Run all integer benchmarks on a permanent HPU instance and return parsed results to Slab CI bot.
name: benchmark_hpu_integer
on:
workflow_call:
workflow_dispatch:
inputs:
command: # Use comma-separated values to generate an array
type: string
required: true
op_flavor: # Use comma-separated values to generate an array
type: string
default: default
all_precisions:
description: "Run all precisions"
type: boolean
bench_type:
type: string
default: latency
precisions_set:
type: string
default: fast
v80_pcie_dev:
type: string
default: 24
v80_serial_number:
type: string
default: XFL12NWY3ZKG
secrets:
REPO_CHECKOUT_TOKEN:
required: true
SLAB_ACTION_TOKEN:
required: true
SLAB_BASE_URL:
required: true
SLAB_URL:
required: true
JOB_SECRET:
required: true
SLACK_CHANNEL:
required: true
BOT_USERNAME:
required: true
SLACK_WEBHOOK:
required: true
SSH_PRIVATE_KEY:
required: true
description: "Benchmarks type"
type: choice
default: both
options:
- latency
- throughput
- both
env:
CARGO_TERM_COLOR: always
@@ -48,53 +22,41 @@ env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
FAST_BENCH: TRUE
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
prepare-matrix:
name: benchmark_hpu_common/prepare-matrix
runs-on: ubuntu-latest
name: Prepare operations matrix
runs-on: v80-marais
outputs:
command: ${{ steps.set_matrix_args.outputs.command }}
op_flavor: ${{ steps.set_matrix_args.outputs.op_flavor }}
bench_type: ${{ steps.set_matrix_args.outputs.bench_type }}
env:
INPUTS_COMMAND: ${{ inputs.command }}
INPUTS_OP_FLAVOR: ${{ inputs.op_flavor }}
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
steps:
- name: Parse user inputs
shell: python
run: | # zizmor: ignore[template-injection] these env variables are safe
split_command = "${{ inputs.command }}".replace(" ", "").split(",")
split_op_flavor = "${{ inputs.op_flavor }}".replace(" ", "").split(",")
- name: Set benchmark types
if: github.event_name == 'workflow_dispatch'
run: |
if [[ -z $INPUTS_BENCH_TYPE || "${INPUTS_BENCH_TYPE}" == "both" ]]; then
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
else
echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
if "${{ inputs.bench_type }}" == "both":
bench_type = ["latency", "throughput"]
else:
bench_type = ["${{ inputs.bench_type }}", ]
- name: Default benchmark type
if: github.event_name != 'workflow_dispatch'
run: |
echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
- name: Set benchmark types output
id: set_bench_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
with open("${{ github.env }}", "a") as f:
for env_name, values_to_join in [
("COMMAND", split_command),
("OP_FLAVOR", split_op_flavor),
("BENCH_TYPE", bench_type),
]:
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
- name: Set matrix arguments outputs
id: set_matrix_args
run: | # zizmor: ignore[template-injection] these env variables are safe
{
echo "command=${{ toJSON(env.COMMAND) }}";
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}";
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}";
} >> "${GITHUB_OUTPUT}"
hpu-benchmarks:
name: benchmark_hpu_common/hpu-benchmarks
integer-benchmarks-hpu:
name: benchmark_hpu_integer/integer-benchmarks-hpu
needs: prepare-matrix
runs-on: v80-marais
concurrency:
@@ -104,8 +66,6 @@ jobs:
strategy:
max-parallel: 1
matrix:
command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
steps:
# Needed as long as hw_regmap repository is private
@@ -138,24 +98,31 @@ jobs:
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
- name: Select HPU board
run: |
echo "V80_PCIE_DEV=${PCIE_DEV}" >> "${GITHUB_ENV}"
echo "V80_SERIAL_NUMBER=${SERIAL_NUMBER}" >> "${GITHUB_ENV}"
env:
PCIE_DEV: ${{ inputs.v80_pcie_dev }}
SERIAL_NUMBER: ${{ inputs.v80_serial_number }}
echo "V80_PCIE_DEV=24" >> "${GITHUB_ENV}"
echo "V80_SERIAL_NUMBER=XFL12NWY3ZKG" >> "${GITHUB_ENV}"
- name: Run benchmarks
run: |
echo "${V80_PCIE_DEV} ${V80_SERIAL_NUMBER}"
make pull_hpu_files
make BIT_SIZES_SET="${PRECISIONS_SET}" BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_hpu
make BENCH_TYPE="${BENCH_TYPE}" bench_integer_hpu
env:
OP_FLAVOR: ${{ matrix.op_flavor }}
BENCH_TYPE: ${{ matrix.bench_type }}
BENCH_COMMAND: ${{ matrix.command }}
PRECISIONS_SET: ${{ inputs.precisions_set }}
- name: Parse results
run: |
@@ -179,14 +146,6 @@ jobs:
name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |

View File

@@ -1,48 +1,28 @@
# Run benchmarks on an instance and return parsed results to Slab CI bot.
name: benchmark_cpu_common
# Run all integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_integer
on:
workflow_call:
workflow_dispatch:
inputs:
command: # Any make recipes stripped of the "bench_" prefix in the Makefile
type: string # Use comma separated values to generate an array
required: true
op_flavor:
type: string # Use comma separated values to generate an array
default: default
all_precisions:
description: "Run all precisions"
type: boolean
default: false
bench_type:
type: string
description: "Benchmarks type"
type: choice
default: latency
params_type:
type: string
default: classical
precisions_set:
type: string
default: fast
additional_recipe: # Make recipes to run alongside the benchmarks.
type: string # Use comma separated values to generate an array
additional_file_to_parse: # Other files to parse, located under tfhe-benchmark/ directory
type: string # Use comma separated values to generate an array
additional_results_type:
type: string
default: object-size
secrets:
REPO_CHECKOUT_TOKEN:
required: true
SLAB_ACTION_TOKEN:
required: true
SLAB_BASE_URL:
required: true
SLAB_URL:
required: true
JOB_SECRET:
required: true
SLACK_CHANNEL:
required: true
BOT_USERNAME:
required: true
SLACK_WEBHOOK:
required: true
options:
- latency
- throughput
- both
schedule:
# Weekly benchmarks will be triggered each Saturday at 1a.m.
- cron: '0 1 * * 6'
# Quarterly benchmarks will be triggered right before the end of the quarter, on the 25th of the current month at 4a.m.
# These benchmarks take far longer to execute, hence they are run only four times a year.
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
env:
CARGO_TERM_COLOR: always
@@ -54,58 +34,60 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
FAST_BENCH: TRUE
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
prepare-matrix:
name: benchmark_cpu_common/prepare-matrix
name: benchmark_integer/prepare-matrix
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
command: ${{ steps.set_matrix_args.outputs.command }}
op_flavor: ${{ steps.set_matrix_args.outputs.op_flavor }}
bench_type: ${{ steps.set_matrix_args.outputs.bench_type }}
params_type: ${{ steps.set_matrix_args.outputs.params_type }}
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
steps:
- name: Parse user inputs
shell: python
run: | # zizmor: ignore[template-injection] these env variables are safe
split_command = "${{ inputs.command }}".replace(" ", "").split(",")
split_op_flavor = "${{ inputs.op_flavor }}".replace(" ", "").split(",")
- name: Weekly benchmarks
if: github.event.schedule == '0 1 * * 6'
run: |
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
if "${{ inputs.bench_type }}" == "both":
bench_type = ["latency", "throughput"]
else:
bench_type = ["${{ inputs.bench_type }}", ]
- name: Quarterly benchmarks
if: github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
run: |
echo "OP_FLAVOR=[\"default\", \"smart\", \"unchecked\", \"misc\"]" >> "${GITHUB_ENV}"
if "+" in "${{ inputs.params_type }}":
split_params_type= "${{ inputs.params_type }}".replace(" ", "").split("+")
else:
split_params_type = ["${{ inputs.params_type }}", ]
- name: Set benchmark types
if: github.event_name == 'workflow_dispatch'
run: |
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
if [[ "${INPUTS_BENCH_TYPE}" == "both" ]]; then
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
else
echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
with open("${{ github.env }}", "a") as f:
for env_name, values_to_join in [
("COMMAND", split_command),
("OP_FLAVOR", split_op_flavor),
("BENCH_TYPE", bench_type),
("PARAMS_TYPE", split_params_type),
]:
f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")
- name: Default benchmark type
if: github.event_name != 'workflow_dispatch'
run: |
echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
- name: Set matrix arguments outputs
id: set_matrix_args
run: | # zizmor: ignore[template-injection] these env variables are safe
{
echo "command=${{ toJSON(env.COMMAND) }}";
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}";
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}";
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}";
} >> "${GITHUB_OUTPUT}"
- name: Set operation flavor output
id: set_op_flavor
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
- name: Set benchmark types output
id: set_bench_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_cpu_common/setup-instance
name: benchmark_integer/setup-instance
needs: prepare-matrix
runs-on: ubuntu-latest
outputs:
@@ -123,17 +105,19 @@ jobs:
profile: bench
integer-benchmarks:
name: benchmark_cpu_common/integer-benchmarks
name: benchmark_integer/integer-benchmarks
needs: [ prepare-matrix, setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
timeout-minutes: 1440 # 24 hours
strategy:
max-parallel: 1
matrix:
command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
command: [ integer, integer_multi_bit]
op_flavor: ${{ fromJson(needs.prepare-matrix.outputs.op_flavor) }}
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
@@ -158,15 +142,34 @@ jobs:
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
- name: Run benchmarks with AVX512
run: |
make BIT_SIZES_SET="${PRECISIONS_SET}" BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"
make BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" bench_"${BENCH_COMMAND}"
env:
OP_FLAVOR: ${{ matrix.op_flavor }}
BENCH_TYPE: ${{ matrix.bench_type }}
BENCH_PARAMS_TYPE: ${{ matrix.params_type }}
BENCH_COMMAND: ${{ matrix.command }}
PRECISIONS_SET: ${{ inputs.precisions_set }}
# Run these benchmarks only once per benchmark type
- name: Run compression benchmarks with AVX512
if: matrix.op_flavor == 'default' && matrix.command == 'integer'
run: |
make BENCH_TYPE="${BENCH_TYPE}" bench_integer_compression
env:
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Parse results
run: |
@@ -184,45 +187,12 @@ jobs:
REF_NAME: ${{ github.ref_name }}
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Run additional benchmarks
if: ${{ inputs.additional_recipe }}
run: |
targets_list="${targets}"
IFS=','
for target in $targets_list; do
make "$target"
done
env:
targets: ${{ inputs.additional_recipe }}
- name: Parse additional benchmarks results files
if: ${{ inputs.additional_file_to_parse }}
run: |
filenames_list="${filenames}"
IFS=','
for filename in $filenames_list; do
python3 ./ci/benchmark_parser.py "tfhe-benchmark/${filename}" "${RESULTS_FILENAME}" \
--"${results_type}" \
--append-results
done
env:
filenames: ${{ inputs.additional_file_to_parse }}
results_type: ${{ inputs.additional_results_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
@@ -233,15 +203,15 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() }}
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CPU bencmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_cpu_common/teardown-instance
name: benchmark_integer/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, integer-benchmarks ]
runs-on: ubuntu-latest
@@ -262,4 +232,4 @@ jobs:
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cpu-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -20,8 +20,6 @@ env:
permissions: { }
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
verify-triggering-actor:
name: benchmark_perf_regression/verify-actor
@@ -47,7 +45,8 @@ jobs:
selected-regression-profile: ${{ steps.set_regression_details.outputs.selected-profile }}
custom-env: ${{ steps.get_custom_env.outputs.custom_env }}
permissions:
pull-requests: write # Needed to write a comment in a pull-request
# Needed to write a comment in a pull-request
pull-requests: write
steps:
- name: Checkout tfhe-rs repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
@@ -299,8 +298,10 @@ jobs:
needs: [ prepare-benchmarks, regression-benchmarks ]
runs-on: ubuntu-latest
permissions:
pull-requests: write # Needed to write a comment in a pull-request
contents: read # Needed to set up Python dependencies
# Needed to write a comment in a pull-request
pull-requests: write
# Needed to set up Python dependencies
contents: read
env:
REF_NAME: ${{ github.head_ref || github.ref_name }}
steps:
@@ -354,7 +355,8 @@ jobs:
if: ${{ failure() && github.event_name == 'issue_comment' }}
continue-on-error: true
permissions:
pull-requests: write # Needed to write a comment in a pull-request
# Needed to write a comment in a pull-request
pull-requests: write
steps:
- name: Write failure message
uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5.0.0

179
.github/workflows/benchmark_shortint.yml vendored Normal file
View File

@@ -0,0 +1,179 @@
# Run all shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_shortint
on:
workflow_dispatch:
schedule:
# Weekly benchmarks will be triggered each Saturday at 1a.m.
- cron: '0 1 * * 6'
# Quarterly benchmarks will be triggered right before the end of the quarter, on the 25th of the current month at 4a.m.
# These benchmarks take far longer to execute, hence they are run only four times a year.
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
jobs:
prepare-matrix:
name: benchmark_shortint/prepare-matrix
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
steps:
- name: Weekly benchmarks
if: github.event_name == 'workflow_dispatch' ||
github.event.schedule == '0 1 * * 6'
run: |
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
- name: Quarterly benchmarks
if: github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
run: |
echo "OP_FLAVOR=[\"default\", \"smart\", \"unchecked\"]" >> "${GITHUB_ENV}"
- name: Set operation flavor output
id: set_op_flavor
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_shortint/setup-instance
needs: prepare-matrix
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
shortint-benchmarks:
name: benchmark_shortint/shortint-benchmarks
needs: [ prepare-matrix, setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
strategy:
max-parallel: 1
matrix:
op_flavor: ${{ fromJson(needs.prepare-matrix.outputs.op_flavor) }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Run benchmarks with AVX512
run: |
make BENCH_OP_FLAVOR="${OP_FLAVOR}" bench_shortint
env:
OP_FLAVOR: ${{ matrix.op_flavor }}
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpc7a.96xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512
env:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
path: ${{ env.RESULTS_FILENAME }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Shortint full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_shortint/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, shortint-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (shortint-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -0,0 +1,227 @@
# Run all signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: benchmark_signed_integer
on:
workflow_dispatch:
inputs:
all_precisions:
description: "Run all precisions"
type: boolean
default: false
bench_type:
description: "Benchmarks type"
type: choice
default: latency
options:
- latency
- throughput
- both
schedule:
# Weekly benchmarks will be triggered each Saturday at 1a.m.
- cron: '0 1 * * 6'
# Quarterly benchmarks will be triggered right before the end of the quarter, on the 25th of the current month at 4a.m.
# These benchmarks take far longer to execute, hence they are run only four times a year.
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
FAST_BENCH: TRUE
permissions: {}
jobs:
prepare-matrix:
name: benchmark_signed_integer/prepare-matrix
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
steps:
- name: Weekly benchmarks
if: github.event.schedule == '0 1 * * 6'
run: |
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
- name: Quarterly benchmarks
if: github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
run: |
echo "OP_FLAVOR=[\"default\", \"unchecked\"]" >> "${GITHUB_ENV}"
- name: Set benchmark types
if: github.event_name == 'workflow_dispatch'
run: |
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
if [[ "${INPUTS_BENCH_TYPE}" == "both" ]]; then
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
else
echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
- name: Default benchmark type
if: github.event_name != 'workflow_dispatch'
run: |
echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
- name: Set operation flavor output
id: set_op_flavor
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
- name: Set benchmark types output
id: set_bench_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_signed_integer/setup-instance
needs: prepare-matrix
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
signed-integer-benchmarks:
name: benchmark_signed_integer/signed-integer-benchmarks
needs: [ prepare-matrix, setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
timeout-minutes: 1440 # 24 hours
strategy:
max-parallel: 1
matrix:
command: [ integer, integer_multi_bit ]
op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
- name: Run benchmarks with AVX512
run: |
make BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" bench_signed_"${BENCH_COMMAND}"
env:
OP_FLAVOR: ${{ matrix.op_flavor }}
BENCH_TYPE: ${{ matrix.bench_type }}
BENCH_COMMAND: ${{ matrix.command }}
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpc7a.96xlarge" \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512 \
--bench-type "${BENCH_TYPE}"
env:
REF_NAME: ${{ github.ref_name }}
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Signed integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_signed_integer/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, signed-integer-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (signed-integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -26,8 +26,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: benchmark_tfhe_fft/setup-instance

View File

@@ -26,8 +26,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: benchmark_tfhe_ntt/setup-instance

View File

@@ -0,0 +1,198 @@
# Run benchmarks of the tfhe-zk-pok crate on an instance and return parsed results to Slab CI bot.
name: benchmark_tfhe_zk_pok
on:
workflow_dispatch:
inputs:
bench_type:
description: "Benchmarks type"
type: choice
default: latency
options:
- latency
- throughput
push:
branches:
- main
schedule:
# Weekly benchmarks will be triggered each Saturday at 3a.m.
- cron: '0 3 * * 6'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
BENCH_TYPE: ${{ inputs.bench_type || 'latency' }}
permissions: {}
jobs:
should-run:
name: benchmark_tfhe_zk_pok/should-run
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
((github.event_name == 'push' || github.event_name == 'schedule') && github.repository == 'zama-ai/tfhe-rs')
outputs:
zk_pok_changed: ${{ steps.changed-files.outputs.zk_pok_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
zk_pok:
- tfhe-zk-pok/**
- .github/workflows/benchmark_tfhe_zk_pok.yml
setup-instance:
name: benchmark_tfhe_zk_pok/setup-instance
runs-on: ubuntu-latest
needs: should-run
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'push' &&
github.repository == 'zama-ai/tfhe-rs' &&
needs.should-run.outputs.zk_pok_changed == 'true')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
tfhe-zk-pok-benchmarks:
name: benchmark_tfhe_zk_pok/tfhe-zk-pok-benchmarks
if: needs.setup-instance.result != 'skipped'
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}_${{github.event_name}}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Run benchmarks
run: |
make BENCH_TYPE="${BENCH_TYPE}" bench_tfhe_zk_pok
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--crate tfhe-zk-pok \
--hardware "hpc7a.96xlarge" \
--backend cpu \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512 \
--bench-type "${BENCH_TYPE}"
env:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_tfhe_zk_pok_${{ env.BENCH_TYPE }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "tfhe-zk-pok benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_tfhe_zk_pok/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (tfhe-zk-pok-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -24,8 +24,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members and GitHub can trigger this workflow
jobs:
should-run:
name: benchmark_wasm_client/should-run
@@ -34,7 +32,7 @@ jobs:
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
steps:

248
.github/workflows/benchmark_zk_pke.yml vendored Normal file
View File

@@ -0,0 +1,248 @@
# Run PKE Zero-Knowledge benchmarks on an instance and return parsed results to Slab CI bot.
name: benchmark_zk_pke
on:
workflow_dispatch:
inputs:
bench_type:
description: "Benchmarks type"
type: choice
default: latency
options:
- latency
- throughput
- both
push:
branches:
- main
schedule:
# Weekly benchmarks will be triggered each Saturday at 3a.m.
- cron: '0 3 * * 6'
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
permissions: {}
jobs:
should-run:
name: benchmark_zk_pke/should-run
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
((github.event_name == 'push' || github.event_name == 'schedule') && github.repository == 'zama-ai/tfhe-rs')
outputs:
zk_pok_changed: ${{ steps.changed-files.outputs.zk_pok_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
zk_pok:
- tfhe/Cargo.toml
- tfhe-csprng/**
- tfhe-fft/**
- tfhe-zk-pok/**
- tfhe/src/core_crypto/**
- tfhe/src/shortint/**
- tfhe/src/integer/**
- tfhe/src/zk.rs
- tfhe/benches/integer/zk_pke.rs
- .github/workflows/zk_pke_benchmark.yml
prepare-matrix:
name: benchmark_zk_pke/prepare-matrix
runs-on: ubuntu-latest
if: github.event_name != 'schedule' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
steps:
- name: Set benchmark types
if: github.event_name == 'workflow_dispatch'
run: |
if [[ "${INPUTS_BENCH_TYPE}" == "both" ]]; then
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
else
echo "BENCH_TYPE=[\"${INPUTS_BENCH_TYPE}\"]" >> "${GITHUB_ENV}"
fi
env:
INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
- name: Default benchmark type
if: github.event_name != 'workflow_dispatch'
run: |
echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
- name: Set benchmark types output
id: set_bench_type
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_zk_pke/setup-instance
runs-on: ubuntu-latest
needs: [ should-run, prepare-matrix ]
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
(github.event_name == 'push' &&
github.repository == 'zama-ai/tfhe-rs' &&
needs.should-run.outputs.zk_pok_changed == 'true')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: bench
pke-zk-benchmarks:
name: benchmark_zk_pke/pke-zk-benchmarks
if: needs.setup-instance.result != 'skipped'
needs: [ prepare-matrix, setup-instance ]
concurrency:
group: ${{ github.workflow_ref }}_${{github.event_name}}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
max-parallel: 1
matrix:
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Run benchmarks with AVX512
run: |
make BENCH_TYPE="${BENCH_TYPE}" bench_integer_zk
env:
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
--database tfhe_rs \
--hardware "hpc7a.96xlarge" \
--backend cpu \
--project-version "${COMMIT_HASH}" \
--branch "${REF_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--name-suffix avx512 \
--bench-type "${BENCH_TYPE}"
env:
REF_NAME: ${{ github.ref_name }}
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Parse CRS sizes results
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/pke_zk_crs_sizes.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_integer_zk_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "PKE ZK benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: benchmark_zk_pke/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, pke-zk-benchmarks ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (pke-zk-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -19,8 +19,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members and GitHub can trigger this workflow
jobs:
audit:
name: cargo_audit/audit

View File

@@ -1,17 +0,0 @@
name: cargo_build_common
on:
workflow_call:
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
placeholder:
name: cargo_build_common/placeholder
runs-on: ubuntu-latest
steps:
- run: |
echo "Hello this is a placeholder workflow"

View File

@@ -24,7 +24,7 @@ jobs:
name: cargo_test_fft/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
fft_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.fft_any_changed }}
steps:

View File

@@ -25,7 +25,7 @@ jobs:
name: cargo_test_ntt/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
ntt_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ntt_any_changed }}
steps:

View File

@@ -5,8 +5,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow (via manual approval for PR from forks)
jobs:
check-commit-pr:
name: check_commit/check-commit-pr (bpr)

View File

@@ -12,8 +12,6 @@ env:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow (via manual approval for PR from forks)
jobs:
lint-check:
name: ci_lint/lint-check (bpr)
@@ -37,17 +35,11 @@ jobs:
run: |
make lint_workflow
- name: Get Zizmor version to use
id: get_zizmor
run: |
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
- name: Check workflows security
uses: zizmorcore/zizmor-action@e673c3917a1aef3c65c972347ed84ccd013ecda4 # v0.2.0
with:
advanced-security: 'false' # Print results directly in logs
persona: pedantic
version: ${{ steps.get_zizmor.outputs.version }}
run: |
make check_workflow_security
env:
GH_TOKEN: ${{ env.CHECKOUT_TOKEN }}
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@9e9574ef04ea69da568d6249bd69539ccc704e74 # v4.0.0

View File

@@ -20,8 +20,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
setup-instance:
name: code_coverage/setup-instance

View File

@@ -24,8 +24,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: csprng_randomness_tests/setup-instance
@@ -57,7 +55,7 @@ jobs:
name: csprng_randomness_tests/csprng-randomness-tests
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}_${{ github.sha }}_${{ github.event_name }}
group: ${{ github.workflow_ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:

View File

@@ -1,85 +0,0 @@
name: generate_svg_common
on:
workflow_call:
inputs:
backend:
type: string
required: true
hardware_name:
type: string
required: true
layer:
type: string
required: true
pbs_kind: # Valid values are 'classical', 'multi_bit' or 'any'
type: string
required: true
grouping_factor: # Valid values are 2, 3, or 4
type: string
default: 4
bench_type: # Valid values are 'latency', 'throughput'
type: string
required: true
time_span_days:
type: string
default: 60
output_filename:
type: string
required: true
secrets:
DATA_EXTRACTOR_DATABASE_USER:
required: true
DATA_EXTRACTOR_DATABASE_HOST:
required: true
DATA_EXTRACTOR_DATABASE_PASSWORD:
required: true
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
generate-table:
name: generate_svg_common/generate-table
runs-on: ubuntu-latest
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
persist-credentials: 'false'
- name: Produce table from database
run: |
python3 -m pip install -r ci/data_extractor/requirements.txt
python3 ci/data_extractor/src/data_extractor.py "${OUTPUT_FILENAME}" \
--generate-svg \
--branch "${REF_NAME}" \
--backend "${BACKEND}" \
--hardware "${HARDWARE_NAME}" \
--tfhe-rs-layer "${LAYER}" \
--pbs-kind "${PBS_KIND}" \
--grouping-factor "${GROUPING_FACTOR}" \
--bench-type "${BENCH_TYPE}" \
--time-span-days "${TIME_SPAN}"
env:
OUTPUT_FILENAME: ${{ inputs.output_filename }}
REF_NAME: ${{ github.ref_name }}
BACKEND: ${{ inputs.backend }}
HARDWARE_NAME: ${{ inputs.hardware_name }}
LAYER: ${{ inputs.layer }}
PBS_KIND: ${{ inputs.pbs_kind }}
GROUPING_FACTOR: ${{ inputs.grouping_factor }}
BENCH_TYPE: ${{ inputs.bench_type }}
TIME_SPAN: ${{ inputs.time_span_days }}
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
- name: Upload tables
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
# This will upload all the files generated
path: ${{ inputs.output_filename }}*.svg
retention-days: 60

View File

@@ -1,178 +0,0 @@
# Generate benchmark SVGs for public documentation
name: generate_documentation_svgs
on:
workflow_call:
inputs:
time_span_days:
type: string
required: true
generate-cpu-svgs:
type: boolean
default: true
generate-gpu-svgs:
type: boolean
default: true
generate-hpu-svgs:
type: boolean
default: true
secrets:
DATA_EXTRACTOR_DATABASE_USER:
required: true
DATA_EXTRACTOR_DATABASE_HOST:
required: true
DATA_EXTRACTOR_DATABASE_PASSWORD:
required: true
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
# -----------------------------------------------------------
# Integer benchmarks tables
# -----------------------------------------------------------
cpu-integer-latency-table:
name: generate_documentation_svgs/cpu-integer-latency-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: hpc7a.96xlarge
layer: integer
pbs_kind: classical
bench_type: latency
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-integer-benchmark-tuniform-2m128-latency
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# cpu-integer-throughput-table:
# name: generate_documentation_svgs/cpu-integer-latency-table
# uses: ./.github/workflows/generate_svg_common.yml
# if: inputs.generate-cpu-svgs
# with:
# backend: cpu
# hardware_name: hpc7a.96xlarge
# layer: integer
# pbs_kind: classical
# bench_type: throughput
# time_span_days: ${{ inputs.time_span_days }}
# output_filename: cpu-integer-benchmark-tuniform-2m128-throughput
# secrets:
# DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
# DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
# DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# gpu-integer-latency-table:
# name: generate_documentation_svgs/gpu-integer-latency-table
# uses: ./.github/workflows/generate_svg_common.yml
# if: inputs.generate-gpu-svgs
# with:
# backend: gpu
# hardware_name: n3-L40x1
# layer: integer
# pbs_kind: multi_bit
# grouping_factor: 4
# bench_type: latency
# time_span_days: ${{ inputs.time_span_days }}
# output_filename: gpu-integer-benchmark-h100x8-sxm5-multi-bit-tuniform-2m128-latency
# secrets:
# DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
# DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
# DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# gpu-integer-throughput-table:
# name: generate_documentation_svgs/gpu-integer-throughput-table
# uses: ./.github/workflows/generate_svg_common.yml
# if: inputs.generate-gpu-svgs
# with:
# backend: gpu
# hardware_name: n3-L40x1
# layer: integer
# pbs_kind: multi_bit
# grouping_factor: 4
# bench_type: throughput
# time_span_days: ${{ inputs.time_span_days }}
# output_filename: gpu-integer-benchmark-h100x8-sxm5-multi-bit-tuniform-2m128-throughput
# secrets:
# DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
# DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
# DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
hpu-integer-latency-table:
name: generate_documentation_svgs/hpu-integer-latency-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-hpu-svgs
with:
backend: hpu
hardware_name: hpu_x1
layer: integer
pbs_kind: classical
bench_type: latency
time_span_days: ${{ inputs.time_span_days }}
output_filename: hpu-integer-benchmark-hpux1-tuniform-2m128-latency
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
hpu-integer-throughput-table:
name: generate_documentation_svgs/hpu-integer-throughput-table
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-hpu-svgs
with:
backend: hpu
hardware_name: hpu_x1
layer: integer
pbs_kind: classical
bench_type: throughput
time_span_days: ${{ inputs.time_span_days }}
output_filename: hpu-integer-benchmark-hpux1-tuniform-2m128-throughput
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
# -----------------------------------------------------------
# PBS benchmarks tables
# -----------------------------------------------------------
cpu-pbs-tables:
name: generate_documentation_svgs/cpu-pbs-tables
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-cpu-svgs
with:
backend: cpu
hardware_name: hpc7a.96xlarge
layer: core_crypto
pbs_kind: any
grouping_factor: 4
bench_type: latency
time_span_days: ${{ inputs.time_span_days }}
output_filename: cpu-pbs-benchmark
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
gpu-pbs-tables:
name: generate_documentation_svgs/gpu-pbs-tables
uses: ./.github/workflows/generate_svg_common.yml
if: inputs.generate-gpu-svgs
with:
backend: gpu
hardware_name: n3-L40x1
layer: core_crypto
pbs_kind: any
grouping_factor: 4
bench_type: latency
time_span_days: ${{ inputs.time_span_days }}
output_filename: gpu-pbs-benchmark
secrets:
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}

View File

@@ -25,8 +25,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] only Zama organization members and GitHub can trigger this workflow
jobs:
cuda-tests-linux:
name: gpu_4090_tests/cuda-tests-linux

View File

@@ -29,8 +29,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: gpu_code_validation_tests/setup-instance

View File

@@ -28,14 +28,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_fast_h100_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -27,14 +27,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_fast_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -18,8 +18,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: gpu_full_h100_tests/setup-instance

View File

@@ -28,14 +28,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_full_multi_gpu_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -25,8 +25,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: gpu_integer_long_run_tests/setup-instance

View File

@@ -28,8 +28,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: gpu_memory_sanitizer/setup-instance

View File

@@ -26,8 +26,6 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow (via manual approval for PR from forks)
jobs:
setup-instance:
name: gpu_pcc/setup-instance

View File

@@ -28,14 +28,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_signed_integer_classic_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -28,14 +28,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_signed_integer_h100_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -29,14 +29,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_signed_integer_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -28,14 +28,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_unsigned_integer_classic_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -28,14 +28,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_unsigned_integer_h100_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -29,14 +29,12 @@ on:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_unsigned_integer_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:

View File

@@ -16,14 +16,15 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: true
permissions: {}
permissions: { }
jobs:
should-run:
name: hpu_hlapi_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
pull-requests: read
outputs:
hpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.hpu_any_changed }}
steps:

View File

@@ -21,8 +21,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
setup-instance:
name: integer_long_run_tests/setup-instance

View File

@@ -33,8 +33,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
verify-triggering-actor:
name: make_release_common/verify-triggering-actor
@@ -77,9 +75,12 @@ jobs:
needs: package
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
@@ -90,7 +91,8 @@ jobs:
needs: package
runs-on: ubuntu-latest
permissions:
id-token: write # Needed for OIDC token exchange on crates.io
# Needed for OIDC token exchange on crates.io
id-token: write
steps:
- name: Checkout
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
@@ -106,7 +108,7 @@ jobs:
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
uses: rust-lang/crates-io-auth-action@041cce5b4b821e6b0ebc9c9c38b58cac4e34dcc2 # v1.0.2
id: auth
- name: Publish crate.io package

View File

@@ -17,8 +17,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
verify-triggering-actor:
name: make_release_cuda/verify-triggering-actor
@@ -119,9 +117,12 @@ jobs:
needs: [package]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
@@ -131,7 +132,8 @@ jobs:
needs: [setup-instance, package] # for comparing hashes
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
permissions:
id-token: write # Needed for OIDC token exchange on crates.io
# Needed for OIDC token exchange on crates.io
id-token: write
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
@@ -180,7 +182,7 @@ jobs:
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@b7e9a28eded4986ec6b1fa40eeee8f8f165559ec # v1.0.3
uses: rust-lang/crates-io-auth-action@041cce5b4b821e6b0ebc9c9c38b58cac4e34dcc2 # v1.0.2
id: auth
- name: Publish crate.io package

View File

@@ -17,8 +17,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_hpu/make-release
@@ -27,9 +25,12 @@ jobs:
package-name: "tfhe-hpu-backend"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -35,8 +35,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_tfhe/make-release
@@ -45,9 +43,12 @@ jobs:
package-name: "tfhe"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -10,8 +10,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_tfhe_csprng/make-release
@@ -20,9 +18,12 @@ jobs:
package-name: "tfhe-csprng"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -18,8 +18,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_tfhe_fft/make-release
@@ -28,9 +26,12 @@ jobs:
package-name: "tfhe-fft"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -18,8 +18,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_tfhe_ntt/make-release
@@ -28,9 +26,12 @@ jobs:
package-name: "tfhe-ntt"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -17,8 +17,6 @@ env:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release-derive:
name: make_release_tfhe_versionable/make-release-derive
@@ -27,9 +25,12 @@ jobs:
package-name: "tfhe-versionable-derive"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -46,9 +47,12 @@ jobs:
package-name: "tfhe-versionable"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -17,8 +17,6 @@ env:
permissions: { }
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
make-release:
name: make_release_zk_pok/make-release
@@ -27,9 +25,12 @@ jobs:
package-name: "tfhe-zk-pok"
dry-run: ${{ inputs.dry_run }}
permissions:
actions: read # Needed to detect the GitHub Actions environment
id-token: write # Needed to create the provenance via GitHub OIDC
contents: write # Needed to upload assets/artifacts
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -14,8 +14,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members and GitHub can trigger this workflow
jobs:
params-curves-security-check:
name: parameters_check/params-curves-security-check
@@ -32,7 +30,7 @@ jobs:
with:
repository: malb/lattice-estimator
path: lattice_estimator
ref: '352ddaf4a288a0543f5d9eb588d2f89c7acec463'
ref: 'e35f45b7976a90a79c3c6625a45bbc344c1abc67'
persist-credentials: 'false'
- name: Install Sage

View File

@@ -6,8 +6,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
jobs:
placeholder:
name: placeholder_workflow/placeholder

View File

@@ -7,11 +7,7 @@ on:
- 'main'
workflow_dispatch:
permissions: {}
concurrency:
group: ${{ github.workflow }}-${{ github.sha }}
cancel-in-progress: ${{ github.event_name == 'push' }}
permissions: { }
jobs:
sync-repo:

View File

@@ -6,16 +6,13 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] only GitHub can trigger this workflow
jobs:
stale:
name: unverified_prs/stale
runs-on: ubuntu-latest
permissions:
issues: read # Needed to fetch all issues
pull-requests: write # Needed to write message and close the PR
issues: read
pull-requests: write
steps:
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
with:

View File

@@ -11,8 +11,6 @@ on:
permissions: {}
# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency
jobs:
check-actor:
name: verify_triggering_actor/check-actor

View File

@@ -5,7 +5,7 @@ This document provides guidance on how to contribute to **TFHE-rs**.
There are two ways to contribute:
- **Report issues:** Open issues on GitHub to report bugs, suggest improvements, or note typos.
- **Submit code**: To become an official contributor, you must sign our Contributor License Agreement (CLA). Our CLA-bot will guide you through this process when you open your first pull request.
- **Submit codes**: To become an official contributor, you must sign our Contributor License Agreement (CLA). Our CLA-bot will guide you through this process when you open your first pull request.
## 1. Setting up the project

View File

@@ -22,10 +22,6 @@ exclude = [
"utils/tfhe-lints",
"apps/trivium",
]
[workspace.package]
rust-version = "1.85"
[workspace.dependencies]
aligned-vec = { version = "0.6", default-features = false }
bytemuck = "<1.24"

View File

@@ -7,7 +7,7 @@ CPU_COUNT=$(shell ./scripts/cpu_count.sh)
RS_BUILD_TOOLCHAIN:=stable
CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
CARGO_PROFILE?=release
MIN_RUST_VERSION:=$(shell grep '^rust-version[[:space:]]*=' Cargo.toml | cut -d '=' -f 2 | xargs)
MIN_RUST_VERSION:=$(shell grep '^rust-version[[:space:]]*=' tfhe/Cargo.toml | cut -d '=' -f 2 | xargs)
AVX512_SUPPORT?=OFF
WASM_RUSTFLAGS:=
BIG_TESTS_INSTANCE?=FALSE
@@ -15,7 +15,7 @@ GEN_KEY_CACHE_MULTI_BIT_ONLY?=FALSE
GEN_KEY_CACHE_COVERAGE_ONLY?=FALSE
PARSE_INTEGER_BENCH_CSV_FILE?=tfhe_rs_integer_benches.csv
FAST_TESTS?=FALSE
BIT_SIZES_SET?=ALL
FAST_BENCH?=FALSE
NIGHTLY_TESTS?=FALSE
BENCH_OP_FLAVOR?=DEFAULT
BENCH_TYPE?=latency
@@ -30,8 +30,6 @@ WASM_PACK_VERSION="0.13.1"
WASM_BINDGEN_VERSION:=$(shell cargo tree --target wasm32-unknown-unknown -e all --prefix none | grep "wasm-bindgen v" | head -n 1 | cut -d 'v' -f2)
WEB_RUNNER_DIR=web-test-runner
WEB_SERVER_DIR=tfhe/web_wasm_parallel_tests
TYPOS_VERSION=1.39.0
ZIZMOR_VERSION=1.16.2
# This is done to avoid forgetting it, we still precise the RUSTFLAGS in the commands to be able to
# copy paste the command in the terminal and change them if required without forgetting the flags
export RUSTFLAGS?=-C target-cpu=native
@@ -184,17 +182,15 @@ install_cargo_audit: install_rs_build_toolchain
.PHONY: install_typos_checker # Install typos checker
install_typos_checker: install_rs_build_toolchain
@./scripts/install_typos.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--typos-version $(TYPOS_VERSION)
@typos --version > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked typos-cli || \
( echo "Unable to install typos-cli, unknown error." && exit 1 )
.PHONY: install_zizmor # Install zizmor workflow security checker
install_zizmor: install_rs_build_toolchain
@./scripts/install_zizmor.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--zizmor-version $(ZIZMOR_VERSION)
.PHONY: zizmor_version # Return zizmor version that will be installed
zizmor_version:
@echo "$(ZIZMOR_VERSION)"
@zizmor --version > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked zizmor --version ~1.9 || \
( echo "Unable to install zizmor, unknown error." && exit 1 )
.PHONY: install_cargo_cross # Install cross for big endian tests
install_cargo_cross: install_rs_build_toolchain
@@ -1345,28 +1341,28 @@ print_doc_bench_parameters:
.PHONY: bench_integer # Run benchmarks for unsigned integer
bench_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer \
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
.PHONY: bench_signed_integer # Run benchmarks for signed integer
bench_signed_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed \
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
bench_integer_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer \
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_signed_integer_gpu # Run benchmarks for signed integer on GPU backend
bench_signed_integer_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed \
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1375,7 +1371,7 @@ bench_signed_integer_gpu: install_rs_check_toolchain
bench_integer_hpu: install_rs_check_toolchain
source ./setup_hpu.sh --config $(HPU_CONFIG); \
export V80_PCIE_DEV=${V80_PCIE_DEV}; \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer \
--features=integer,internal-keycache,pbs-stats,hpu,hpu-v80 -p tfhe-benchmark -- --quick
@@ -1411,7 +1407,7 @@ bench_integer_zk_gpu: install_rs_check_toolchain
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
bench_integer_multi_bit: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer \
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
@@ -1419,7 +1415,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
bench_signed_integer_multi_bit: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed \
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --
@@ -1427,7 +1423,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
bench_integer_multi_bit_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer \
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1435,7 +1431,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
.PHONY: bench_signed_integer_multi_bit_gpu # Run benchmarks for signed integer on GPU backend using multi-bit parameters
bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed \
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p tfhe-benchmark --profile release_lto_off --
@@ -1491,7 +1487,7 @@ bench_pbs: install_rs_check_toolchain
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
bench_pbs_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench core_crypto-pbs \
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p tfhe-benchmark --profile release_lto_off
@@ -1577,29 +1573,25 @@ bench_hlapi_hpu: install_rs_check_toolchain
.PHONY: bench_hlapi_erc20 # Run benchmarks for ERC20 operations
bench_hlapi_erc20: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
bench_hlapi_erc20_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --profile release_lto_off --
.PHONY: bench_hlapi_dex # Run benchmarks for DEX operations
bench_hlapi_dex: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-dex \
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
.PHONY: bench_hlapi_dex_gpu # Run benchmarks for DEX operations on GPU
bench_hlapi_dex_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-dex \
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --profile release_lto_off --
@@ -1607,7 +1599,7 @@ bench_hlapi_dex_gpu: install_rs_check_toolchain
bench_hlapi_erc20_hpu: install_rs_check_toolchain
source ./setup_hpu.sh --config $(HPU_CONFIG); \
export V80_PCIE_DEV=${V80_PCIE_DEV}; \
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
RUSTFLAGS="$(RUSTFLAGS)" \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,internal-keycache,hpu,hpu-v80,pbs-stats -p tfhe-benchmark --

View File

@@ -84,7 +84,6 @@ fn main() {
"cuda/include/ciphertext.h",
"cuda/include/integer/compression/compression.h",
"cuda/include/integer/integer.h",
"cuda/include/integer/rerand.h",
"cuda/include/aes/aes.h",
"cuda/include/zk/zk.h",
"cuda/include/keyswitch/keyswitch.h",

View File

@@ -39,29 +39,6 @@ void cuda_integer_key_expansion_64(CudaStreamsFFI streams,
void cleanup_cuda_integer_key_expansion_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
void cuda_integer_aes_ctr_256_encrypt_64(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *iv, CudaRadixCiphertextFFI const *round_keys,
const uint64_t *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
uint64_t scratch_cuda_integer_key_expansion_256_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_key_expansion_256_64(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *expanded_keys,
CudaRadixCiphertextFFI const *key,
int8_t *mem_ptr, void *const *bsks,
void *const *ksks);
void cleanup_cuda_integer_key_expansion_256_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void);
}
#endif

View File

@@ -20,7 +20,7 @@ template <typename Torus> struct int_aes_lut_buffers {
bool allocate_gpu_memory, uint32_t num_aes_inputs,
uint32_t sbox_parallelism, uint64_t &size_tracker) {
constexpr uint32_t AES_STATE_BITS = 128;
constexpr uint32_t AES_STATE_BITS = 64;
constexpr uint32_t SBOX_MAX_AND_GATES = 18;
this->and_lut = new int_radix_lut<Torus>(
@@ -238,7 +238,7 @@ template <typename Torus> struct int_aes_main_workspaces {
bool allocate_gpu_memory, uint32_t num_aes_inputs,
uint32_t sbox_parallelism, uint64_t &size_tracker) {
constexpr uint32_t AES_STATE_BITS = 128;
constexpr uint32_t AES_STATE_BITS = 64;
constexpr uint32_t SBOX_MAX_AND_GATES = 18;
constexpr uint32_t BATCH_BUFFER_OPERANDS = 3;
@@ -397,70 +397,7 @@ template <typename Torus> struct int_key_expansion_buffer {
this->params = params;
this->allocate_gpu_memory = allocate_gpu_memory;
constexpr uint32_t TOTAL_WORDS = 44;
constexpr uint32_t BITS_PER_WORD = 32;
constexpr uint32_t TOTAL_BITS = TOTAL_WORDS * BITS_PER_WORD;
this->words_buffer = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
streams.stream(0), streams.gpu_index(0), this->words_buffer, TOTAL_BITS,
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
this->tmp_word_buffer = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
streams.stream(0), streams.gpu_index(0), this->tmp_word_buffer,
BITS_PER_WORD, params.big_lwe_dimension, size_tracker,
allocate_gpu_memory);
this->tmp_rotated_word_buffer = new CudaRadixCiphertextFFI;
create_zero_radix_ciphertext_async<Torus>(
streams.stream(0), streams.gpu_index(0), this->tmp_rotated_word_buffer,
BITS_PER_WORD, params.big_lwe_dimension, size_tracker,
allocate_gpu_memory);
this->aes_encrypt_buffer = new int_aes_encrypt_buffer<Torus>(
streams, params, allocate_gpu_memory, 1, 4, size_tracker);
}
void release(CudaStreams streams) {
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
this->words_buffer, allocate_gpu_memory);
delete this->words_buffer;
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
this->tmp_word_buffer, allocate_gpu_memory);
delete this->tmp_word_buffer;
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
this->tmp_rotated_word_buffer,
allocate_gpu_memory);
delete this->tmp_rotated_word_buffer;
this->aes_encrypt_buffer->release(streams);
delete this->aes_encrypt_buffer;
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
}
};
template <typename Torus> struct int_key_expansion_256_buffer {
int_radix_params params;
bool allocate_gpu_memory;
CudaRadixCiphertextFFI *words_buffer;
CudaRadixCiphertextFFI *tmp_word_buffer;
CudaRadixCiphertextFFI *tmp_rotated_word_buffer;
int_aes_encrypt_buffer<Torus> *aes_encrypt_buffer;
int_key_expansion_256_buffer(CudaStreams streams,
const int_radix_params &params,
bool allocate_gpu_memory,
uint64_t &size_tracker) {
this->params = params;
this->allocate_gpu_memory = allocate_gpu_memory;
constexpr uint32_t TOTAL_WORDS = 60;
constexpr uint32_t TOTAL_WORDS = 22;
constexpr uint32_t BITS_PER_WORD = 32;
constexpr uint32_t TOTAL_BITS = TOTAL_WORDS * BITS_PER_WORD;

View File

@@ -17,9 +17,9 @@ uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64(
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
uint32_t lwe_dimension, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_to_decompress,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
uint32_t num_blocks_to_decompress, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type);
void cuda_integer_compress_radix_ciphertext_64(
CudaStreamsFFI streams, CudaPackedGlweCiphertextListFFI *glwe_array_out,

View File

@@ -1,19 +0,0 @@
#pragma once
#include "integer.h"
extern "C" {
uint64_t
scratch_cuda_rerand_64(CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension,
uint32_t ks_level, uint32_t ks_base_log,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory);
void cuda_rerand_64(
CudaStreamsFFI streams, void *lwe_array,
const void *lwe_flattened_encryptions_of_zero_compact_array_in,
int8_t *mem_ptr, void *const *ksk);
void cleanup_cuda_rerand(CudaStreamsFFI streams, int8_t **mem_ptr_void);
}

View File

@@ -1,78 +0,0 @@
#pragma once
#include "integer_utilities.h"
#include "keyswitch/ks_enums.h"
#include "zk/expand.cuh"
#include "zk/zk_utilities.h"
template <typename Torus> struct int_rerand_mem {
int_radix_params params;
Torus *tmp_zero_lwes;
Torus *tmp_ksed_zero_lwes;
Torus *lwe_trivial_indexes;
uint32_t num_lwes;
bool gpu_memory_allocated;
expand_job<Torus> *d_expand_jobs;
expand_job<Torus> *h_expand_jobs;
int_rerand_mem(CudaStreams streams, int_radix_params params,
const uint32_t num_lwes, const bool allocate_gpu_memory,
uint64_t &size_tracker)
: params(params), num_lwes(num_lwes),
gpu_memory_allocated(allocate_gpu_memory) {
tmp_zero_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
num_lwes * (params.big_lwe_dimension + 1) * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
tmp_ksed_zero_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
num_lwes * (params.small_lwe_dimension + 1) * sizeof(Torus),
streams.stream(0), streams.gpu_index(0), size_tracker,
allocate_gpu_memory);
d_expand_jobs =
static_cast<expand_job<Torus> *>(cuda_malloc_with_size_tracking_async(
num_lwes * sizeof(expand_job<Torus>), streams.stream(0),
streams.gpu_index(0), size_tracker, allocate_gpu_memory));
h_expand_jobs = static_cast<expand_job<Torus> *>(
malloc(num_lwes * sizeof(expand_job<Torus>)));
auto h_lwe_trivial_indexes =
static_cast<Torus *>(malloc(num_lwes * sizeof(Torus)));
for (auto i = 0; i < num_lwes; ++i) {
h_lwe_trivial_indexes[i] = i;
}
lwe_trivial_indexes = (Torus *)cuda_malloc_with_size_tracking_async(
num_lwes * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
size_tracker, allocate_gpu_memory);
cuda_memcpy_async_to_gpu(lwe_trivial_indexes, h_lwe_trivial_indexes,
num_lwes * sizeof(Torus), streams.stream(0),
streams.gpu_index(0));
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
free(h_lwe_trivial_indexes);
}
void release(CudaStreams streams) {
cuda_drop_with_size_tracking_async(tmp_zero_lwes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(tmp_ksed_zero_lwes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(lwe_trivial_indexes, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(d_expand_jobs, streams.stream(0),
streams.gpu_index(0),
gpu_memory_allocated);
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
free(h_expand_jobs);
}
};

View File

@@ -97,13 +97,12 @@ uint64_t get_buffer_size_full_sm_tbc_multibit_programmable_bootstrap(
uint32_t polynomial_size);
template <typename Torus, class params>
uint64_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size, uint32_t glwe_dimension,
uint32_t level_count, uint64_t full_sm_keybundle);
uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size,
uint64_t full_sm_keybundle);
template <typename Torus, class params>
uint64_t get_lwe_chunk_size_128(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t get_lwe_chunk_size_128(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size,
uint32_t glwe_dimension, uint32_t level_count,
uint64_t full_sm_keybundle);
template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
int8_t *d_mem_keybundle = NULL;
@@ -111,7 +110,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
int8_t *d_mem_acc_step_two = NULL;
int8_t *d_mem_acc_cg = NULL;
int8_t *d_mem_acc_tbc = NULL;
uint64_t lwe_chunk_size;
uint32_t lwe_chunk_size;
double2 *keybundle_fft;
Torus *global_accumulator;
double2 *global_join_buffer;
@@ -121,7 +120,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
pbs_buffer(cudaStream_t stream, uint32_t gpu_index, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, uint64_t lwe_chunk_size,
uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
PBS_VARIANT pbs_variant, bool allocate_gpu_memory,
uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
@@ -296,7 +295,7 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT> {
int8_t *d_mem_acc_step_two = NULL;
int8_t *d_mem_acc_cg = NULL;
int8_t *d_mem_acc_tbc = NULL;
uint64_t lwe_chunk_size;
uint32_t lwe_chunk_size;
double *keybundle_fft;
__uint128_t *global_accumulator;
double *global_join_buffer;
@@ -307,7 +306,7 @@ struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT> {
pbs_buffer_128(cudaStream_t stream, uint32_t gpu_index,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
uint64_t lwe_chunk_size, PBS_VARIANT pbs_variant,
uint32_t lwe_chunk_size, PBS_VARIANT pbs_variant,
bool allocate_gpu_memory, uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
cuda_set_device(gpu_index);

View File

@@ -621,7 +621,7 @@ __host__ void vectorized_shift_rows(CudaStreams streams,
CudaRadixCiphertextFFI *state_bitsliced,
uint32_t num_aes_inputs,
int_aes_encrypt_buffer<Torus> *mem) {
constexpr uint32_t NUM_BYTES = 16;
constexpr uint32_t NUM_BYTES = 8;
constexpr uint32_t LEN_BYTE = 8;
constexpr uint32_t NUM_BITS = NUM_BYTES * LEN_BYTE;
@@ -649,8 +649,7 @@ __host__ void vectorized_shift_rows(CudaStreams streams,
i * num_aes_inputs, (i + 1) * num_aes_inputs);
}
const int shift_rows_map[] = {0, 5, 10, 15, 4, 9, 14, 3,
8, 13, 2, 7, 12, 1, 6, 11};
const int shift_rows_map[] = {0, 1, 3, 2, 4, 5, 7, 6};
for (int i = 0; i < NUM_BYTES; i++) {
for (int bit = 0; bit < LEN_BYTE; bit++) {
@@ -711,7 +710,7 @@ __host__ void vectorized_mix_columns(CudaStreams streams,
constexpr uint32_t BITS_PER_BYTE = 8;
constexpr uint32_t BYTES_PER_COLUMN = 4;
constexpr uint32_t NUM_COLUMNS = 4;
constexpr uint32_t NUM_COLUMNS = 2;
constexpr uint32_t BITS_PER_COLUMN = BYTES_PER_COLUMN * BITS_PER_BYTE;
for (uint32_t col = 0; col < NUM_COLUMNS; ++col) {
@@ -849,7 +848,7 @@ __host__ void vectorized_aes_encrypt_inplace(
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
constexpr uint32_t BITS_PER_BYTE = 8;
constexpr uint32_t STATE_BYTES = 16;
constexpr uint32_t STATE_BYTES = 8;
constexpr uint32_t STATE_BITS = STATE_BYTES * BITS_PER_BYTE;
constexpr uint32_t ROUNDS = 10;
@@ -910,6 +909,7 @@ __host__ void vectorized_aes_encrypt_inplace(
mem, bsks, ksks);
}
break;
case 16:
case 8:
for (uint32_t i = 0; i < STATE_BYTES; i += 8) {
CudaRadixCiphertextFFI *sbox_inputs[] = {
@@ -921,19 +921,6 @@ __host__ void vectorized_aes_encrypt_inplace(
mem, bsks, ksks);
}
break;
case 16: {
CudaRadixCiphertextFFI *sbox_inputs[] = {
&s_bits[0 * BITS_PER_BYTE], &s_bits[1 * BITS_PER_BYTE],
&s_bits[2 * BITS_PER_BYTE], &s_bits[3 * BITS_PER_BYTE],
&s_bits[4 * BITS_PER_BYTE], &s_bits[5 * BITS_PER_BYTE],
&s_bits[6 * BITS_PER_BYTE], &s_bits[7 * BITS_PER_BYTE],
&s_bits[8 * BITS_PER_BYTE], &s_bits[9 * BITS_PER_BYTE],
&s_bits[10 * BITS_PER_BYTE], &s_bits[11 * BITS_PER_BYTE],
&s_bits[12 * BITS_PER_BYTE], &s_bits[13 * BITS_PER_BYTE],
&s_bits[14 * BITS_PER_BYTE], &s_bits[15 * BITS_PER_BYTE]};
vectorized_sbox_n_bytes<Torus>(streams, sbox_inputs, 16, num_aes_inputs,
mem, bsks, ksks);
} break;
default:
PANIC("Unsupported S-Box parallelism level selected: %u",
sbox_parallelism);
@@ -993,7 +980,7 @@ __host__ void vectorized_aes_full_adder_inplace(
const Torus *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
constexpr uint32_t NUM_BITS = 128;
constexpr uint32_t NUM_BITS = 64;
// --- Initialization ---
CudaRadixCiphertextFFI *carry_vec =
@@ -1098,7 +1085,7 @@ __host__ void host_integer_aes_ctr_encrypt(
const Torus *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
constexpr uint32_t NUM_BITS = 128;
constexpr uint32_t NUM_BITS = 64;
CudaRadixCiphertextFFI *initial_states =
mem->main_workspaces->initial_states_and_jit_key_workspace;
@@ -1159,8 +1146,8 @@ __host__ void host_integer_key_expansion(CudaStreams streams,
constexpr uint32_t BITS_PER_WORD = 32;
constexpr uint32_t BITS_PER_BYTE = 8;
constexpr uint32_t BYTES_PER_WORD = 4;
constexpr uint32_t TOTAL_WORDS = 44;
constexpr uint32_t KEY_WORDS = 4;
constexpr uint32_t TOTAL_WORDS = 22;
constexpr uint32_t KEY_WORDS = 2;
const Torus rcon[] = {0x01, 0x02, 0x04, 0x08, 0x10,
0x20, 0x40, 0x80, 0x1b, 0x36};
@@ -1178,8 +1165,8 @@ __host__ void host_integer_key_expansion(CudaStreams streams,
as_radix_ciphertext_slice<Torus>(&tmp_word_buffer, mem->tmp_word_buffer, 0,
BITS_PER_WORD);
as_radix_ciphertext_slice<Torus>(&tmp_far, words, (w - 4) * BITS_PER_WORD,
(w - 3) * BITS_PER_WORD);
as_radix_ciphertext_slice<Torus>(&tmp_far, words, (w - 2) * BITS_PER_WORD,
(w - 1) * BITS_PER_WORD);
as_radix_ciphertext_slice<Torus>(&tmp_near, words, (w - 1) * BITS_PER_WORD,
w * BITS_PER_WORD);

View File

@@ -1,55 +0,0 @@
#include "../../include/aes/aes.h"
#include "aes256.cuh"
void cuda_integer_aes_ctr_256_encrypt_64(
CudaStreamsFFI streams, CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *iv, CudaRadixCiphertextFFI const *round_keys,
const uint64_t *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
int8_t *mem_ptr, void *const *bsks, void *const *ksks) {
host_integer_aes_ctr_256_encrypt<uint64_t>(
CudaStreams(streams), output, iv, round_keys, counter_bits_le_all_blocks,
num_aes_inputs, (int_aes_encrypt_buffer<uint64_t> *)mem_ptr, bsks,
(uint64_t **)ksks);
}
uint64_t scratch_cuda_integer_key_expansion_256_64(
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus,
noise_reduction_type);
return scratch_cuda_integer_key_expansion_256<uint64_t>(
CudaStreams(streams), (int_key_expansion_256_buffer<uint64_t> **)mem_ptr,
params, allocate_gpu_memory);
}
void cuda_integer_key_expansion_256_64(CudaStreamsFFI streams,
CudaRadixCiphertextFFI *expanded_keys,
CudaRadixCiphertextFFI const *key,
int8_t *mem_ptr, void *const *bsks,
void *const *ksks) {
host_integer_key_expansion_256<uint64_t>(
CudaStreams(streams), expanded_keys, key,
(int_key_expansion_256_buffer<uint64_t> *)mem_ptr, bsks,
(uint64_t **)ksks);
}
void cleanup_cuda_integer_key_expansion_256_64(CudaStreamsFFI streams,
int8_t **mem_ptr_void) {
int_key_expansion_256_buffer<uint64_t> *mem_ptr =
(int_key_expansion_256_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release(CudaStreams(streams));
delete mem_ptr;
*mem_ptr_void = nullptr;
}

View File

@@ -1,355 +0,0 @@
#pragma once
#include "../../include/aes/aes_utilities.h"
#include "../integer/integer.cuh"
#include "../integer/radix_ciphertext.cuh"
#include "../integer/scalar_addition.cuh"
#include "../linearalgebra/addition.cuh"
#include "aes.cuh"
/**
* The main AES encryption function. It orchestrates the full 14-round AES-256
* encryption process on the bitsliced state.
*
* The process is broken down into three phases:
*
* 1. Initial Round (Round 0):
 * - AddRoundKey, which is an XOR
*
* 2. Main Rounds (Rounds 1-13):
* This sequence is repeated 13 times.
* - SubBytes
* - ShiftRows
* - MixColumns
* - AddRoundKey
*
* 3. Final Round (Round 14):
* - SubBytes
* - ShiftRows
* - AddRoundKey
*
*/
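// For orientation only: a minimal plaintext (non-homomorphic) sketch of the
// three phases listed above. aes_sub_bytes, aes_shift_rows and aes_mix_columns
// are assumed single-block helpers used purely for illustration; they are not
// functions of this backend.
#include <cstdint>

void aes_sub_bytes(uint8_t state[16]);   // assumed helper
void aes_shift_rows(uint8_t state[16]);  // assumed helper
void aes_mix_columns(uint8_t state[16]); // assumed helper

inline void aes256_rounds_reference(uint8_t state[16],
                                    const uint8_t round_keys[15][16]) {
  // Round 0: AddRoundKey only.
  for (int i = 0; i < 16; ++i)
    state[i] ^= round_keys[0][i];
  // Rounds 1-13: SubBytes, ShiftRows, MixColumns, AddRoundKey.
  for (int round = 1; round <= 13; ++round) {
    aes_sub_bytes(state);
    aes_shift_rows(state);
    aes_mix_columns(state);
    for (int i = 0; i < 16; ++i)
      state[i] ^= round_keys[round][i];
  }
  // Round 14: SubBytes, ShiftRows, AddRoundKey (no MixColumns).
  aes_sub_bytes(state);
  aes_shift_rows(state);
  for (int i = 0; i < 16; ++i)
    state[i] ^= round_keys[14][i];
}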
template <typename Torus>
__host__ void vectorized_aes_256_encrypt_inplace(
CudaStreams streams, CudaRadixCiphertextFFI *all_states_bitsliced,
CudaRadixCiphertextFFI const *round_keys, uint32_t num_aes_inputs,
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
constexpr uint32_t BITS_PER_BYTE = 8;
constexpr uint32_t STATE_BYTES = 16;
constexpr uint32_t STATE_BITS = STATE_BYTES * BITS_PER_BYTE;
constexpr uint32_t ROUNDS = 14;
CudaRadixCiphertextFFI *jit_transposed_key =
mem->main_workspaces->initial_states_and_jit_key_workspace;
CudaRadixCiphertextFFI round_0_key_slice;
as_radix_ciphertext_slice<Torus>(
&round_0_key_slice, (CudaRadixCiphertextFFI *)round_keys, 0, STATE_BITS);
for (uint32_t block = 0; block < num_aes_inputs; ++block) {
CudaRadixCiphertextFFI tile_slice;
as_radix_ciphertext_slice<Torus>(
&tile_slice, mem->main_workspaces->tmp_tiled_key_buffer,
block * STATE_BITS, (block + 1) * STATE_BITS);
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
&tile_slice, &round_0_key_slice);
}
transpose_blocks_to_bitsliced<Torus>(
streams.stream(0), streams.gpu_index(0), jit_transposed_key,
mem->main_workspaces->tmp_tiled_key_buffer, num_aes_inputs, STATE_BITS);
aes_xor<Torus>(streams, mem, all_states_bitsliced, all_states_bitsliced,
jit_transposed_key);
aes_flush_inplace<Torus>(streams, all_states_bitsliced, mem, bsks, ksks);
for (uint32_t round = 1; round <= ROUNDS; ++round) {
CudaRadixCiphertextFFI s_bits[STATE_BITS];
for (uint32_t i = 0; i < STATE_BITS; i++) {
as_radix_ciphertext_slice<Torus>(&s_bits[i], all_states_bitsliced,
i * num_aes_inputs,
(i + 1) * num_aes_inputs);
}
uint32_t sbox_parallelism = mem->sbox_parallel_instances;
switch (sbox_parallelism) {
case 1:
for (uint32_t i = 0; i < STATE_BYTES; ++i) {
CudaRadixCiphertextFFI *sbox_inputs[] = {&s_bits[i * BITS_PER_BYTE]};
vectorized_sbox_n_bytes<Torus>(streams, sbox_inputs, 1, num_aes_inputs,
mem, bsks, ksks);
}
break;
case 2:
for (uint32_t i = 0; i < STATE_BYTES; i += 2) {
CudaRadixCiphertextFFI *sbox_inputs[] = {
&s_bits[i * BITS_PER_BYTE], &s_bits[(i + 1) * BITS_PER_BYTE]};
vectorized_sbox_n_bytes<Torus>(streams, sbox_inputs, 2, num_aes_inputs,
mem, bsks, ksks);
}
break;
case 4:
for (uint32_t i = 0; i < STATE_BYTES; i += 4) {
CudaRadixCiphertextFFI *sbox_inputs[] = {
&s_bits[i * BITS_PER_BYTE], &s_bits[(i + 1) * BITS_PER_BYTE],
&s_bits[(i + 2) * BITS_PER_BYTE], &s_bits[(i + 3) * BITS_PER_BYTE]};
vectorized_sbox_n_bytes<Torus>(streams, sbox_inputs, 4, num_aes_inputs,
mem, bsks, ksks);
}
break;
case 8:
for (uint32_t i = 0; i < STATE_BYTES; i += 8) {
CudaRadixCiphertextFFI *sbox_inputs[] = {
&s_bits[i * BITS_PER_BYTE], &s_bits[(i + 1) * BITS_PER_BYTE],
&s_bits[(i + 2) * BITS_PER_BYTE], &s_bits[(i + 3) * BITS_PER_BYTE],
&s_bits[(i + 4) * BITS_PER_BYTE], &s_bits[(i + 5) * BITS_PER_BYTE],
&s_bits[(i + 6) * BITS_PER_BYTE], &s_bits[(i + 7) * BITS_PER_BYTE]};
vectorized_sbox_n_bytes<Torus>(streams, sbox_inputs, 8, num_aes_inputs,
mem, bsks, ksks);
}
break;
case 16: {
CudaRadixCiphertextFFI *sbox_inputs[] = {
&s_bits[0 * BITS_PER_BYTE], &s_bits[1 * BITS_PER_BYTE],
&s_bits[2 * BITS_PER_BYTE], &s_bits[3 * BITS_PER_BYTE],
&s_bits[4 * BITS_PER_BYTE], &s_bits[5 * BITS_PER_BYTE],
&s_bits[6 * BITS_PER_BYTE], &s_bits[7 * BITS_PER_BYTE],
&s_bits[8 * BITS_PER_BYTE], &s_bits[9 * BITS_PER_BYTE],
&s_bits[10 * BITS_PER_BYTE], &s_bits[11 * BITS_PER_BYTE],
&s_bits[12 * BITS_PER_BYTE], &s_bits[13 * BITS_PER_BYTE],
&s_bits[14 * BITS_PER_BYTE], &s_bits[15 * BITS_PER_BYTE]};
vectorized_sbox_n_bytes<Torus>(streams, sbox_inputs, 16, num_aes_inputs,
mem, bsks, ksks);
} break;
default:
PANIC("Unsupported S-Box parallelism level selected: %u",
sbox_parallelism);
}
vectorized_shift_rows<Torus>(streams, all_states_bitsliced, num_aes_inputs,
mem);
if (round != ROUNDS) {
vectorized_mix_columns<Torus>(streams, s_bits, num_aes_inputs, mem, bsks,
ksks);
aes_flush_inplace<Torus>(streams, all_states_bitsliced, mem, bsks, ksks);
}
CudaRadixCiphertextFFI round_key_slice;
as_radix_ciphertext_slice<Torus>(
&round_key_slice, (CudaRadixCiphertextFFI *)round_keys,
round * STATE_BITS, (round + 1) * STATE_BITS);
for (uint32_t block = 0; block < num_aes_inputs; ++block) {
CudaRadixCiphertextFFI tile_slice;
as_radix_ciphertext_slice<Torus>(
&tile_slice, mem->main_workspaces->tmp_tiled_key_buffer,
block * STATE_BITS, (block + 1) * STATE_BITS);
copy_radix_ciphertext_async<Torus>(streams.stream(0),
streams.gpu_index(0), &tile_slice,
&round_key_slice);
}
transpose_blocks_to_bitsliced<Torus>(
streams.stream(0), streams.gpu_index(0), jit_transposed_key,
mem->main_workspaces->tmp_tiled_key_buffer, num_aes_inputs, STATE_BITS);
aes_xor<Torus>(streams, mem, all_states_bitsliced, all_states_bitsliced,
jit_transposed_key);
aes_flush_inplace<Torus>(streams, all_states_bitsliced, mem, bsks, ksks);
}
}
/**
* Top-level function to perform a full AES-256-CTR encryption homomorphically.
*
* +----------+ +-------------------+
* | IV_CT | | Plaintext Counter |
* +----------+ +-------------------+
* | |
* V V
* +---------------------------------+
* | Homomorphic Full Adder |
* | (IV_CT + Counter) |
* +---------------------------------+
* |
* V
* +---------------------------------+
* | Homomorphic AES Encryption | -> Final Output Ciphertext
* | (14 Rounds) |
* +---------------------------------+
*
*/
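// Plaintext sketch of the dataflow drawn above: keystream block b is
// AES-256(IV + b), and the caller XORs the keystream with the data.
// aes256_encrypt_block is an assumed single-block primitive used only for
// illustration, not a function exposed by this backend.
#include <cstddef>
#include <cstdint>

void aes256_encrypt_block(uint8_t block[16],
                          const uint8_t round_keys[15][16]); // assumed helper

inline void aes256_ctr_keystream_reference(const uint8_t iv[16],
                                           const uint8_t round_keys[15][16],
                                           uint8_t *keystream,
                                           size_t num_blocks) {
  for (size_t b = 0; b < num_blocks; ++b) {
    uint8_t block[16];
    // counter block = IV + b, added as one big-endian 128-bit integer,
    // which is what the homomorphic full adder computes on ciphertexts.
    uint64_t carry = b;
    for (int i = 15; i >= 0; --i) {
      uint64_t sum = (uint64_t)iv[i] + (carry & 0xff);
      block[i] = (uint8_t)sum;
      carry = (carry >> 8) + (sum >> 8);
    }
    aes256_encrypt_block(block, round_keys);
    for (int i = 0; i < 16; ++i)
      keystream[16 * b + i] = block[i];
  }
}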
template <typename Torus>
__host__ void host_integer_aes_ctr_256_encrypt(
CudaStreams streams, CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *iv, CudaRadixCiphertextFFI const *round_keys,
const Torus *counter_bits_le_all_blocks, uint32_t num_aes_inputs,
int_aes_encrypt_buffer<Torus> *mem, void *const *bsks, Torus *const *ksks) {
constexpr uint32_t NUM_BITS = 128;
CudaRadixCiphertextFFI *initial_states =
mem->main_workspaces->initial_states_and_jit_key_workspace;
for (uint32_t block = 0; block < num_aes_inputs; ++block) {
CudaRadixCiphertextFFI output_slice;
as_radix_ciphertext_slice<Torus>(&output_slice, initial_states,
block * NUM_BITS, (block + 1) * NUM_BITS);
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
&output_slice, iv);
}
CudaRadixCiphertextFFI *transposed_states =
mem->main_workspaces->main_bitsliced_states_buffer;
transpose_blocks_to_bitsliced<Torus>(streams.stream(0), streams.gpu_index(0),
transposed_states, initial_states,
num_aes_inputs, NUM_BITS);
vectorized_aes_full_adder_inplace<Torus>(streams, transposed_states,
counter_bits_le_all_blocks,
num_aes_inputs, mem, bsks, ksks);
vectorized_aes_256_encrypt_inplace<Torus>(
streams, transposed_states, round_keys, num_aes_inputs, mem, bsks, ksks);
transpose_bitsliced_to_blocks<Torus>(streams.stream(0), streams.gpu_index(0),
output, transposed_states,
num_aes_inputs, NUM_BITS);
}
template <typename Torus>
uint64_t scratch_cuda_integer_key_expansion_256(
CudaStreams streams, int_key_expansion_256_buffer<Torus> **mem_ptr,
int_radix_params params, bool allocate_gpu_memory) {
uint64_t size_tracker = 0;
*mem_ptr = new int_key_expansion_256_buffer<Torus>(
streams, params, allocate_gpu_memory, size_tracker);
return size_tracker;
}
/**
* Homomorphically performs the AES-256 key expansion schedule on the GPU.
*
* This function expands an encrypted 256-bit key into 60 words (15 round keys).
* The generation logic for a new word `w_i` depends on its position (with
* KEY_WORDS = 8):
* - If (i % 8 == 0): w_i = w_{i-8} + SubWord(RotWord(w_{i-1})) + Rcon[i/8]
* - If (i % 8 == 4): w_i = w_{i-8} + SubWord(w_{i-1})
* - Otherwise: w_i = w_{i-8} + w_{i-1}
*/
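// Plaintext sketch of the word recurrence listed above (AES-256: KEY_WORDS = 8,
// TOTAL_WORDS = 60, with "+" meaning XOR). RotWord is spelled out; SubWord,
// which applies the AES S-box to each byte of a word, is an assumed helper
// used only for illustration.
#include <cstdint>

uint32_t SubWord(uint32_t w); // assumed helper: byte-wise S-box substitution

inline uint32_t RotWord(uint32_t w) { return (w << 8) | (w >> 24); }

inline void aes256_key_schedule_reference(const uint32_t key[8],
                                          uint32_t w[60]) {
  // Round constants for i/8 = 1..7, packed in the most significant byte.
  static const uint32_t rcon[7] = {0x01000000, 0x02000000, 0x04000000,
                                   0x08000000, 0x10000000, 0x20000000,
                                   0x40000000};
  for (int i = 0; i < 8; ++i)
    w[i] = key[i];
  for (int i = 8; i < 60; ++i) {
    uint32_t tmp = w[i - 1];
    if (i % 8 == 0)
      tmp = SubWord(RotWord(tmp)) ^ rcon[i / 8 - 1];
    else if (i % 8 == 4)
      tmp = SubWord(tmp);
    w[i] = w[i - 8] ^ tmp;
  }
}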
template <typename Torus>
__host__ void host_integer_key_expansion_256(
CudaStreams streams, CudaRadixCiphertextFFI *expanded_keys,
CudaRadixCiphertextFFI const *key, int_key_expansion_256_buffer<Torus> *mem,
void *const *bsks, Torus *const *ksks) {
constexpr uint32_t BITS_PER_WORD = 32;
constexpr uint32_t BITS_PER_BYTE = 8;
constexpr uint32_t BYTES_PER_WORD = 4;
constexpr uint32_t TOTAL_WORDS = 60;
constexpr uint32_t KEY_WORDS = 8;
const Torus rcon[] = {0x01, 0x02, 0x04, 0x08, 0x10,
0x20, 0x40, 0x80, 0x1b, 0x36};
CudaRadixCiphertextFFI *words = mem->words_buffer;
CudaRadixCiphertextFFI initial_key_dest_slice;
as_radix_ciphertext_slice<Torus>(&initial_key_dest_slice, words, 0,
KEY_WORDS * BITS_PER_WORD);
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
&initial_key_dest_slice, key);
for (uint32_t w = KEY_WORDS; w < TOTAL_WORDS; ++w) {
CudaRadixCiphertextFFI tmp_word_buffer, tmp_far, tmp_near;
as_radix_ciphertext_slice<Torus>(&tmp_word_buffer, mem->tmp_word_buffer, 0,
BITS_PER_WORD);
as_radix_ciphertext_slice<Torus>(&tmp_far, words, (w - 8) * BITS_PER_WORD,
(w - 7) * BITS_PER_WORD);
as_radix_ciphertext_slice<Torus>(&tmp_near, words, (w - 1) * BITS_PER_WORD,
w * BITS_PER_WORD);
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
&tmp_word_buffer, &tmp_near);
if (w % KEY_WORDS == 0) {
CudaRadixCiphertextFFI rotated_word_buffer;
as_radix_ciphertext_slice<Torus>(
&rotated_word_buffer, mem->tmp_rotated_word_buffer, 0, BITS_PER_WORD);
copy_radix_ciphertext_slice_async<Torus>(
streams.stream(0), streams.gpu_index(0), &rotated_word_buffer, 0,
BITS_PER_WORD - BITS_PER_BYTE, &tmp_word_buffer, BITS_PER_BYTE,
BITS_PER_WORD);
copy_radix_ciphertext_slice_async<Torus>(
streams.stream(0), streams.gpu_index(0), &rotated_word_buffer,
BITS_PER_WORD - BITS_PER_BYTE, BITS_PER_WORD, &tmp_word_buffer, 0,
BITS_PER_BYTE);
CudaRadixCiphertextFFI bit_slices[BITS_PER_WORD];
for (uint32_t i = 0; i < BITS_PER_WORD; ++i) {
as_radix_ciphertext_slice<Torus>(&bit_slices[i], &rotated_word_buffer,
i, i + 1);
}
CudaRadixCiphertextFFI *sbox_byte_pointers[BYTES_PER_WORD];
for (uint32_t i = 0; i < BYTES_PER_WORD; ++i) {
sbox_byte_pointers[i] = &bit_slices[i * BITS_PER_BYTE];
}
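      // SubWord: apply the homomorphic AES S-box to each of the word's 4 bytes.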
vectorized_sbox_n_bytes<Torus>(streams, sbox_byte_pointers,
BYTES_PER_WORD, 1, mem->aes_encrypt_buffer,
bsks, ksks);
Torus rcon_val = rcon[w / KEY_WORDS - 1];
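      // Rcon step: flip (XOR) the bits of the first byte wherever the round
      // constant has a 1 bit; the flush below reduces the sums back to clean
      // encryptions of 0/1.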
for (uint32_t bit = 0; bit < BITS_PER_BYTE; ++bit) {
if ((rcon_val >> (7 - bit)) & 1) {
CudaRadixCiphertextFFI first_byte_bit_slice;
as_radix_ciphertext_slice<Torus>(&first_byte_bit_slice,
&rotated_word_buffer, bit, bit + 1);
host_add_scalar_one_inplace<Torus>(streams, &first_byte_bit_slice,
mem->params.message_modulus,
mem->params.carry_modulus);
}
}
aes_flush_inplace(streams, &rotated_word_buffer, mem->aes_encrypt_buffer,
bsks, ksks);
copy_radix_ciphertext_async<Torus>(streams.stream(0),
streams.gpu_index(0), &tmp_word_buffer,
&rotated_word_buffer);
} else if (w % KEY_WORDS == 4) {
CudaRadixCiphertextFFI bit_slices[BITS_PER_WORD];
for (uint32_t i = 0; i < BITS_PER_WORD; ++i) {
as_radix_ciphertext_slice<Torus>(&bit_slices[i], &tmp_word_buffer, i,
i + 1);
}
CudaRadixCiphertextFFI *sbox_byte_pointers[BYTES_PER_WORD];
for (uint32_t i = 0; i < BYTES_PER_WORD; ++i) {
sbox_byte_pointers[i] = &bit_slices[i * BITS_PER_BYTE];
}
vectorized_sbox_n_bytes<Torus>(streams, sbox_byte_pointers,
BYTES_PER_WORD, 1, mem->aes_encrypt_buffer,
bsks, ksks);
}
aes_xor(streams, mem->aes_encrypt_buffer, &tmp_word_buffer, &tmp_far,
&tmp_word_buffer);
aes_flush_inplace(streams, &tmp_word_buffer, mem->aes_encrypt_buffer, bsks,
ksks);
CudaRadixCiphertextFFI dest_word;
as_radix_ciphertext_slice<Torus>(&dest_word, words, w * BITS_PER_WORD,
(w + 1) * BITS_PER_WORD);
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
&dest_word, &tmp_word_buffer);
}
copy_radix_ciphertext_async<Torus>(streams.stream(0), streams.gpu_index(0),
expanded_keys, words);
}
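
For reference, the recurrence the kernel above evaluates bit by bit is the standard FIPS-197 AES-256 schedule. The plaintext sketch below is hypothetical and not part of this diff: it assumes words are stored with the first key byte in the high-order byte and takes the byte-wise S-box as a caller-supplied `sub_word` so it stays self-contained; XOR stands in for the homomorphic add-then-flush.

#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>

// Plaintext AES-256 word recurrence (illustrative only).
std::array<uint32_t, 60> expand_key_256(
    const std::array<uint32_t, 8> &key,
    const std::function<uint32_t(uint32_t)> &sub_word) {
  // Round constants 0x01..0x40 placed in the high-order byte, as in FIPS-197.
  static const uint32_t rcon[7] = {0x01000000, 0x02000000, 0x04000000,
                                   0x08000000, 0x10000000, 0x20000000,
                                   0x40000000};
  auto rot_word = [](uint32_t w) { return (w << 8) | (w >> 24); };
  std::array<uint32_t, 60> words{};
  for (std::size_t i = 0; i < 8; ++i)
    words[i] = key[i];
  for (std::size_t i = 8; i < 60; ++i) {
    uint32_t tmp = words[i - 1];
    if (i % 8 == 0)
      tmp = sub_word(rot_word(tmp)) ^ rcon[i / 8 - 1]; // RotWord + SubWord + Rcon
    else if (i % 8 == 4)
      tmp = sub_word(tmp);                             // SubWord only
    words[i] = words[i - 8] ^ tmp;
  }
  return words;
}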

View File

@@ -1,7 +1,10 @@
#pragma once
#ifndef CNCRT_CRYPTO_CUH
#define CNCRT_CRYPTO_CUH
#include "crypto/torus.cuh"
#include "device.h"
#include "fft128/f128.cuh"
#include <cstdint>
/**
* GadgetMatrix implements the iterator design pattern to decompose a set of
@@ -13,20 +16,7 @@
* This class always decomposes the entire set of num_poly polynomials.
* By default, it works on a single polynomial.
*/
// Define explicitly an arithmetic shift right with a cast to signed
template <typename T> __device__ T signed_shift_right(T value, int base_log) {
if constexpr (sizeof(T) == 4) {
return static_cast<T>(static_cast<int32_t>(value) >> base_log);
} else if constexpr (sizeof(T) == 8) {
return static_cast<T>(static_cast<int64_t>(value) >> base_log);
} else if constexpr (sizeof(T) == 16) {
return static_cast<T>(static_cast<__int128_t>(value) >> base_log);
} else {
return value >> base_log; // fallback for unusual sizes
}
}
#pragma once
template <typename T, class params> class GadgetMatrix {
private:
uint32_t level_count;
@@ -71,8 +61,8 @@ public:
T res_re = *input1 & mask_mod_b;
T res_im = *input2 & mask_mod_b;
*input1 = signed_shift_right<T>(*input1, base_log); // Update state
*input2 = signed_shift_right<T>(*input2, base_log); // Update state
*input1 >>= base_log; // Update state
*input2 >>= base_log; // Update state
T carry_re = ((res_re - 1ll) | *input1) & res_re;
T carry_im = ((res_im - 1ll) | *input2) & res_im;
@@ -104,8 +94,8 @@ public:
T res_re = *input1 & mask_mod_b;
T res_im = *input2 & mask_mod_b;
*input1 = signed_shift_right<T>(*input1, base_log);
*input2 = signed_shift_right<T>(*input2, base_log);
*input1 >>= base_log; // Update state
*input2 >>= base_log; // Update state
T carry_re = ((res_re - 1ll) | *input1) & res_re;
T carry_im = ((res_im - 1ll) | *input2) & res_im;
@@ -159,8 +149,8 @@ __device__ void decompose_and_compress_level_2_2_params(double2 *result,
T res_re = input1 & mask_mod_b;
T res_im = input2 & mask_mod_b;
input1 = signed_shift_right<T>(input1, base_log); // Update state
input2 = signed_shift_right<T>(input2, base_log); // Update state
input1 >>= base_log; // Update state
input2 >>= base_log; // Update state
T carry_re = ((res_re - 1ll) | input1) & res_re;
T carry_im = ((res_im - 1ll) | input2) & res_im;
@@ -178,11 +168,12 @@ __device__ void decompose_and_compress_level_2_2_params(double2 *result,
template <typename Torus>
__device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) {
Torus res = state & mask_mod_b;
state = signed_shift_right<Torus>(state, base_log);
state >>= base_log;
Torus carry = ((res - 1ll) | state) & res;
carry >>= base_log - 1;
state += carry;
res -= carry << base_log;
return res;
}
#endif // CNCRT_CRYPTO_CUH
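
The decompose_one routine above extracts one signed digit per call using a carry trick so that digits land in a balanced range around zero. A minimal host-side sketch, hypothetical and not part of this diff, reproduces the same logic on a plain uint64_t (with a logical right shift, matching the variant without the signed cast) so the reconstruction invariant can be checked without a GPU.

#include <cassert>
#include <cstdint>
#include <cstdio>

// One digit of the signed decomposition, base B = 2^base_log.
static int64_t decompose_one_host(uint64_t &state, int base_log) {
  const uint64_t mask_mod_b = (1ull << base_log) - 1;
  uint64_t res = state & mask_mod_b;
  state >>= base_log;
  uint64_t carry = ((res - 1ull) | state) & res;
  carry >>= base_log - 1;
  state += carry;
  res -= carry << base_log; // wraps to the two's-complement encoding if negative
  return static_cast<int64_t>(res);
}

int main() {
  const int base_log = 4, levels = 3;
  uint64_t state = 0x9A7; // input restricted to levels * base_log bits
  int64_t reconstructed = 0;
  for (int l = 0; l < levels; ++l) {
    int64_t digit = decompose_one_host(state, base_log);
    printf("level %d: digit = %lld\n", l, static_cast<long long>(digit));
    reconstructed += digit * (int64_t{1} << (l * base_log));
  }
  // The leftover state is the carry out of the most significant digit.
  reconstructed += static_cast<int64_t>(state) << (levels * base_log);
  assert(reconstructed == 0x9A7); // digits in [-B/2, B/2] reconstruct exactly
  return 0;
}

Keeping the digits balanced around zero is what bounds the error of the gadget decomposition that the external product consumes.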

View File

@@ -22,21 +22,21 @@ uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64(
uint32_t encryption_glwe_dimension, uint32_t encryption_polynomial_size,
uint32_t compression_glwe_dimension, uint32_t compression_polynomial_size,
uint32_t lwe_dimension, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks_to_decompress,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type) {
uint32_t num_blocks_to_decompress, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
PBS_MS_REDUCTION_T noise_reduction_type) {
// Decompression doesn't keyswitch, so big and small dimensions are the same
int_radix_params encryption_params(
pbs_type, encryption_glwe_dimension, encryption_polynomial_size,
lwe_dimension, lwe_dimension, 0, 0, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus, noise_reduction_type);
lwe_dimension, lwe_dimension, 0, 0, pbs_level, pbs_base_log, 0,
message_modulus, carry_modulus, noise_reduction_type);
int_radix_params compression_params(
pbs_type, compression_glwe_dimension, compression_polynomial_size,
lwe_dimension, compression_glwe_dimension * compression_polynomial_size,
0, 0, pbs_level, pbs_base_log, grouping_factor, message_modulus,
carry_modulus, noise_reduction_type);
0, 0, pbs_level, pbs_base_log, 0, message_modulus, carry_modulus,
noise_reduction_type);
return scratch_cuda_integer_decompress_radix_ciphertext<uint64_t>(
CudaStreams(streams), (int_decompression<uint64_t> **)mem_ptr,

View File

@@ -1,105 +0,0 @@
#include "rerand.cuh"
extern "C" {
uint64_t
scratch_cuda_rerand_64(CudaStreamsFFI streams, int8_t **mem_ptr,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension,
uint32_t ks_level, uint32_t ks_base_log,
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
uint32_t carry_modulus, bool allocate_gpu_memory) {
PUSH_RANGE("scratch rerand")
int_radix_params params(PBS_TYPE::CLASSICAL, 0, 0, big_lwe_dimension,
small_lwe_dimension, ks_level, ks_base_log, 0, 0, 0,
message_modulus, carry_modulus,
PBS_MS_REDUCTION_T::NO_REDUCTION);
uint64_t ret = scratch_cuda_rerand<uint64_t>(
CudaStreams(streams), (int_rerand_mem<uint64_t> **)mem_ptr,
lwe_ciphertext_count, params, allocate_gpu_memory);
POP_RANGE()
return ret;
}
/* Executes the re-randomization procedure, adding encryptions of zero to each
* element of an array of LWE ciphertexts. This method expects the encryptions
* of zero to be provided as input in the format of a flattened compact
* ciphertext list, generated using a compact public key.
*/
void cuda_rerand_64(
CudaStreamsFFI streams, void *lwe_array,
const void *lwe_flattened_encryptions_of_zero_compact_array_in,
int8_t *mem_ptr, void *const *ksk) {
auto rerand_buffer = reinterpret_cast<int_rerand_mem<uint64_t> *>(mem_ptr);
switch (rerand_buffer->params.big_lwe_dimension) {
case 256:
rerand_inplace<uint64_t, AmortizedDegree<256>>(
streams, static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(
lwe_flattened_encryptions_of_zero_compact_array_in),
(uint64_t **)(ksk), rerand_buffer);
break;
case 512:
rerand_inplace<uint64_t, AmortizedDegree<512>>(
streams, static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(
lwe_flattened_encryptions_of_zero_compact_array_in),
(uint64_t **)(ksk), rerand_buffer);
break;
case 1024:
rerand_inplace<uint64_t, AmortizedDegree<1024>>(
streams, static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(
lwe_flattened_encryptions_of_zero_compact_array_in),
(uint64_t **)(ksk), rerand_buffer);
break;
case 2048:
rerand_inplace<uint64_t, AmortizedDegree<2048>>(
streams, static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(
lwe_flattened_encryptions_of_zero_compact_array_in),
(uint64_t **)(ksk), rerand_buffer);
break;
case 4096:
rerand_inplace<uint64_t, AmortizedDegree<4096>>(
streams, static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(
lwe_flattened_encryptions_of_zero_compact_array_in),
(uint64_t **)(ksk), rerand_buffer);
break;
case 8192:
rerand_inplace<uint64_t, AmortizedDegree<8192>>(
streams, static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(
lwe_flattened_encryptions_of_zero_compact_array_in),
(uint64_t **)(ksk), rerand_buffer);
break;
case 16384:
rerand_inplace<uint64_t, AmortizedDegree<16384>>(
streams, static_cast<uint64_t *>(lwe_array),
static_cast<const uint64_t *>(
lwe_flattened_encryptions_of_zero_compact_array_in),
(uint64_t **)(ksk), rerand_buffer);
break;
default:
    PANIC("CUDA error: lwe_dimension not supported. "
          "Supported n's are powers of two"
          " in the interval [256..16384].");
break;
}
cuda_synchronize_stream(static_cast<cudaStream_t>(streams.streams[0]),
streams.gpu_indexes[0]);
}
void cleanup_cuda_rerand(CudaStreamsFFI streams, int8_t **mem_ptr_void) {
PUSH_RANGE("cleanup rerand")
int_rerand_mem<uint64_t> *mem_ptr =
(int_rerand_mem<uint64_t> *)(*mem_ptr_void);
mem_ptr->release(CudaStreams(streams));
delete mem_ptr;
*mem_ptr_void = nullptr;
POP_RANGE()
}
}
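
Conceptually, the re-randomization above computes ct' = ct + Enc(0) for each ciphertext in the array, after expanding the zero encryptions from the compact list and keyswitching them to the ciphertext dimension. The toy model below is hypothetical and not part of this diff (the keyswitch is omitted and a trivial binary secret key is sampled locally); it only illustrates why adding a fresh encryption of zero refreshes the randomness without changing the plaintext.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <random>
#include <vector>

struct ToyLwe {
  std::vector<uint64_t> a; // mask
  uint64_t b;              // body
};

// Toy LWE encryption over Z_2^64 with a 2-bit message in the top bits.
static ToyLwe encrypt(uint64_t msg, const std::vector<uint64_t> &sk,
                      std::mt19937_64 &rng) {
  ToyLwe ct{std::vector<uint64_t>(sk.size()), 0};
  uint64_t body = msg << 62;
  for (std::size_t i = 0; i < sk.size(); ++i) {
    ct.a[i] = rng();
    body += ct.a[i] * sk[i];
  }
  ct.b = body + (rng() & 0xFFFF); // small noise
  return ct;
}

static uint64_t decrypt(const ToyLwe &ct, const std::vector<uint64_t> &sk) {
  uint64_t phase = ct.b;
  for (std::size_t i = 0; i < sk.size(); ++i)
    phase -= ct.a[i] * sk[i];
  return (phase + (1ull << 61)) >> 62; // round to the nearest 2-bit message
}

int main() {
  std::mt19937_64 rng(42);
  std::vector<uint64_t> sk(16);
  for (auto &s : sk)
    s = rng() & 1;
  ToyLwe ct = encrypt(3, sk, rng);
  ToyLwe zero = encrypt(0, sk, rng); // fresh encryption of zero
  for (std::size_t i = 0; i < sk.size(); ++i)
    ct.a[i] += zero.a[i];
  ct.b += zero.b; // ct' = ct + Enc(0): new randomness, same plaintext
  printf("decrypted after re-randomization: %llu\n",
         static_cast<unsigned long long>(decrypt(ct, sk)));
  return 0;
}

In the real routine the zero encryptions additionally pass through a keyswitch so their dimension matches the target ciphertexts before the addition.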

View File

@@ -1,87 +0,0 @@
#pragma once
#include "device.h"
#include "integer/integer.h"
#include "integer/radix_ciphertext.h"
#include "integer/rerand.h"
#include "integer/rerand_utilities.h"
#include "utils/helper_profile.cuh"
#include "utils/kernel_dimensions.cuh"
#include "zk/zk_utilities.h"
template <typename Torus, class params>
void rerand_inplace(
CudaStreams const streams, Torus *lwe_array,
const Torus *lwe_flattened_encryptions_of_zero_compact_array_in,
Torus *const *ksk, int_rerand_mem<Torus> *mem_ptr) {
auto zero_lwes = mem_ptr->tmp_zero_lwes;
auto num_lwes = mem_ptr->num_lwes;
auto ksed_zero_lwes = mem_ptr->tmp_ksed_zero_lwes;
auto lwe_trivial_indexes = mem_ptr->lwe_trivial_indexes;
auto ksk_params = mem_ptr->params;
auto output_dimension = ksk_params.small_lwe_dimension;
auto input_dimension = ksk_params.big_lwe_dimension;
auto ks_level = ksk_params.ks_level;
auto ks_base_log = ksk_params.ks_base_log;
auto message_modulus = ksk_params.message_modulus;
auto carry_modulus = ksk_params.carry_modulus;
GPU_ASSERT(sizeof(Torus) == 8,
"Cuda error: expand is only supported on 64 bits");
// Expand encryptions of zero
// Wraps the input into a flattened_compact_lwe_lists type
auto compact_lwe_lists = flattened_compact_lwe_lists<Torus>(
const_cast<Torus *>(lwe_flattened_encryptions_of_zero_compact_array_in),
&num_lwes, (uint32_t)1, input_dimension);
auto h_expand_jobs = mem_ptr->h_expand_jobs;
auto d_expand_jobs = mem_ptr->d_expand_jobs;
auto output_index = 0;
for (auto list_index = 0; list_index < compact_lwe_lists.num_compact_lists;
++list_index) {
auto list = compact_lwe_lists.get_device_compact_list(list_index);
for (auto lwe_index = 0; lwe_index < list.total_num_lwes; ++lwe_index) {
h_expand_jobs[output_index] =
expand_job<Torus>(list.get_mask(), list.get_body(lwe_index));
output_index++;
}
}
cuda_memcpy_with_size_tracking_async_to_gpu(
d_expand_jobs, h_expand_jobs,
compact_lwe_lists.total_num_lwes * sizeof(expand_job<Torus>),
streams.stream(0), streams.gpu_index(0), true);
host_lwe_expand<Torus, params>(streams.stream(0), streams.gpu_index(0),
zero_lwes, d_expand_jobs, num_lwes);
// Keyswitch
execute_keyswitch_async<Torus>(
streams.get_ith(0), ksed_zero_lwes, lwe_trivial_indexes, zero_lwes,
lwe_trivial_indexes, ksk, input_dimension, output_dimension, ks_base_log,
ks_level, num_lwes);
// Add ks output to ct
// Check sizes
auto lwes_ffi = new CudaRadixCiphertextFFI;
into_radix_ciphertext(lwes_ffi, lwe_array, num_lwes, output_dimension);
auto ksed_zero_lwes_ffi = new CudaRadixCiphertextFFI;
into_radix_ciphertext(ksed_zero_lwes_ffi, ksed_zero_lwes, num_lwes,
output_dimension);
host_addition<Torus>(streams.stream(0), streams.gpu_index(0), lwes_ffi,
lwes_ffi, ksed_zero_lwes_ffi, num_lwes, message_modulus,
carry_modulus);
}
template <typename Torus>
__host__ uint64_t scratch_cuda_rerand(CudaStreams streams,
int_rerand_mem<Torus> **mem_ptr,
uint32_t num_lwes,
int_radix_params params,
bool allocate_gpu_memory) {
uint64_t size_tracker = 0;
*mem_ptr = new int_rerand_mem<Torus>(streams, params, num_lwes,
allocate_gpu_memory, size_tracker);
return size_tracker;
}

View File

@@ -30,7 +30,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
Torus *global_accumulator, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
uint32_t level_count, uint32_t grouping_factor, uint32_t lwe_offset,
uint64_t lwe_chunk_size, uint64_t keybundle_size_per_input,
uint32_t lwe_chunk_size, uint32_t keybundle_size_per_input,
int8_t *device_mem, uint64_t device_memory_size_per_block,
uint32_t num_many_lut, uint32_t lut_stride) {
@@ -193,7 +193,7 @@ template <typename Torus>
uint64_t get_buffer_size_cg_multibit_programmable_bootstrap(
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
uint32_t grouping_factor, uint64_t lwe_chunk_size) {
uint32_t grouping_factor, uint32_t lwe_chunk_size) {
uint64_t buffer_size = 0;
buffer_size += input_lwe_ciphertext_count * lwe_chunk_size * level_count *
@@ -280,9 +280,9 @@ __host__ uint64_t scratch_cg_multi_bit_programmable_bootstrap(
check_cuda_error(cudaGetLastError());
}
auto lwe_chunk_size = get_lwe_chunk_size<Torus, params>(
gpu_index, input_lwe_ciphertext_count, polynomial_size, glwe_dimension,
level_count, full_sm_keybundle);
auto lwe_chunk_size =
get_lwe_chunk_size<Torus, params>(gpu_index, input_lwe_ciphertext_count,
polynomial_size, full_sm_keybundle);
uint64_t size_tracker = 0;
*buffer = new pbs_buffer<Torus, MULTI_BIT>(
stream, gpu_index, glwe_dimension, polynomial_size, level_count,
@@ -317,12 +317,12 @@ __host__ void execute_cg_external_product_loop(
auto lwe_chunk_size = buffer->lwe_chunk_size;
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
uint64_t keybundle_size_per_input =
uint32_t keybundle_size_per_input =
lwe_chunk_size * level_count * (glwe_dimension + 1) *
(glwe_dimension + 1) * (polynomial_size / 2);
uint64_t chunk_size = std::min(
lwe_chunk_size, (uint64_t)(lwe_dimension / grouping_factor) - lwe_offset);
uint32_t chunk_size =
std::min(lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset);
auto d_mem = buffer->d_mem_acc_cg;
auto keybundle_fft = buffer->keybundle_fft;

View File

@@ -456,9 +456,9 @@ void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
* benchmarking on an RTX 4090 GPU, balancing performance and resource use.
*/
template <typename Torus, class params>
uint64_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size, uint32_t glwe_dimension,
uint32_t level_count, uint64_t full_sm_keybundle) {
uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size,
uint64_t full_sm_keybundle) {
int max_blocks_per_sm;
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
@@ -479,22 +479,6 @@ uint64_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
check_cuda_error(cudaDeviceGetAttribute(
&num_sms, cudaDevAttrMultiProcessorCount, gpu_index));
size_t total_mem, free_mem;
check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
// Estimate the size of one chunk
uint64_t size_one_chunk = max_num_pbs * polynomial_size *
(glwe_dimension + 1) * (glwe_dimension + 1) *
level_count * sizeof(Torus);
  // We compute the maximum number of chunks that fit in 50% of the free
  // memory: the PBS temporary array must not use more than half of the free
  // memory, and we panic if even a single chunk does not fit in that budget.
uint32_t max_num_chunks =
static_cast<uint32_t>(free_mem / (2 * size_one_chunk));
PANIC_IF_FALSE(
max_num_chunks > 0,
"Cuda error (multi-bit PBS): Not enough GPU memory to allocate PBS "
"temporary arrays.");
int x = num_sms * max_blocks_per_sm;
int count = 0;
@@ -516,7 +500,7 @@ uint64_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
  // applied only to a small number of samples (8) because it can have a
  // negative effect of over-saturation.
if (max_num_pbs <= 8) {
return (max_num_chunks > num_sms / 2) ? num_sms / 2 : max_num_chunks;
return num_sms / 2;
}
#endif
@@ -530,7 +514,8 @@ uint64_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
}
}
}
return (max_num_chunks > divisor) ? divisor : max_num_chunks;
return divisor;
}
template uint64_t scratch_cuda_multi_bit_programmable_bootstrap<uint64_t>(

Some files were not shown because too many files have changed in this diff.