Compare commits

..

4 Commits

Author SHA1 Message Date
Baptiste Roux
52b8e81ccb fix(hpu): Correctly select adder configuration in ERC_20/ERC_20_SIMD
Add knobs to select ripple or kogge adder in ERC_20/ERC_20_SIMD.
Previously, it was hardcoded to ripple carry and thus degraded latency
performance of ERC_20.
2025-12-24 10:38:38 +01:00
Baptiste Roux
b19a7773bb feat: Add IfThenZero impl for Cpu 2025-12-24 10:38:38 +01:00
pgardratzama
0342b0466d chore(hpu): fix panic msg 2025-12-24 10:38:38 +01:00
pgardratzama
edc9ef0026 fix(hpu): fix whitepaper erc20 for HPU using if_then_zero 2025-12-24 10:38:38 +01:00
102 changed files with 338 additions and 3580 deletions

View File

@@ -2,8 +2,6 @@
ignore = [
# Ignoring unmaintained 'paste' advisory as it is a widely used, low-risk build dependency.
"RUSTSEC-2024-0436",
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex on the short term.
"RUSTSEC-2025-0141",
]
[output]

View File

@@ -23,8 +23,6 @@ runs:
echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
sha256sum -c checksum
sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt update
sudo apt remove -y unattended-upgrades
sudo apt install -y cmake-format libclang-dev

View File

@@ -80,7 +80,7 @@ jobs:
- name: Retrieve data from cache
id: retrieve-data-cache
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor
@@ -109,7 +109,7 @@ jobs:
- name: Store data in cache
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
with:
path: |
utils/tfhe-backward-compat-data/**/*.cbor

View File

@@ -71,7 +71,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
dependencies:
@@ -219,7 +219,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
with:
path: |
~/.nvm
@@ -232,7 +232,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -58,7 +58,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
integer:

View File

@@ -59,7 +59,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
integer:

View File

@@ -80,7 +80,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
dependencies:

View File

@@ -80,7 +80,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
with:
path: |
~/.nvm
@@ -93,7 +93,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |

View File

@@ -223,7 +223,7 @@ jobs:
results_type: ${{ inputs.additional_results_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -99,7 +99,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_ct_key_sizes
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -185,7 +185,7 @@ jobs:
persist-credentials: 'false'
- name: Download SVG tables
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
path: svg_tables
merge-multiple: 'true'
@@ -203,7 +203,7 @@ jobs:
echo "date=$(date '+%g_%m_%d_%Hh%Mm%Ss')" >> "${GITHUB_OUTPUT}"
- name: Create pull-request
uses: peter-evans/create-pull-request@98357b18bf14b5342f975ff684046ec3b2a07725 # v8.0.0
uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9
with:
sign-commits: true # Commit will be signed by github-actions bot
add-paths: ${{ env.PATH_TO_DOC_ASSETS }}/*.svg

View File

@@ -89,7 +89,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_integer_multi_bit_gpu_default
path: ${{ env.RESULTS_FILENAME }}
@@ -173,7 +173,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -281,7 +281,7 @@ jobs:
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -192,10 +192,10 @@ jobs:
cargo install sqlx-cli
- name: Install foundry
uses: foundry-rs/foundry-toolchain@8b0419c685ef46cb79ec93fbdc131174afceb730
uses: foundry-rs/foundry-toolchain@50d5a8956f2e319df19e6b57539d7e2acb9f8c1e
- name: Cache cargo
uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: |
~/.cargo/registry
@@ -262,7 +262,7 @@ jobs:
- name: Upload profile artifact
env:
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ env.REPORT_NAME }}
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
@@ -293,7 +293,7 @@ jobs:
working-directory: fhevm/
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
path: fhevm/$${{ env.RESULTS_FILENAME }}

View File

@@ -185,7 +185,7 @@ jobs:
BENCH_TYPE: ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -280,7 +280,7 @@ jobs:
BENCH_TYPE: ${{ env.__TFHE_RS_BENCH_TYPE }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_regression_${{ env.RESULTS_FILE_SHA }} # RESULT_FILE_SHA is needed to avoid collision between matrix.command runs
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -96,7 +96,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_fft
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -96,7 +96,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_ntt
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
wasm_bench:
@@ -119,7 +119,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
with:
path: |
~/.nvm
@@ -132,7 +132,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 #v4.3.0
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -153,12 +153,6 @@ jobs:
env:
BROWSER: ${{ matrix.browser }}
- name: Run benchmarks (unsafe coop)
run: |
make bench_web_js_api_unsafe_coop_"${BROWSER}"_ci
env:
BROWSER: ${{ matrix.browser }}
- name: Parse results
run: |
make parse_wasm_benchmarks
@@ -175,7 +169,7 @@ jobs:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -37,7 +37,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
fft:

View File

@@ -39,7 +39,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
ntt:

View File

@@ -50,7 +50,7 @@ jobs:
version: ${{ steps.get_zizmor.outputs.version }}
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6124774845927d14c601359ab8138699fa5b70c3 # v4.0.1
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@9e9574ef04ea69da568d6249bd69539ccc704e74 # v4.0.0
with:
allowlist: |
slsa-framework/slsa-github-generator

View File

@@ -62,7 +62,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
tfhe:
@@ -92,7 +92,7 @@ jobs:
make test_shortint_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -106,7 +106,7 @@ jobs:
make test_integer_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de
uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}

View File

@@ -75,15 +75,6 @@ jobs:
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
- name: Upload tables
if: inputs.backend_comparison == false
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
# This will upload all the file generated
path: ${{ inputs.output_filename }}*.svg
retention-days: 60
- name: Produce backends comparison table from database
if: inputs.backend_comparison == true
run: |
@@ -99,11 +90,10 @@ jobs:
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
- name: Upload comparison tables
if: inputs.backend_comparison == true
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
- name: Upload tables
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
with:
name: ${{ github.sha }}_backends_comparison_tables
name: ${{ github.sha }}_${{ inputs.backend }}_${{ inputs.layer }}_${{ inputs.pbs_kind }}_${{ inputs.bench_type }}_tables
# This will upload all the file generated
path: ${{ inputs.output_filename }}*.svg
retention-days: 60

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -48,7 +48,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -49,7 +49,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:

View File

@@ -40,7 +40,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
hpu:

View File

@@ -62,7 +62,7 @@ jobs:
PACKAGE: ${{ inputs.package-name }}
run: |
cargo package -p "${PACKAGE}"
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: crate-${{ inputs.package-name }}
path: target/package/*.crate
@@ -100,7 +100,7 @@ jobs:
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: crate-${{ inputs.package-name }}
path: target/package

View File

@@ -104,7 +104,7 @@ jobs:
run: |
cargo package -p tfhe-cuda-backend
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: crate-tfhe-cuda-backend
path: target/package/*.crate
@@ -174,7 +174,7 @@ jobs:
GCC_VERSION: ${{ matrix.gcc }}
- name: Download artifact
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
with:
name: crate-tfhe-cuda-backend
path: target/package

1
.gitignore vendored
View File

@@ -10,7 +10,6 @@ target/
**/*.rmeta
**/Cargo.lock
**/*.bin
**/.DS_Store
# Some of our bench outputs
/tfhe/benchmarks_parameters

View File

@@ -11,7 +11,7 @@
/tfhe/src/core_crypto/gpu @agnesLeroy
/tfhe/src/core_crypto/hpu @zama-ai/hardware
/tfhe/src/shortint/ @mayeul-zama @nsarlin-zama
/tfhe/src/shortint/ @mayeul-zama
/tfhe/src/integer/ @tmontaigu
/tfhe/src/integer/gpu @agnesLeroy
@@ -19,12 +19,8 @@
/tfhe/src/high_level_api/ @tmontaigu
/tfhe-zk-pok/ @nsarlin-zama
/tfhe-benchmark/ @soonum
/utils/ @nsarlin-zama
/Makefile @IceTDrinker @soonum
/mockups/tfhe-hpu-mockup @zama-ai/hardware

View File

@@ -36,8 +36,6 @@ rayon = "1.11"
serde = { version = "1.0", default-features = false }
wasm-bindgen = "0.2.101"
getrandom = "0.2.8"
# The project maintainers consider that this is the last version of the 1.3 branch, any newer version should not be trusted
bincode = "=1.3.3"
[profile.bench]
lto = "fat"

View File

@@ -1300,14 +1300,13 @@ run_web_js_api_parallel: build_web_js_api_parallel setup_venv
--browser-path $(browser_path) \
--driver-path $(driver_path) \
--browser-kind $(browser_kind) \
--server-cmd $(server_cmd) \
--server-cmd "npm run server" \
--server-workdir "$(WEB_SERVER_DIR)" \
--id-pattern $(filter)
test_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
test_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
test_web_js_api_parallel_chrome: browser_kind = chrome
test_web_js_api_parallel_chrome: server_cmd = "npm run server:multithreaded"
test_web_js_api_parallel_chrome: filter = Test
.PHONY: test_web_js_api_parallel_chrome # Run tests for the web wasm api on Chrome
@@ -1323,7 +1322,6 @@ test_web_js_api_parallel_chrome_ci: setup_venv
test_web_js_api_parallel_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
test_web_js_api_parallel_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
test_web_js_api_parallel_firefox: browser_kind = firefox
test_web_js_api_parallel_firefox: server_cmd = "npm run server:multithreaded"
test_web_js_api_parallel_firefox: filter = Test
.PHONY: test_web_js_api_parallel_firefox # Run tests for the web wasm api on Firefox
@@ -1573,7 +1571,6 @@ bench_pbs128_gpu: install_rs_check_toolchain
bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
bench_web_js_api_parallel_chrome: browser_kind = chrome
bench_web_js_api_parallel_chrome: server_cmd = "npm run server:multithreaded"
bench_web_js_api_parallel_chrome: filter = Bench
.PHONY: bench_web_js_api_parallel_chrome # Run benchmarks for the web wasm api
@@ -1589,7 +1586,6 @@ bench_web_js_api_parallel_chrome_ci: setup_venv
bench_web_js_api_parallel_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
bench_web_js_api_parallel_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
bench_web_js_api_parallel_firefox: browser_kind = firefox
bench_web_js_api_parallel_firefox: server_cmd = "npm run server:multithreaded"
bench_web_js_api_parallel_firefox: filter = Bench
.PHONY: bench_web_js_api_parallel_firefox # Run benchmarks for the web wasm api
@@ -1602,38 +1598,6 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
nvm use $(NODE_VERSION) && \
$(MAKE) bench_web_js_api_parallel_firefox
bench_web_js_api_unsafe_coop_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
bench_web_js_api_unsafe_coop_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
bench_web_js_api_unsafe_coop_chrome: browser_kind = chrome
bench_web_js_api_unsafe_coop_chrome: server_cmd = "npm run server:unsafe-coop"
bench_web_js_api_unsafe_coop_chrome: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
.PHONY: bench_web_js_api_unsafe_coop_chrome # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_chrome: run_web_js_api_parallel
.PHONY: bench_web_js_api_unsafe_coop_chrome_ci # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_chrome_ci: setup_venv
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) bench_web_js_api_unsafe_coop_chrome
bench_web_js_api_unsafe_coop_firefox: browser_path = "$(WEB_RUNNER_DIR)/firefox/firefox/firefox"
bench_web_js_api_unsafe_coop_firefox: driver_path = "$(WEB_RUNNER_DIR)/firefox/geckodriver"
bench_web_js_api_unsafe_coop_firefox: browser_kind = firefox
bench_web_js_api_unsafe_coop_firefox: server_cmd = "npm run server:unsafe-coop"
bench_web_js_api_unsafe_coop_firefox: filter = ZeroKnowledgeBench # Only bench zk with unsafe coop
.PHONY: bench_web_js_api_unsafe_coop_firefox # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_firefox: run_web_js_api_parallel
.PHONY: bench_web_js_api_unsafe_coop_firefox_ci # Run benchmarks for the web wasm api without cross-origin isolation
bench_web_js_api_unsafe_coop_firefox_ci: setup_venv
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) bench_web_js_api_unsafe_coop_firefox
.PHONY: bench_hlapi # Run benchmarks for integer operations
bench_hlapi: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \

View File

@@ -1,32 +1,24 @@
08f31a47c29cc4d72ad32c0b5411fa20b3deef5b84558dd2fb892d3cdf90528a data/toy_params/glwe_after_id_br_karatsuba.cbor
29b6e3e7d27700004b70dca24d225816500490e2d6ee49b9af05837fd421896b data/valid_params_128/lwe_after_spec_pbs.cbor
2c70d1d78cc3760733850a353ace2b9c4705e840141b75841739e90e51247e18 data/valid_params_128/small_lwe_secret_key.cbor
2fb4bb45c259b8383da10fc8f9459c40a6972c49b1696eb107f0a75640724be5 data/toy_params/lwe_after_id_pbs_karatsuba.cbor
36c9080b636475fcacca503ce041bbfeee800fd3e1890dee559ea18defff9fe8 data/toy_params/glwe_after_id_br.cbor
377761beeb4216cf5aa2624a8b64b8259f5a75c32d28e850be8bced3a0cdd6f5 data/toy_params/ksk.cbor
59dba26d457f96478eda130cab5301fce86f23c6a8807de42f2a1e78c4985ca7 data/valid_params_128/lwe_ks.cbor
5d80dd93fefae4f4f89484dfcd65bbe99cc32e7e3b0a90c33dd0d77516c0a023 data/valid_params_128/glwe_after_id_br_karatsuba.cbor
656f0009c7834c5bcb61621e222047516054b9bc5d0593d474ab8f1c086b67a6 data/valid_params_128/lwe_after_id_pbs.cbor
699580ca92b9c2f9e1f57fb1e312c9e8cb29714f7acdef9d2ba05f798546751f data/toy_params/lwe_sum.cbor
6e54ab41056984595b077baff70236d934308cf5c0c33b4482fbfb129b3756c6 data/valid_params_128/glwe_after_id_br.cbor
70f5e5728822de05b49071efb5ec28551b0f5cc87aa709a455d8e7f04b9c96ee data/toy_params/lwe_after_id_pbs.cbor
76a5c52cab7fec1dc167da676c6cd39479cda6b2bb9f4e0573cb7d99c2692faa data/valid_params_128/lwe_after_id_pbs_karatsuba.cbor
7cc6803f5fbc3d5a1bf597f2b979ce17eecd3d6baca12183dea21022a7b65c52 data/toy_params/bsk.cbor
7f3c40a134623b44779a556212477fea26eaed22450f3b6faeb8721d63699972 data/valid_params_128/lwe_sum.cbor
837b3bd3245d4d0534ed255fdef896fb4fa6998a258a14543dfdadd0bfc9b6dd data/toy_params/lwe_prod.cbor
9ece8ca9c1436258b94e8c5e629b8722f9b18fdd415dd5209b6167a9dde8491c data/toy_params/glwe_after_spec_br_karatsuba.cbor
aa44aea29efd6d9e4d35a21a625d9cba155672e3f7ed3eddee1e211e62ad146b data/valid_params_128/lwe_ms.cbor
b7a037b9eaa88d6385167579b93e26a0cb6976d9b8967416fd1173e113bda199 data/valid_params_128/large_lwe_secret_key.cbor
b7b8e3586128887bd682120f3e3a43156139bce5e3fe0b03284f8753a864d647 data/toy_params/lwe_after_spec_pbs_karatsuba.cbor
bd00a8ae7494e400de5753029552ee1647efe7e17409b863a26a13b081099b8c data/toy_params/lwe_after_spec_pbs.cbor
c6df98676de04fe54b5ffc2eb30a82ebb706c9d7d5a4e0ed509700fec88761f7 data/toy_params/lwe_ms.cbor
c7d5a864d5616a7d8ad50bbf40416e41e6c9b60c546dc14d4aa8fc40a418baa7 data/toy_params/large_lwe_secret_key.cbor
c806533b325b1009db38be2f9bef5f3b2fad6b77b4c71f2855ccc9d3b4162e98 data/valid_params_128/lwe_b.cbor
c9eb75bd2993639348a679cf48c06e3c38d1a513f48e5b0ce0047cea8cff6bbc data/toy_params/lwe_a.cbor
d3391969acf26dc69de0927ba279139d8d79999944069addc8ff469ad6c5ae2d data/valid_params_128/lwe_after_spec_pbs_karatsuba.cbor
d6da5baef0e787f6be56e218d8354e26904652602db964844156fdff08350ce6 data/toy_params/lwe_ks.cbor
e591ab9af1b6a0aede273f9a3abb65a4c387feb5fa06a6959e9314058ca0f7e5 data/valid_params_128/ksk.cbor
e59b002df3a9b01ad321ec51cf076fa35131ab9dbef141d1c54b717d61426c92 data/valid_params_128/glwe_after_spec_br_karatsuba.cbor
e628354c81508a2d888016e8282df363dd12f1e19190b6475d4eb9d7ab8ae007 data/valid_params_128/glwe_after_spec_br.cbor
e69d2d2c064fc8c0460b39191ca65338146990349954f5ec5ebd01d93610e7eb data/valid_params_128/lwe_a.cbor
e76c24b2a0c9a842ad13dda35473c2514f9e7d20983b5ea0759c4521a91626d9 data/valid_params_128/lwe_prod.cbor

View File

@@ -39,9 +39,6 @@ The following values are generated:
| `glwe_after_spec_br` | The glwe returned by the application of the spec blind rotation on the mod switched ciphertexts. | `GlweCiphertext<Vec<u64>>` | rot spec LUT |
| `lwe_after_spec_pbs` | The lwe returned by the application of the sample extract operation on the output of the spec blind rotation | `LweCiphertext<Vec<u64>>` | `spec(A)` |
Ciphertexts with the `_karatsuba` suffix are generated using the Karatsuba polynomial multiplication algorithm in the blind rotation, while default ciphertexts are generated using an FFT multiplication.
This makes it easier to reproduce bit exact results.
### Encodings
#### Non native encoding
Warning: TFHE-rs uses a specific encoding for non native (ie: u32, u64) power of two ciphertext modulus. This encoding puts the encoded value in the high bits of the native integer.

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:08f31a47c29cc4d72ad32c0b5411fa20b3deef5b84558dd2fb892d3cdf90528a
size 4679

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9ece8ca9c1436258b94e8c5e629b8722f9b18fdd415dd5209b6167a9dde8491c
size 4679

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2fb4bb45c259b8383da10fc8f9459c40a6972c49b1696eb107f0a75640724be5
size 2365

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b7b8e3586128887bd682120f3e3a43156139bce5e3fe0b03284f8753a864d647
size 2365

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d80dd93fefae4f4f89484dfcd65bbe99cc32e7e3b0a90c33dd0d77516c0a023
size 36935

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e59b002df3a9b01ad321ec51cf076fa35131ab9dbef141d1c54b717d61426c92
size 36935

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:76a5c52cab7fec1dc167da676c6cd39479cda6b2bb9f4e0573cb7d99c2692faa
size 18493

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d3391969acf26dc69de0927ba279139d8d79999944069addc8ff469ad6c5ae2d
size 18493

View File

@@ -265,7 +265,6 @@ fn generate_test_vectors<P: AsRef<Path>>(
let mut id_lut = encoding.encode_lut(glwe_dimension, polynomial_size, ID_LUT);
assert_data_not_zero(&id_lut);
let mut id_lut_karatsuba = id_lut.clone();
blind_rotate_assign(&modswitched, &mut id_lut, &fourier_bsk);
assert_data_not_zero(&id_lut);
@@ -288,32 +287,8 @@ fn generate_test_vectors<P: AsRef<Path>>(
assert_data_not_zero(&lwe_pbs_id);
store_data(path, &lwe_pbs_id, "lwe_after_id_pbs");
blind_rotate_karatsuba_assign(&modswitched, &mut id_lut_karatsuba, &bsk);
store_data(path, &id_lut_karatsuba, "glwe_after_id_br_karatsuba");
let mut lwe_pbs_karatsuba_id = LweCiphertext::new(
0u64,
glwe_dimension
.to_equivalent_lwe_dimension(polynomial_size)
.to_lwe_size(),
encoding.ciphertext_modulus,
);
extract_lwe_sample_from_glwe_ciphertext(
&id_lut_karatsuba,
&mut lwe_pbs_karatsuba_id,
MonomialDegree(0),
);
let decrypted_pbs_id = decrypt_lwe_ciphertext(&large_lwe_secret_key, &lwe_pbs_karatsuba_id);
let res = encoding.decode(decrypted_pbs_id);
assert_eq!(res, MSG_A);
store_data(path, &lwe_pbs_karatsuba_id, "lwe_after_id_pbs_karatsuba");
let mut spec_lut = encoding.encode_lut(glwe_dimension, polynomial_size, SPEC_LUT);
assert_data_not_zero(&spec_lut);
let mut spec_lut_karatsuba = spec_lut.clone();
blind_rotate_assign(&modswitched, &mut spec_lut, &fourier_bsk);
assert_data_not_zero(&spec_lut);
@@ -335,33 +310,6 @@ fn generate_test_vectors<P: AsRef<Path>>(
assert_eq!(res, SPEC_LUT(MSG_A));
assert_data_not_zero(&lwe_pbs_spec);
store_data(path, &lwe_pbs_spec, "lwe_after_spec_pbs");
blind_rotate_karatsuba_assign(&modswitched, &mut spec_lut_karatsuba, &bsk);
store_data(path, &spec_lut_karatsuba, "glwe_after_spec_br_karatsuba");
let mut lwe_pbs_karatsuba_spec = LweCiphertext::new(
0u64,
glwe_dimension
.to_equivalent_lwe_dimension(polynomial_size)
.to_lwe_size(),
encoding.ciphertext_modulus,
);
extract_lwe_sample_from_glwe_ciphertext(
&spec_lut_karatsuba,
&mut lwe_pbs_karatsuba_spec,
MonomialDegree(0),
);
let decrypted_pbs_spec = decrypt_lwe_ciphertext(&large_lwe_secret_key, &lwe_pbs_karatsuba_spec);
let res = encoding.decode(decrypted_pbs_spec);
assert_eq!(res, SPEC_LUT(MSG_A));
store_data(
path,
&lwe_pbs_karatsuba_spec,
"lwe_after_spec_pbs_karatsuba",
);
}
fn rm_dir_except_readme<P: AsRef<Path>>(dir: P) {

View File

@@ -65,16 +65,6 @@ void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
}
#endif

View File

@@ -155,24 +155,3 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_128(
delete mem_ptr;
*mem_ptr_void = nullptr;
}
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint128_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint128_t *)glwe_array_out, glwe_list,
glwe_index);
}
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint64_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint64_t *)glwe_array_out, glwe_list, glwe_index);
}

View File

@@ -2349,22 +2349,6 @@ unsafe extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_128(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_64(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn scratch_cuda_rerand_64(
streams: CudaStreamsFFI,

View File

@@ -40,7 +40,7 @@ rand = "0.8.5"
regex = "1.10.4"
bitflags = { version = "2.5.0", features = ["serde"] }
itertools = "0.11.0"
lru = "0.16.3"
lru = "0.12.3"
bitfield-struct = "0.10.0"
crossbeam = { version = "0.8.4", features = ["crossbeam-queue"] }
rayon = { workspace = true }
@@ -61,8 +61,8 @@ bitvec = { version = "1.0", optional = true }
serde_json = { version = "1.0", optional = true }
# Dependencies used for v80 pdi handling
bincode = { workspace = true, optional = true }
serde_derive = { version = "1.0", optional = true }
bincode ={ version = "1.3", optional = true}
serde_derive ={ version = "1.0", optional = true}
# Binary for manual debugging
# Enable to access Hpu register and drive some custom sequence by hand

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:934c8131c12010dc837f6a2af5111b83f8f5d42f10485e9b3b971edb24c467f8
size 82201876
oid sha256:35cc06547a23b862ab9829351d74d944e60ea9dad3ecf593d15f0ce8445d145e
size 81710610

View File

@@ -239,12 +239,7 @@ pub fn iop_erc_20(prog: &mut Program) {
pub fn iop_erc_20_simd(prog: &mut Program) {
// Add Comment header
prog.push_comment("ERC_20_SIMD (new_from, new_to) <- (from, to, amount)".to_string());
simd(
prog,
crate::asm::iop::SIMD_N,
fw_impl::llt::iop_erc_20_rtl,
None,
);
simd(prog, crate::asm::iop::SIMD_N, fw_impl::llt::iop_erc_20_rtl, None);
}
#[instrument(level = "trace", skip(prog))]
@@ -435,8 +430,7 @@ pub fn iop_erc_20_rtl(prog: &mut Program, batch_index: u8, kogge_blk_w: Option<u
if let Some(blk_w) = kogge_blk_w {
kogge::add(prog, dst_to, src_to, src_amount.clone(), None, blk_w)
+ kogge::sub(prog, dst_from, src_from, src_amount, blk_w)
} else {
// Default to ripple carry
} else { // Default to ripple carry
kogge::ripple_add(dst_to, src_to, src_amount.clone(), None)
+ kogge::ripple_sub(prog, dst_from, src_from, src_amount)
}

View File

@@ -160,9 +160,9 @@ impl ProgramInner {
.filter(|(_, var)| var.is_none())
.map(|(rid, _)| *rid)
.collect::<Vec<_>>();
demote_order.into_iter().for_each(|rid| {
self.regs.demote(&rid);
});
demote_order
.into_iter()
.for_each(|rid| self.regs.demote(&rid));
}
/// Release register entry
@@ -179,7 +179,7 @@ impl ProgramInner {
/// Notify register access to update LRU state
pub(crate) fn reg_access(&mut self, rid: asm::RegId) {
self.regs.promote(&rid);
self.regs.promote(&rid)
}
/// Retrieved least-recent-used heap entry
@@ -220,9 +220,9 @@ impl ProgramInner {
.filter(|(_mid, var)| var.is_none())
.map(|(mid, _)| *mid)
.collect::<Vec<_>>();
demote_order.into_iter().for_each(|mid| {
self.heap.demote(&mid);
});
demote_order
.into_iter()
.for_each(|mid| self.heap.demote(&mid));
}
_ => { /*Only release Heap slot*/ }
}
@@ -231,9 +231,7 @@ impl ProgramInner {
/// Notify heap access to update LRU state
pub(crate) fn heap_access(&mut self, mid: asm::MemId) {
match mid {
asm::MemId::Heap { .. } => {
self.heap.promote(&mid);
}
asm::MemId::Heap { .. } => self.heap.promote(&mid),
_ => { /* Do Nothing slot do not below to heap*/ }
}
}

View File

@@ -367,8 +367,6 @@ def dump_benchmark_results(results, browser_kind):
"""
Dump as JSON benchmark results into a file.
If `results` is an empty dict then this function is a no-op.
If the file already exists, new results are merged with existing ones,
overwriting keys that already exist.
:param results: benchmark results as :class:`dict`
:param browser_kind: browser as :class:`BrowserKind`
@@ -378,15 +376,7 @@ def dump_benchmark_results(results, browser_kind):
key.replace("mean", "_".join((browser_kind.name, "mean"))): val
for key, val in results.items()
}
results_path = pathlib.Path("tfhe-benchmark/wasm_benchmark_results.json")
existing_results = {}
if results_path.exists():
try:
existing_results = json.loads(results_path.read_text())
except json.JSONDecodeError:
pass
existing_results.update(results)
results_path.write_text(json.dumps(existing_results))
pathlib.Path("tfhe-benchmark/wasm_benchmark_results.json").write_text(json.dumps(results))
def start_web_server(

View File

@@ -33,11 +33,7 @@ RUSTFLAGS="$RUSTFLAGS" cargo nextest list --cargo-profile "${CARGO_PROFILE}" \
--features=integer,internal-keycache,gpu-debug,zk-pok -p tfhe &> /tmp/test_list.txt
if [[ "${RUN_VALGRIND}" == "1" ]]; then
# The tests are filtered using grep (to keep only HL) GPU tests.
# Since, when output is directed to a file, nextest outputs a list of `<executable name> <test name>` the `grep -o '[^ ]\+$'` filter
# will keep only the test name and the `tfhe` executable is assumed. To sanitize tests from another
# executable changes might be needed
TESTS_TO_RUN=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep -E 'high_level_api::.*gpu.*' | grep -v 'array' | grep -v 'flip' | grep -o '[^ ]\+$')
TESTS_TO_RUN=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep -E 'high_level_api::.*gpu.*' | grep -v 'array' | grep -v 'flip')
# Build the tests but don't run them
RUSTFLAGS="$RUSTFLAGS" cargo test --no-run --profile "${CARGO_PROFILE}" \
@@ -60,11 +56,7 @@ if [[ "${RUN_VALGRIND}" == "1" ]]; then
fi
if [[ "${RUN_COMPUTE_SANITIZER}" == "1" ]]; then
# The tests are filtered using grep (to keep only HL / corecrypto) GPU tests.
# Since, when output is directed to a file, nextest outputs a list of `<executable name> <test name>` the `grep -o '[^ ]\+$'` filter
# will keep only the test name and the `tfhe` executable is assumed. To sanitize tests from another
# executable changes might be needed
TESTS_TO_RUN=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep -E 'high_level_api::.*gpu.*|core_crypto::.*gpu.*' | grep -v 'array' | grep -v 'modulus_switch' | grep -v '3_3' | grep -v 'noise_distribution' | grep -v 'flip' | grep -o '[^ ]\+$')
TESTS_TO_RUN=$(sed -e $'s/\x1b\[[0-9;]*m//g' < /tmp/test_list.txt | grep -E 'high_level_api::.*gpu.*|core_crypto::.*gpu.*' | grep -v 'array' | grep -v 'modulus_switch' | grep -v '3_3' | grep -v 'noise_distribution' | grep -v 'flip')
# Build the tests but don't run them
RUSTFLAGS="$RUSTFLAGS" cargo test --no-run --profile "${CARGO_PROFILE}" \
--features=integer,internal-keycache,gpu,zk-pok -p tfhe

View File

@@ -1 +0,0 @@
benchmarks_parameters/*

View File

@@ -15,7 +15,7 @@ name = "benchmark"
path = "src/lib.rs"
[dependencies]
bincode = { workspace = true }
bincode = "1.3.3"
# clap has to be pinned as its minimum supported rust version
# changes often between minor releases, which breaks our CI
clap = { version = "=4.5.30", features = ["derive"] }

View File

@@ -2,9 +2,7 @@ use benchmark::utilities::{
hlapi_throughput_num_ops, write_to_json, BenchmarkType, BitSizesSet, EnvConfig, OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use oprf::oprf_any_range2;
use rand::prelude::*;
use rayon::prelude::*;
use std::marker::PhantomData;
use std::ops::*;
use tfhe::core_crypto::prelude::Numeric;
@@ -13,42 +11,34 @@ use tfhe::keycache::NamedParam;
use tfhe::named::Named;
use tfhe::prelude::*;
use tfhe::{
ClientKey, CompressedServerKey, FheIntegerType, FheUint, FheUint10, FheUint12, FheUint128,
FheUint14, FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId,
IntegerId, KVStore,
ClientKey, CompressedServerKey, FheIntegerType, FheUint10, FheUint12, FheUint128, FheUint14,
FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId, IntegerId,
KVStore,
};
mod oprf;
use rayon::prelude::*;
trait BenchWait {
fn wait_bench(&self);
}
impl<Id: FheUintId> BenchWait for FheUint<Id> {
fn wait_bench(&self) {
self.wait()
}
}
impl<T1: FheWait, T2> BenchWait for (T1, T2) {
fn wait_bench(&self) {
self.0.wait()
}
}
fn bench_fhe_type_op<FheType, F, R>(
fn bench_fhe_type<FheType>(
c: &mut Criterion,
client_key: &ClientKey,
type_name: &str,
bit_size: usize,
display_name: &str,
func_name: &str,
func: F,
) where
F: Fn(&FheType, &FheType) -> R,
R: BenchWait,
FheType: FheEncrypt<u128, ClientKey>,
FheType: FheWait,
for<'a> &'a FheType: Add<&'a FheType, Output = FheType>
+ Sub<&'a FheType, Output = FheType>
+ Mul<&'a FheType, Output = FheType>
+ BitAnd<&'a FheType, Output = FheType>
+ BitOr<&'a FheType, Output = FheType>
+ BitXor<&'a FheType, Output = FheType>
+ Shl<&'a FheType, Output = FheType>
+ Shr<&'a FheType, Output = FheType>
+ RotateLeft<&'a FheType, Output = FheType>
+ RotateRight<&'a FheType, Output = FheType>
+ OverflowingAdd<&'a FheType, Output = FheType>
+ OverflowingSub<&'a FheType, Output = FheType>,
for<'a> FheType: FheMin<&'a FheType, Output = FheType> + FheMax<&'a FheType, Output = FheType>,
{
let mut bench_group = c.benchmark_group(type_name);
let mut bench_prefix = "hlapi".to_string();
@@ -81,90 +71,170 @@ fn bench_fhe_type_op<FheType, F, R>(
let lhs = FheType::encrypt(rng.gen(), client_key);
let rhs = FheType::encrypt(rng.gen(), client_key);
let bench_id = format!("{bench_prefix}::{func_name}::{param_name}::{type_name}");
let mut bench_id;
bench_id = format!("{bench_prefix}::add::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = func(&lhs, &rhs);
res.wait_bench();
let res = &lhs + &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, display_name);
write_record(bench_id, "add");
bench_id = format!("{bench_prefix}::overflowing_add::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let (res, flag) = lhs.overflowing_add(&rhs);
res.wait();
black_box((res, flag))
})
});
write_record(bench_id, "overflowing_add");
bench_id = format!("{bench_prefix}::overflowing_sub::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let (res, flag) = lhs.overflowing_sub(&rhs);
res.wait();
black_box((res, flag))
})
});
write_record(bench_id, "overflowing_sub");
bench_id = format!("{bench_prefix}::sub::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs - &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "sub");
bench_id = format!("{bench_prefix}::mul::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs * &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "mul");
bench_id = format!("{bench_prefix}::bitand::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs & &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitand");
bench_id = format!("{bench_prefix}::bitor::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs | &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitor");
bench_id = format!("{bench_prefix}::bitxor::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs ^ &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitxor");
bench_id = format!("{bench_prefix}::left_shift::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs << &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "left_shift");
bench_id = format!("{bench_prefix}::right_shift::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs >> &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "right_shift");
bench_id = format!("{bench_prefix}::left_rotate::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = (&lhs).rotate_left(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "left_rotate");
bench_id = format!("{bench_prefix}::right_rotate::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = (&lhs).rotate_right(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "right_rotate");
bench_id = format!("{bench_prefix}::min::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = lhs.min(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "min");
bench_id = format!("{bench_prefix}::max::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = lhs.max(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "max");
}
macro_rules! bench_type_op (
(type_name: $fhe_type:ident, display_name: $display_name:literal, operation: $op:ident) => {
macro_rules! bench_type {
($fhe_type:ident) => {
::paste::paste! {
fn [<bench_ $fhe_type:snake _ $op>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type_op::<$fhe_type, _, _>(
c,
cks,
stringify!($fhe_type),
$fhe_type::num_bits(),
$display_name,
stringify!($op),
|lhs, rhs| lhs.$op(rhs)
);
fn [<bench_ $fhe_type:snake>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type::<$fhe_type>(c, cks, stringify!($fhe_type), $fhe_type::num_bits());
}
}
};
);
macro_rules! generate_typed_benches {
($fhe_type:ident) => {
bench_type_op!(type_name: $fhe_type, display_name: "add", operation: add);
bench_type_op!(type_name: $fhe_type, display_name: "overflowing_add", operation: overflowing_add);
bench_type_op!(type_name: $fhe_type, display_name: "sub", operation: sub);
bench_type_op!(type_name: $fhe_type, display_name: "overflowing_sub", operation: overflowing_sub);
bench_type_op!(type_name: $fhe_type, display_name: "mul", operation: mul);
bench_type_op!(type_name: $fhe_type, display_name: "bitand", operation: bitand);
bench_type_op!(type_name: $fhe_type, display_name: "bitor", operation: bitor);
bench_type_op!(type_name: $fhe_type, display_name: "bitxor", operation: bitxor);
bench_type_op!(type_name: $fhe_type, display_name: "left_shift", operation: shl);
bench_type_op!(type_name: $fhe_type, display_name: "right_shift", operation: shr);
bench_type_op!(type_name: $fhe_type, display_name: "left_rotate", operation: rotate_left);
bench_type_op!(type_name: $fhe_type, display_name: "right_rotate", operation: rotate_right);
bench_type_op!(type_name: $fhe_type, display_name: "min", operation: min);
bench_type_op!(type_name: $fhe_type, display_name: "max", operation: max);
};
}
// Generate benches for all FheUint types
generate_typed_benches!(FheUint2);
generate_typed_benches!(FheUint4);
generate_typed_benches!(FheUint6);
generate_typed_benches!(FheUint8);
generate_typed_benches!(FheUint10);
generate_typed_benches!(FheUint12);
generate_typed_benches!(FheUint14);
generate_typed_benches!(FheUint16);
generate_typed_benches!(FheUint32);
generate_typed_benches!(FheUint64);
generate_typed_benches!(FheUint128);
macro_rules! run_benches {
($c:expr, $cks:expr, $($fhe_type:ident),+ $(,)?) => {
$(
::paste::paste! {
[<bench_ $fhe_type:snake _add>]($c, $cks);
[<bench_ $fhe_type:snake _overflowing_add>]($c, $cks);
[<bench_ $fhe_type:snake _sub>]($c, $cks);
[<bench_ $fhe_type:snake _overflowing_sub>]($c, $cks);
[<bench_ $fhe_type:snake _mul>]($c, $cks);
[<bench_ $fhe_type:snake _bitand>]($c, $cks);
[<bench_ $fhe_type:snake _bitor>]($c, $cks);
[<bench_ $fhe_type:snake _bitxor>]($c, $cks);
[<bench_ $fhe_type:snake _shl>]($c, $cks);
[<bench_ $fhe_type:snake _shr>]($c, $cks);
[<bench_ $fhe_type:snake _rotate_left>]($c, $cks);
[<bench_ $fhe_type:snake _rotate_right>]($c, $cks);
[<bench_ $fhe_type:snake _min>]($c, $cks);
[<bench_ $fhe_type:snake _max>]($c, $cks);
}
)+
};
}
bench_type!(FheUint2);
bench_type!(FheUint4);
bench_type!(FheUint6);
bench_type!(FheUint8);
bench_type!(FheUint10);
bench_type!(FheUint12);
bench_type!(FheUint14);
bench_type!(FheUint16);
bench_type!(FheUint32);
bench_type!(FheUint64);
bench_type!(FheUint128);
trait TypeDisplay {
fn fmt(f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -374,7 +444,7 @@ fn main() {
match env_config.bit_sizes_set {
BitSizesSet::Fast => {
run_benches!(&mut c, &cks, FheUint64);
bench_fhe_uint64(&mut c, &cks);
// KVStore Benches
if benched_device == tfhe::Device::Cpu {
@@ -382,11 +452,17 @@ fn main() {
}
}
_ => {
// Call all benchmarks for all types
run_benches!(
&mut c, &cks, FheUint2, FheUint4, FheUint6, FheUint8, FheUint10, FheUint12,
FheUint14, FheUint16, FheUint32, FheUint64, FheUint128
);
bench_fhe_uint2(&mut c, &cks);
bench_fhe_uint4(&mut c, &cks);
bench_fhe_uint6(&mut c, &cks);
bench_fhe_uint8(&mut c, &cks);
bench_fhe_uint10(&mut c, &cks);
bench_fhe_uint12(&mut c, &cks);
bench_fhe_uint14(&mut c, &cks);
bench_fhe_uint16(&mut c, &cks);
bench_fhe_uint32(&mut c, &cks);
bench_fhe_uint64(&mut c, &cks);
bench_fhe_uint128(&mut c, &cks);
// KVStore Benches
if benched_device == tfhe::Device::Cpu {
@@ -405,8 +481,5 @@ fn main() {
}
}
#[cfg(not(feature = "hpu"))]
oprf_any_range2();
c.final_summary();
}

View File

@@ -28,22 +28,12 @@ pub fn transfer_whitepaper<FheType>(
amount: &FheType,
) -> (FheType, FheType)
where
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType> + FheTrivialEncrypt<u64>,
FheBool: IfThenZero<FheType> + IfThenElse<FheType>,
FheType: Add<Output = FheType> + for<'a> FheOrd<&'a FheType>,
FheBool: IfThenZero<FheType>,
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
let has_enough_funds = (from_amount).ge(amount);
let amount_to_transfer = {
#[cfg(not(feature = "hpu"))]
{
let zero_amount = FheType::encrypt_trivial(0u64);
has_enough_funds.select(amount, &zero_amount)
}
#[cfg(feature = "hpu")]
{
has_enough_funds.if_then_zero(amount)
}
};
let amount_to_transfer = has_enough_funds.if_then_zero(amount);
let new_to_amount = to_amount + &amount_to_transfer;
let new_from_amount = from_amount - &amount_to_transfer;
@@ -60,21 +50,13 @@ pub fn par_transfer_whitepaper<FheType>(
where
FheType:
Add<Output = FheType> + for<'a> FheOrd<&'a FheType> + Send + Sync + FheTrivialEncrypt<u64>,
FheBool: IfThenZero<FheType> + IfThenElse<FheType>,
FheBool: IfThenZero<FheType>,
for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
let has_enough_funds = (from_amount).ge(amount);
let amount_to_transfer = {
#[cfg(feature = "gpu")]
{
let zero_amount = FheType::encrypt_trivial(0u64);
has_enough_funds.select(amount, &zero_amount)
}
#[cfg(not(feature = "gpu"))]
{
has_enough_funds.if_then_zero(amount)
}
};
//let zero_amount = FheType::encrypt_trivial(0u64);
//let amount_to_transfer = has_enough_funds.select(amount, &zero_amount);
let amount_to_transfer = has_enough_funds.if_then_zero(amount);
let (new_to_amount, new_from_amount) = rayon::join(
|| to_amount + &amount_to_transfer,

View File

@@ -1,44 +0,0 @@
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
use criterion::{black_box, criterion_group, Criterion};
use std::num::NonZeroU64;
use tfhe::{set_server_key, ClientKey, ConfigBuilder, FheUint64, RangeForRandom, Seed, ServerKey};
pub fn oprf_any_range(c: &mut Criterion) {
let bench_name = "hlapi::oprf_any_range";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(30));
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(param).build();
let cks = ClientKey::generate(config);
let sks = ServerKey::new(&cks);
rayon::broadcast(|_| set_server_key(sks.clone()));
set_server_key(sks);
for excluded_upper_bound in [3, 52] {
let range = RangeForRandom::new_from_excluded_upper_bound(
NonZeroU64::new(excluded_upper_bound).unwrap(),
);
let bench_id_oprf = format!("{bench_name}::bound_{excluded_upper_bound}");
bench_group.bench_function(&bench_id_oprf, |b| {
b.iter(|| {
_ = black_box(FheUint64::generate_oblivious_pseudo_random_custom_range(
Seed(0),
&range,
None,
));
})
});
}
bench_group.finish()
}
criterion_group!(oprf_any_range2, oprf_any_range);

View File

@@ -2809,7 +2809,6 @@ mod cuda {
criterion_group!(
default_cuda_dedup_ops,
cuda_add,
cuda_neg,
cuda_mul,
cuda_div_rem,
cuda_bitand,

View File

@@ -629,10 +629,8 @@ mod integer_params {
// operations.
#[cfg(feature = "hpu")]
let params = vec![BENCH_HPU_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128.into()];
#[cfg(feature = "gpu")]
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS.into()];
#[cfg(not(any(feature = "gpu", feature = "hpu")))]
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS32_PBS.into()];
#[cfg(not(feature = "hpu"))]
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128.into()];
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());
Self {

View File

@@ -1,10 +1,12 @@
#[cfg(any(feature = "shortint", feature = "integer"))]
pub mod shortint_params_aliases {
use tfhe::shortint::parameters::current_params::*;
#[cfg(feature = "hpu")]
use tfhe::shortint::parameters::KeySwitch32PBSParameters;
use tfhe::shortint::parameters::{
ClassicPBSParameters, CompactPublicKeyEncryptionParameters, CompressionParameters,
KeySwitch32PBSParameters, MultiBitPBSParameters, NoiseSquashingCompressionParameters,
NoiseSquashingParameters, ShortintKeySwitchingParameters,
MultiBitPBSParameters, NoiseSquashingCompressionParameters, NoiseSquashingParameters,
ShortintKeySwitchingParameters,
};
// KS PBS Gaussian
@@ -40,8 +42,6 @@ pub mod shortint_params_aliases {
V1_5_PARAM_MESSAGE_4_CARRY_4_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS: ClassicPBSParameters =
V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
pub const BENCH_PARAM_MESSAGE_2_CARRY_2_KS32_PBS: KeySwitch32PBSParameters =
V1_5_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
pub const BENCH_ALL_CLASSIC_PBS_PARAMETERS: [(&ClassicPBSParameters, &str); 141] =
VEC_ALL_CLASSIC_PBS_PARAMETERS;

View File

@@ -30,7 +30,7 @@ serde = ["dep:serde", "num-complex/serde"]
[dev-dependencies]
rustfft = "6.0"
rand = { workspace = true }
bincode = { workspace = true }
bincode = "1.3"
more-asserts = "0.3.1"
serde_json = "1.0.96"
dyn-stack = { workspace = true, features = ["alloc"] }

View File

@@ -31,7 +31,7 @@ experimental = []
[dev-dependencies]
serde_json = "~1.0"
itertools = { workspace = true }
bincode = { workspace = true }
bincode = "1.3.3"
criterion = "0.5.1"
[[bench]]

View File

@@ -27,7 +27,6 @@ rand_distr = "0.4.3"
criterion = "0.5.1"
doc-comment = "0.3.3"
serde_json = "1.0.94"
num-bigint = "0.4.6"
# clap has to be pinned as its minimum supported rust version
# changes often between minor releases, which breaks our CI
clap = { version = "=4.5.30", features = ["derive"] }
@@ -59,7 +58,7 @@ tfhe-csprng = { version = "0.8.0", path = "../tfhe-csprng", features = [
] }
serde = { workspace = true, features = ["default", "derive"] }
rayon = { workspace = true }
bincode = { workspace = true }
bincode = "1.3.3"
tfhe-fft = { version = "0.10.0", path = "../tfhe-fft", features = [
"serde",
"fft128",

View File

@@ -2,30 +2,14 @@
This document explains the mechanism and steps to generate an oblivious encrypted random value using only server keys.
The goal is to give to the server the possibility to generate a random value, which will be obtained in an encrypted format and will remain unknown to the server.
The goal is to give to the server the possibility to generate a random value, which will be obtained in an encrypted format and will remain unknown to the server. The implementation is based on [this article](https://eprint.iacr.org/2024/665).
The main method for this is `FheUint::generate_oblivious_pseudo_random_custom_range` which returns an integer in the given range.
Currently the range can only be in the form `[0, excluded_upper_bound[` with any `excluded_upper_bound` in `[1, 2^64[`
It follows a distribution close to the uniform.
This function guarantees the norm-1 distance (defined as ∆(P,Q) := 1/2 Sum[ω∈Ω] |P(ω) Q(ω)|)
between the actual distribution and the target uniform distribution will be below the `max_distance` argument (which must be in ]0, 1[).
The higher the distance, the more dissimilar the actual distribution is from the target uniform distribution.
The default value for `max_distance` is `2^-128` if `None` is provided.
Higher values allow better performance but must be considered carefully in the context of their target application as it may have serious unintended consequences.
If the range is a power of 2, the distribution is uniform (for any `max_distance`) and the cost is smaller.
For powers of 2 specifically there are two methods on `FheUint` and `FheInt` (based on [this article](https://eprint.iacr.org/2024/665)):
This is possible through two methods on `FheUint` and `FheInt`:
- `generate_oblivious_pseudo_random` which return an integer taken uniformly in the full integer range (`[0; 2^N[` for a `FheUintN` and `[-2^(N-1); 2^(N-1)[` for a `FheIntN`).
- `generate_oblivious_pseudo_random_bounded` which return an integer taken uniformly in `[0; 2^random_bits_count[`. For a `FheUintN`, we must have `random_bits_count <= N`. For a `FheIntN`, we must have `random_bits_count <= N - 1`.
These method functions take a seed `Seed` as input, which could be any `u128` value.
They rely on the use of the usual server key.
Both methods functions take a seed `Seed` as input, which could be any `u128` value.
They both rely on the use of the usual server key.
The output is reproducible, i.e., the function is deterministic from the inputs: assuming the same hardware, seed and server key, this function outputs the same random encrypted value.
@@ -34,8 +18,7 @@ Here is an example of the usage:
```rust
use tfhe::prelude::FheDecrypt;
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, RangeForRandom, Seed};
use std::num::NonZeroU64;
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, Seed};
pub fn main() {
let config = ConfigBuilder::default().build();
@@ -43,30 +26,23 @@ pub fn main() {
set_server_key(server_key);
let excluded_upper_bound = NonZeroU64::new(3).unwrap();
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
// in [0, excluded_upper_bound[ = {0, 1, 2}
let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
let dec_result: u8 = ct_res.decrypt(&client_key);
let random_bits_count = 3;
// in [0, 2^8[
let ct_res = FheUint8::generate_oblivious_pseudo_random(Seed(0));
let dec_result: u8 = ct_res.decrypt(&client_key);
// in [0, 2^random_bits_count[ = [0, 8[
let ct_res = FheUint8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let dec_result: u8 = ct_res.decrypt(&client_key);
assert!(dec_result < (1 << random_bits_count));
// in [-2^7, 2^7[
let ct_res = FheInt8::generate_oblivious_pseudo_random(Seed(0));
let dec_result: i8 = ct_res.decrypt(&client_key);
// in [0, 2^random_bits_count[ = [0, 8[
let ct_res = FheInt8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let dec_result: i8 = ct_res.decrypt(&client_key);
assert!(dec_result < (1 << random_bits_count));
}

View File

@@ -141,7 +141,7 @@ Some parameter sets lead to the FHE keys exceeding the 2GB memory limit of WASM,
### Setting up TFHE-rs JS on WASM API for Node.js programs.
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://drager.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
```shell
$ git clone https://github.com/zama-ai/tfhe-rs.git
@@ -150,7 +150,7 @@ Cloning into 'tfhe-rs'...
Resolving deltas: 100% (3866/3866), done.
$ cd tfhe-rs
$ cd tfhe
$ wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
$ rustup run wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
[INFO]: Compiling to Wasm...
...
[INFO]: :-) Your wasm pkg is ready to publish at ...
@@ -164,7 +164,7 @@ After the build, a new directory **pkg** is available in the `tfhe` directory.
```shell
$ ls pkg
LICENSE README.md package.json tfhe.d.ts tfhe.js tfhe_bg.wasm tfhe_bg.wasm.d.ts
LICENSE index.html package.json tfhe.d.ts tfhe.js tfhe_bg.txt tfhe_bg.wasm tfhe_bg.wasm.d.ts
$
```

View File

@@ -1,415 +0,0 @@
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{PodStack, StackReq};
use crate::core_crypto::commons::traits::*;
use crate::core_crypto::commons::utils::izip_eq;
use crate::core_crypto::entities::*;
use crate::core_crypto::fft_impl::fft64::crypto::ggsw::collect_next_term;
use crate::core_crypto::fft_impl::fft64::math::decomposition::TensorSignedDecompositionLendingIter;
use crate::core_crypto::prelude::polynomial_algorithms::*;
use crate::core_crypto::prelude::{
extract_lwe_sample_from_glwe_ciphertext, lwe_ciphertext_modulus_switch, ComputationBuffers,
DecompositionBaseLog, DecompositionLevelCount, GlweSize, ModulusSwitchedLweCiphertext,
MonomialDegree, PolynomialSize, SignedDecomposer,
};
pub fn programmable_bootstrap_karatsuba_lwe_ciphertext_mem_optimized_requirement<Scalar>(
glwe_size: GlweSize,
polynomial_size: PolynomialSize,
) -> StackReq {
StackReq::all_of(&[
// local accumulator
StackReq::new_aligned::<Scalar>(glwe_size.0 * polynomial_size.0, CACHELINE_ALIGN),
// blind rotation
blind_rotate_karatsuba_assign_scratch::<Scalar>(glwe_size, polynomial_size),
])
}
/// Return the required memory for [`blind_rotate_karatsuba_assign`].
pub fn blind_rotate_karatsuba_assign_scratch<Scalar>(
glwe_size: GlweSize,
polynomial_size: PolynomialSize,
) -> StackReq {
StackReq::any_of(&[
// tmp_poly allocation
StackReq::new_aligned::<Scalar>(polynomial_size.0, CACHELINE_ALIGN),
StackReq::all_of(&[
// ct1 allocation
StackReq::new_aligned::<Scalar>(glwe_size.0 * polynomial_size.0, CACHELINE_ALIGN),
// external product
karatsuba_add_external_product_assign_scratch::<Scalar>(glwe_size, polynomial_size),
]),
])
}
/// Return the required memory for [`karatsuba_add_external_product_assign`].
pub fn karatsuba_add_external_product_assign_scratch<Scalar>(
glwe_size: GlweSize,
polynomial_size: PolynomialSize,
) -> StackReq {
StackReq::all_of(&[
// Output buffer
StackReq::new_aligned::<Scalar>(glwe_size.0 * polynomial_size.0, CACHELINE_ALIGN),
// decomposition
StackReq::new_aligned::<Scalar>(glwe_size.0 * polynomial_size.0, CACHELINE_ALIGN),
// decomposition term
StackReq::new_aligned::<Scalar>(glwe_size.0 * polynomial_size.0, CACHELINE_ALIGN),
])
}
/// Perform a programmable bootstrap given an input [`LWE ciphertext`](`LweCiphertext`), a
/// look-up table passed as a [`GLWE ciphertext`](`GlweCiphertext`) and an [`LWE bootstrap
/// key`](`LweBootstrapKey`) using the karatsuba polynomial multiplication. The result is written in
/// the provided output [`LWE ciphertext`](`LweCiphertext`).
///
/// This convenience entry point allocates its own scratch memory; to manage the
/// computation memory manually, use
/// [`programmable_bootstrap_karatsuba_lwe_ciphertext_mem_optimized`].
///
/// # Warning
/// For a more efficient implementation of the programmable bootstrap, see
/// [`programmable_bootstrap_lwe_ciphertext`](super::programmable_bootstrap_lwe_ciphertext)
pub fn programmable_bootstrap_karatsuba_lwe_ciphertext<InputCont, OutputCont, AccCont, KeyCont>(
    input: &LweCiphertext<InputCont>,
    output: &mut LweCiphertext<OutputCont>,
    accumulator: &GlweCiphertext<AccCont>,
    bsk: &LweBootstrapKey<KeyCont>,
) where
    InputCont: Container<Element = u64>,
    OutputCont: ContainerMut<Element = u64>,
    AccCont: Container<Element = u64>,
    KeyCont: Container<Element = u64>,
{
    assert!(
        input.ciphertext_modulus().is_power_of_two(),
        "This operation requires the input to have a power of two modulus."
    );
    assert_eq!(
        output.ciphertext_modulus(),
        accumulator.ciphertext_modulus()
    );
    // Size a scratch buffer exactly as the mem-optimized entry point requires.
    let required = programmable_bootstrap_karatsuba_lwe_ciphertext_mem_optimized_requirement::<u64>(
        bsk.glwe_size(),
        bsk.polynomial_size(),
    );
    let mut mem = ComputationBuffers::new();
    mem.resize(required.unaligned_bytes_required());
    programmable_bootstrap_karatsuba_lwe_ciphertext_mem_optimized(
        input,
        output,
        accumulator,
        bsk,
        mem.stack(),
    );
}
/// Perform a programmable bootstrap given an input [`LWE ciphertext`](`LweCiphertext`), a
/// look-up table passed as a [`GLWE ciphertext`](`GlweCiphertext`) and an [`LWE bootstrap
/// key`](`LweBootstrapKey`) using the karatsuba polynomial multiplication. The result is written in
/// the provided output [`LWE ciphertext`](`LweCiphertext`).
///
/// Scratch memory is taken from `stack`, which must satisfy
/// [`programmable_bootstrap_karatsuba_lwe_ciphertext_mem_optimized_requirement`].
///
/// # Warning
/// For a more efficient implementation of the programmable bootstrap, see
/// [`programmable_bootstrap_lwe_ciphertext_mem_optimized`](super::programmable_bootstrap_lwe_ciphertext_mem_optimized)
pub fn programmable_bootstrap_karatsuba_lwe_ciphertext_mem_optimized<
    InputCont,
    OutputCont,
    AccCont,
    KeyCont,
>(
    input: &LweCiphertext<InputCont>,
    output: &mut LweCiphertext<OutputCont>,
    accumulator: &GlweCiphertext<AccCont>,
    bsk: &LweBootstrapKey<KeyCont>,
    stack: &mut PodStack,
) where
    InputCont: Container<Element = u64>,
    OutputCont: ContainerMut<Element = u64>,
    AccCont: Container<Element = u64>,
    KeyCont: Container<Element = u64>,
{
    assert_eq!(
        output.ciphertext_modulus(),
        accumulator.ciphertext_modulus()
    );
    assert_eq!(accumulator.ciphertext_modulus(), bsk.ciphertext_modulus());
    // Work on a stack-allocated copy of the accumulator so the caller's lut is
    // left untouched by the in-place blind rotation.
    let (local_accumulator_data, stack) =
        stack.collect_aligned(CACHELINE_ALIGN, accumulator.as_ref().iter().copied());
    let mut local_accumulator = GlweCiphertextMutView::from_container(
        &mut *local_accumulator_data,
        accumulator.polynomial_size(),
        accumulator.ciphertext_modulus(),
    );
    // Switch the input ciphertext to the blind-rotation input modulus derived
    // from the accumulator's polynomial size.
    let log_modulus = accumulator
        .polynomial_size()
        .to_blind_rotation_input_modulus_log();
    let msed = lwe_ciphertext_modulus_switch(input.as_view(), log_modulus);
    blind_rotate_karatsuba_assign_mem_optimized(&msed, &mut local_accumulator, bsk, stack);
    // The bootstrap output is the constant-coefficient sample of the rotated lut.
    extract_lwe_sample_from_glwe_ciphertext(&local_accumulator, output, MonomialDegree(0));
}
/// Perform a blind rotation given an input [`modulus switched LWE
/// ciphertext`](`ModulusSwitchedLweCiphertext`), modifying a look-up table passed as a [`GLWE
/// ciphertext`](`GlweCiphertext`) and an [`LWE bootstrap key`](`LweBootstrapKey`) using the
/// karatsuba polynomial multiplication.
///
/// If you want to manage the computation memory manually you can use
/// [`blind_rotate_karatsuba_assign_mem_optimized`].
///
/// # Warning
/// For a more efficient implementation of the blind rotation, see
/// [`blind_rotate_assign`](super::blind_rotate_assign)
pub fn blind_rotate_karatsuba_assign<OutputScalar, OutputCont, KeyCont>(
    msed_input: &impl ModulusSwitchedLweCiphertext<usize>,
    lut: &mut GlweCiphertext<OutputCont>,
    bsk: &LweBootstrapKey<KeyCont>,
) where
    OutputScalar: UnsignedTorus + CastInto<usize>,
    OutputCont: ContainerMut<Element = OutputScalar>,
    KeyCont: Container<Element = OutputScalar>,
    GlweCiphertext<OutputCont>: PartialEq<GlweCiphertext<OutputCont>>,
{
    let mut buffers = ComputationBuffers::new();
    // Size the scratch for the actual output scalar type. The previous
    // hard-coded `::<u64>` mis-sized the buffer whenever `OutputScalar` is not
    // 64 bits wide (too small for wider scalars, wasteful for narrower ones).
    buffers.resize(
        blind_rotate_karatsuba_assign_scratch::<OutputScalar>(
            bsk.glwe_size(),
            bsk.polynomial_size(),
        )
        .unaligned_bytes_required(),
    );
    blind_rotate_karatsuba_assign_mem_optimized(msed_input, lut, bsk, buffers.stack())
}
/// Perform a blind rotation given an input [`modulus switched LWE
/// ciphertext`](`ModulusSwitchedLweCiphertext`), modifying a look-up table passed as a [`GLWE
/// ciphertext`](`GlweCiphertext`) and an [`LWE bootstrap key`](`LweBootstrapKey`) using the
/// karatsuba polynomial multiplication.
///
/// Scratch memory is taken from `stack`, which must satisfy
/// [`blind_rotate_karatsuba_assign_scratch`].
///
/// # Warning
/// For a more efficient implementation of the blind rotation, see
/// [`blind_rotate_assign`](super::blind_rotate_assign)
pub fn blind_rotate_karatsuba_assign_mem_optimized<OutputScalar, OutputCont, KeyCont>(
    msed_input: &impl ModulusSwitchedLweCiphertext<usize>,
    lut: &mut GlweCiphertext<OutputCont>,
    bsk: &LweBootstrapKey<KeyCont>,
    stack: &mut PodStack,
) where
    OutputScalar: UnsignedTorus + CastInto<usize>,
    OutputCont: ContainerMut<Element = OutputScalar>,
    KeyCont: Container<Element = OutputScalar>,
    GlweCiphertext<OutputCont>: PartialEq<GlweCiphertext<OutputCont>>,
{
    assert!(lut.ciphertext_modulus().is_power_of_two());
    assert_eq!(
        bsk.input_lwe_dimension(),
        msed_input.lwe_dimension(),
        "Mismatched input LweDimension. \
        LweBootstrapKey input LweDimension: {:?}, input LweCiphertext LweDimension {:?}.",
        bsk.input_lwe_dimension(),
        msed_input.lwe_dimension(),
    );
    assert_eq!(
        bsk.glwe_size(),
        lut.glwe_size(),
        "Mismatched GlweSize. \
        LweBootstrapKey GlweSize: {:?}, lut GlweSize {:?}.",
        bsk.glwe_size(),
        lut.glwe_size(),
    );
    assert_eq!(
        lut.polynomial_size(),
        bsk.polynomial_size(),
        "Mismatched PolynomialSize. \
        LweBootstrapKey PolynomialSize: {:?}, lut PolynomialSize {:?}.",
        bsk.polynomial_size(),
        lut.polynomial_size(),
    );
    let msed_lwe_mask = msed_input.mask();
    let msed_lwe_body = msed_input.body();
    let monomial_degree = MonomialDegree(msed_lwe_body.cast_into());
    let lut_poly_size = lut.polynomial_size();
    let ciphertext_modulus = lut.ciphertext_modulus();
    assert!(ciphertext_modulus.is_compatible_with_native_modulus());
    // Initial rotation by the body: each lut polynomial is divided by X^b
    // (monic monomial division), using a one-polynomial stack temporary.
    lut.as_mut_polynomial_list()
        .iter_mut()
        .for_each(|mut poly| {
            let (tmp_poly, _) = stack.make_aligned_raw(poly.as_ref().len(), CACHELINE_ALIGN);
            let mut tmp_poly = Polynomial::from_container(&mut *tmp_poly);
            tmp_poly.as_mut().copy_from_slice(poly.as_ref());
            polynomial_wrapping_monic_monomial_div(&mut poly, &tmp_poly, monomial_degree);
        });
    // We initialize the ct_0 used for the successive cmuxes
    let ct0 = lut;
    // ct1 is a scratch GLWE ciphertext reused across all mask iterations.
    let (ct1, stack) = stack.make_aligned_raw(ct0.as_ref().len(), CACHELINE_ALIGN);
    let mut ct1 =
        GlweCiphertextMutView::from_container(&mut *ct1, lut_poly_size, ciphertext_modulus);
    for (lwe_mask_element, bootstrap_key_ggsw) in izip_eq!(msed_lwe_mask, bsk.iter()) {
        // A zero mask element would rotate by X^0 (identity), so it is skipped.
        if lwe_mask_element != 0 {
            let monomial_degree = MonomialDegree(lwe_mask_element);
            // we effectively inline the body of cmux here, merging the initial subtraction
            // operation with the monic polynomial multiplication, then performing the
            // external product manually
            // We rotate ct_1 and subtract ct_0 (first step of cmux) by performing
            // ct_1 <- (ct_0 * X^a_i) - ct_0
            for (mut ct1_poly, ct0_poly) in izip_eq!(
                ct1.as_mut_polynomial_list().iter_mut(),
                ct0.as_polynomial_list().iter(),
            ) {
                polynomial_wrapping_monic_monomial_mul_and_subtract(
                    &mut ct1_poly,
                    &ct0_poly,
                    monomial_degree,
                );
            }
            // second step of cmux:
            // ct_0 <- ct_0 + ct1 * s_i
            // with ct_0 + ct1s_i = ct_0 + ((ct_0 * X^a_i) - ct_0)s_i
            //                    = ct_0 if s_i= 0
            //                      ct_0 * X^a_i otherwise
            //                    = ct_0 * X^(a_i * s_i)
            //
            // as_mut_view is required to keep borrow rules consistent
            karatsuba_add_external_product_assign(
                ct0.as_mut_view(),
                bootstrap_key_ggsw,
                ct1.as_view(),
                stack,
            );
        }
    }
    // For a non-native power-of-two modulus, re-round every coefficient to the
    // closest representable value under that modulus.
    if !ciphertext_modulus.is_native_modulus() {
        let signed_decomposer = SignedDecomposer::new(
            DecompositionBaseLog(ciphertext_modulus.get_custom_modulus().ilog2() as usize),
            DecompositionLevelCount(1),
        );
        ct0.as_mut()
            .iter_mut()
            .for_each(|x| *x = signed_decomposer.closest_representable(*x));
    }
}
/// Perform the external product of `ggsw` and `glwe`, and adds the result to `out`.
///
/// Polynomial products are computed with the wrapping polynomial multiplication
/// primitives ([`polynomial_wrapping_mul`] / [`polynomial_wrapping_add_mul_assign`])
/// rather than an FFT. Scratch memory is taken from `stack`, which must satisfy
/// [`karatsuba_add_external_product_assign_scratch`].
#[cfg_attr(feature = "__profiling", inline(never))]
pub fn karatsuba_add_external_product_assign<Scalar>(
    mut out: GlweCiphertextMutView<'_, Scalar>,
    ggsw: GgswCiphertextView<Scalar>,
    glwe: GlweCiphertextView<Scalar>,
    stack: &mut PodStack,
) where
    Scalar: UnsignedTorus,
{
    // we check that the polynomial sizes match
    debug_assert_eq!(ggsw.polynomial_size(), glwe.polynomial_size());
    debug_assert_eq!(ggsw.polynomial_size(), out.polynomial_size());
    // we check that the glwe sizes match
    debug_assert_eq!(ggsw.glwe_size(), glwe.glwe_size());
    debug_assert_eq!(ggsw.glwe_size(), out.glwe_size());
    let align = CACHELINE_ALIGN;
    let poly_size = ggsw.polynomial_size().0;
    // we round the input mask and body
    let decomposer = SignedDecomposer::<Scalar>::new(
        ggsw.decomposition_base_log(),
        ggsw.decomposition_level_count(),
    );
    let (output_buffer, substack0) =
        stack.make_aligned_raw::<Scalar>(poly_size * ggsw.glwe_size().0, align);
    // output_fft_buffer is initially uninitialized, considered to be implicitly zero, to avoid
    // the cost of filling it up with zeros. `is_output_uninit` is set to `false` once
    // it has been fully initialized for the first time.
    let output_buffer = &mut *output_buffer;
    let mut is_output_uninit = true;
    // Lazily decompose all coefficients of `glwe`; terms are produced one
    // decomposition level at a time inside the loop below.
    let (mut decomposition, substack1) = TensorSignedDecompositionLendingIter::new(
        glwe.as_ref()
            .iter()
            .map(|s| decomposer.init_decomposer_state(*s)),
        DecompositionBaseLog(decomposer.base_log),
        DecompositionLevelCount(decomposer.level_count),
        substack0,
    );
    // We loop through the levels
    for ggsw_decomp_matrix in ggsw.iter() {
        // We retrieve the decomposition of this level.
        let (_glwe_level, glwe_decomp_term, _substack2) =
            collect_next_term(&mut decomposition, substack1, align);
        let glwe_decomp_term = GlweCiphertextView::from_container(
            &*glwe_decomp_term,
            ggsw.polynomial_size(),
            out.ciphertext_modulus(),
        );
        // For each level we have to add the result of the vector-matrix product between the
        // decomposition of the glwe, and the ggsw level matrix to the output. To do so, we
        // iteratively add to the output, the product between every line of the matrix, and
        // the corresponding (scalar) polynomial in the glwe decomposition:
        //
        //                ggsw_mat                        ggsw_mat
        //   glwe_dec   | - - - - | <        glwe_dec   | - - - - |
        //  | - - - | x | - - - - |         | - - - | x | - - - - | <
        //    ^         | - - - - |             ^       | - - - - |
        //
        //        t = 1                           t = 2                     ...
        for (ggsw_row, glwe_poly) in izip_eq!(
            ggsw_decomp_matrix.as_glwe_list().iter(),
            glwe_decomp_term.as_polynomial_list().iter()
        ) {
            let row_as_poly_list = ggsw_row.as_polynomial_list();
            if is_output_uninit {
                // First contribution: plain multiply writes (rather than adds to)
                // the still-uninitialized accumulation buffer.
                for (mut output_poly, row_poly) in output_buffer
                    .chunks_exact_mut(poly_size)
                    .map(Polynomial::from_container)
                    .zip(row_as_poly_list.iter())
                {
                    polynomial_wrapping_mul(&mut output_poly, &row_poly, &glwe_poly);
                }
            } else {
                for (mut output_poly, row_poly) in output_buffer
                    .chunks_exact_mut(poly_size)
                    .map(Polynomial::from_container)
                    .zip(row_as_poly_list.iter())
                {
                    polynomial_wrapping_add_mul_assign(&mut output_poly, &row_poly, &glwe_poly);
                }
            }
            is_output_uninit = false;
        }
    }
    // We iterate over the polynomials in the output.
    // (Skipped if the ggsw contributed nothing, in which case `out` is unchanged.)
    if !is_output_uninit {
        izip_eq!(
            out.as_mut_polynomial_list().iter_mut(),
            output_buffer
                .into_chunks(poly_size)
                .map(Polynomial::from_container),
        )
        .for_each(|(mut out, res)| polynomial_wrapping_add_assign(&mut out, &res));
    }
}

View File

@@ -1,12 +1,10 @@
pub mod fft128_pbs;
pub mod fft64_pbs;
pub mod karatsuba_pbs;
pub mod ntt64_bnf_pbs;
pub mod ntt64_pbs;
pub use fft128_pbs::*;
pub use fft64_pbs::*;
pub use karatsuba_pbs::*;
pub use ntt64_bnf_pbs::*;
pub use ntt64_pbs::*;

View File

@@ -1161,91 +1161,3 @@ fn lwe_encrypt_pbs_ntt64_bnf_decrypt(params: ClassicTestParams<u64>) {
create_parameterized_test!(lwe_encrypt_pbs_ntt64_bnf_decrypt {
TEST_PARAMS_3_BITS_SOLINAS_U64
});
fn lwe_encrypt_pbs_karatsuba_decrypt_custom_mod(params: ClassicTestParams<u64>) {
let lwe_noise_distribution = params.lwe_noise_distribution;
let ciphertext_modulus = params.ciphertext_modulus;
let message_modulus_log = params.message_modulus_log;
let msg_modulus = 1 << (message_modulus_log.0);
let encoding_with_padding = get_encoding_with_padding(ciphertext_modulus);
let glwe_dimension = params.glwe_dimension;
let polynomial_size = params.polynomial_size;
let mut rsc = TestResources::new();
let f = |x: u64| x;
let delta: u64 = encoding_with_padding / msg_modulus;
let mut msg = msg_modulus;
let accumulator = generate_programmable_bootstrap_glwe_lut(
polynomial_size,
glwe_dimension.to_glwe_size(),
msg_modulus.cast_into(),
ciphertext_modulus,
delta,
f,
);
assert!(check_encrypted_content_respects_mod(
&accumulator,
ciphertext_modulus
));
while msg != 0 {
msg = msg.wrapping_sub(1);
let mut keys_gen = |params| generate_keys(params, &mut rsc);
let keys = gen_keys_or_get_from_cache_if_enabled(params, &mut keys_gen);
let (input_lwe_secret_key, output_lwe_secret_key, bsk) =
(keys.small_lwe_sk, keys.big_lwe_sk, keys.bsk);
for _ in 0..NB_TESTS {
let plaintext = Plaintext(msg * delta);
let lwe_ciphertext_in = allocate_and_encrypt_new_lwe_ciphertext(
&input_lwe_secret_key,
plaintext,
lwe_noise_distribution,
ciphertext_modulus,
&mut rsc.encryption_random_generator,
);
assert!(check_encrypted_content_respects_mod(
&lwe_ciphertext_in,
ciphertext_modulus
));
let mut out_pbs_ct = LweCiphertext::new(
0,
output_lwe_secret_key.lwe_dimension().to_lwe_size(),
ciphertext_modulus,
);
programmable_bootstrap_karatsuba_lwe_ciphertext(
&lwe_ciphertext_in,
&mut out_pbs_ct,
&accumulator,
&bsk,
);
assert!(check_encrypted_content_respects_mod(
&out_pbs_ct,
ciphertext_modulus
));
let decrypted = decrypt_lwe_ciphertext(&output_lwe_secret_key, &out_pbs_ct);
let decoded = round_decode(decrypted.0, delta) % msg_modulus;
assert_eq!(decoded, f(msg));
}
// In coverage, we break after one while loop iteration, changing message values does not
// yield higher coverage
#[cfg(tarpaulin)]
break;
}
}
create_parameterized_test!(lwe_encrypt_pbs_karatsuba_decrypt_custom_mod);

View File

@@ -540,12 +540,10 @@ pub fn sup_diff(cumulative_bins: &[u64], theoretical_cdf: &[f64]) -> f64 {
.iter()
.copied()
.zip_eq(theoretical_cdf.iter().copied())
.enumerate()
.map(|(i, (x, theoretical_cdf))| {
.map(|(x, theoretical_cdf)| {
let empirical_cdf = x as f64 / number_of_samples as f64;
if i == cumulative_bins.len() - 1 {
assert_eq!(theoretical_cdf, 1.0);
if theoretical_cdf == 1.0 {
assert_eq!(empirical_cdf, 1.0);
}

View File

@@ -4,9 +4,7 @@ use crate::high_level_api::keys::InternalServerKey;
use crate::high_level_api::re_randomization::ReRandomizationMetadata;
#[cfg(feature = "gpu")]
use crate::integer::gpu::ciphertext::{CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::shortint::MessageModulus;
use crate::{FheInt, Seed};
use std::num::NonZeroU64;
impl<Id: FheUintId> FheUint<Id> {
/// Generates an encrypted unsigned integer
@@ -94,7 +92,7 @@ impl<Id: FheUintId> FheUint<Id> {
}
})
}
/// Generates an encrypted unsigned integer
/// Generates an encrypted `num_block` blocks unsigned integer
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
@@ -152,103 +150,6 @@ impl<Id: FheUintId> FheUint<Id> {
}
})
}
/// Generates an encrypted unsigned integer
/// taken almost uniformly in the given range using the given seed.
/// Currently the range can only be in the form `[0, excluded_upper_bound[`
/// with any `excluded_upper_bound` in `[1, 2^64[`.
///
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
///
/// This function guarantees that the norm-1 distance
/// (defined as ∆(P,Q) := 1/2 · Σ[ω∈Ω] |P(ω) − Q(ω)|)
/// between the actual distribution and the target uniform distribution
/// will be below the `max_distance` argument (which must be in ]0, 1[).
/// The higher the distance, the more dissimilar the actual distribution is
/// from the target uniform distribution.
///
/// The default value for `max_distance` is `2^-128` if `None` is provided.
///
/// Higher values allow better performance but must be considered carefully in the context of
/// their target application as it may have serious unintended consequences.
///
/// If the range is a power of 2, the distribution is uniform (for any `max_distance`) and
/// the cost is smaller.
///
/// ```rust
/// use std::num::NonZeroU64;
/// use tfhe::prelude::FheDecrypt;
/// use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, RangeForRandom, Seed};
///
/// let config = ConfigBuilder::default().build();
/// let (client_key, server_key) = generate_keys(config);
///
/// set_server_key(server_key);
///
/// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
///
/// let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
///
/// let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
///
/// let dec_result: u16 = ct_res.decrypt(&client_key);
/// assert!(dec_result < excluded_upper_bound.get() as u16);
/// ```
pub fn generate_oblivious_pseudo_random_custom_range(
seed: Seed,
range: &RangeForRandom,
max_distance: Option<f64>,
) -> Self {
let excluded_upper_bound = range.excluded_upper_bound;
if excluded_upper_bound.is_power_of_two() {
let random_bits_count = excluded_upper_bound.ilog2() as u64;
Self::generate_oblivious_pseudo_random_bounded(seed, random_bits_count)
} else {
let max_distance = max_distance.unwrap_or_else(|| 2_f64.powi(-128));
assert!(
0_f64 < max_distance && max_distance < 1_f64,
"max_distance (={max_distance}) should be in ]0, 1["
);
global_state::with_internal_keys(|key| match key {
InternalServerKey::Cpu(key) => {
let message_modulus = key.message_modulus();
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
let num_blocks_output = Id::num_blocks(key.message_modulus()) as u64;
let ct = key
.pbs_key()
.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
excluded_upper_bound,
num_blocks_output,
);
Self::new(ct, key.tag.clone(), ReRandomizationMetadata::default())
}
#[cfg(feature = "gpu")]
InternalServerKey::Cuda(_cuda_key) => {
panic!("Gpu does not support this operation yet.")
}
#[cfg(feature = "hpu")]
InternalServerKey::Hpu(_device) => {
panic!("Hpu does not support this operation yet.")
}
})
}
}
#[cfg(feature = "gpu")]
/// Returns the amount of memory required to execute generate_oblivious_pseudo_random_bounded
///
@@ -372,7 +273,7 @@ impl<Id: FheIntId> FheInt<Id> {
}
})
}
/// Generates an encrypted signed integer
/// Generates an encrypted `num_block` blocks signed integer
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
@@ -466,350 +367,10 @@ impl<Id: FheIntId> FheInt<Id> {
}
}
pub struct RangeForRandom {
excluded_upper_bound: NonZeroU64,
}
impl RangeForRandom {
pub fn new_from_excluded_upper_bound(excluded_upper_bound: NonZeroU64) -> Self {
Self {
excluded_upper_bound,
}
}
}
/// Smallest number of random input bits (always a multiple of the per-block
/// message bit count) for which the norm-1 distance to the uniform
/// distribution over `[0, excluded_upper_bound[` drops strictly below
/// `max_distance`.
fn num_input_random_bits_for_max_distance(
    excluded_upper_bound: NonZeroU64,
    max_distance: f64,
    message_modulus: MessageModulus,
) -> u64 {
    assert!(message_modulus.0.is_power_of_two());
    let bits_per_block = message_modulus.0.ilog2() as u64;
    // Add random blocks one at a time until the distance bound is satisfied.
    let mut block_count = 1;
    while distance(excluded_upper_bound.get(), block_count * bits_per_block) >= max_distance {
        block_count += 1;
    }
    block_count * bits_per_block
}
/// Norm-1 distance between the distribution of
/// `(r * excluded_upper_bound) >> random_bit_count` (for `r` uniform in
/// `[0, 2^random_bit_count[`) and the uniform distribution over
/// `[0, excluded_upper_bound[`.
fn distance(excluded_upper_bound: u64, random_bit_count: u64) -> f64 {
    let remainder = mod_pow_2(random_bit_count, excluded_upper_bound);
    let numerator = remainder as f64 * (excluded_upper_bound - remainder) as f64;
    let denominator = 2_f64.powi(random_bit_count as i32) * excluded_upper_bound as f64;
    numerator / denominator
}
// Computes 2^exponent % modulus via square-and-multiply, with u128
// intermediates so that the squaring step cannot overflow for any u64 modulus.
fn mod_pow_2(exponent: u64, modulus: u64) -> u64 {
    assert_ne!(modulus, 0);
    if modulus == 1 {
        return 0;
    }
    let mod_val = u128::from(modulus);
    // `acc` accumulates the answer; `square` runs through 2^(2^k) mod modulus.
    let mut acc: u128 = 1;
    let mut square: u128 = 2;
    let mut remaining = exponent;
    while remaining != 0 {
        // Fold the current power in whenever the corresponding exponent bit is set.
        if remaining & 1 == 1 {
            acc = acc * square % mod_val;
        }
        square = square * square % mod_val;
        remaining >>= 1;
    }
    acc as u64
}
#[cfg(test)]
mod test {
use super::*;
use crate::integer::server_key::radix_parallel::tests_unsigned::test_oprf::{
oprf_density_function, p_value_upper_bound_oprf_almost_uniformity_from_values,
probability_density_function_from_density,
};
use crate::prelude::FheDecrypt;
use crate::shortint::oprf::test::test_uniformity;
use crate::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
use crate::{generate_keys, set_server_key, ClientKey, ConfigBuilder, FheUint8, Seed};
use num_bigint::BigUint;
use rand::{thread_rng, Rng};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
// Helper: the "oracle" implementation backed by BigUint::modpow.
// Slow, but mathematically guaranteed correct; used to cross-check mod_pow_2.
fn oracle_mod_pow_2(exponent: u64, modulus: u64) -> u64 {
    assert_ne!(modulus, 0);
    if modulus == 1 {
        return 0;
    }
    let result = BigUint::from(2u32).modpow(&BigUint::from(exponent), &BigUint::from(modulus));
    // The result fits in one u64 digit because it is reduced mod a u64 value;
    // an empty digit iterator means the result is zero.
    result.iter_u64_digits().next().unwrap_or(0)
}
#[test]
fn test_edge_cases() {
// 2^0 % 10 = 1
assert_eq!(mod_pow_2(0, 10), 1, "Failed exponent 0");
// 2^10 % 1 = 0
assert_eq!(mod_pow_2(10, 1), 0, "Failed modulus 1");
// 2^1 % 10 = 2
assert_eq!(mod_pow_2(1, 10), 2, "Failed exponent 1");
// 2^3 % 5 = 8 % 5 = 3
assert_eq!(mod_pow_2(3, 5), 3, "Failed small calc");
}
#[test]
fn test_boundaries_and_overflow() {
assert_eq!(mod_pow_2(2, u64::MAX), 4);
assert_eq!(mod_pow_2(u64::MAX, 3), 2);
assert_eq!(mod_pow_2(5, 32), 0);
}
#[test]
fn test_against_oracle() {
let mut rng = thread_rng();
for _ in 0..1_000_000 {
let exp: u64 = rng.gen();
let mod_val: u64 = rng.gen();
let mod_val = if mod_val == 0 { 1 } else { mod_val };
let expected = oracle_mod_pow_2(exp, mod_val);
let actual = mod_pow_2(exp, mod_val);
assert_eq!(
actual, expected,
"Mismatch! 2^{exp} % {mod_val} => Ours: {actual}, Oracle: {expected}",
);
}
}
#[test]
fn test_distance_with_uniform() {
for excluded_upper_bound in 1..20 {
for num_input_random_bits in 0..20 {
let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);
let theoretical_pdf = probability_density_function_from_density(&density);
let p_uniform = 1. / excluded_upper_bound as f64;
let actual_distance: f64 = 1. / 2.
* theoretical_pdf
.iter()
.map(|p| (*p - p_uniform).abs())
.sum::<f64>();
let theoretical_distance = distance(excluded_upper_bound, num_input_random_bits);
assert!(
(theoretical_distance - actual_distance).abs()
<= theoretical_distance / 1_000_000.,
"{theoretical_distance} != {actual_distance}"
);
}
}
}
#[test]
fn test_uniformity_scalar_mul_shift() {
let max_distance = 2_f64.powi(-20);
let message_modulus = MessageModulus(4);
let excluded_upper_bound = 3;
let num_input_random_bits = num_input_random_bits_for_max_distance(
NonZeroU64::new(excluded_upper_bound).unwrap(),
max_distance,
message_modulus,
);
let sample_count: usize = 10_000_000;
let p_value_limit: f64 = 0.001;
// The distribution is not exactly uniform
// This check ensures than with the given low max_distance,
// the distribution is indistinguishable from the uniform with at the given sample count
test_uniformity(sample_count, p_value_limit, excluded_upper_bound, |_seed| {
oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits)
});
}
fn oprf_clear_equivalent(excluded_upper_bound: u64, num_input_random_bits: u64) -> u64 {
let random_input_upper_bound = 1 << num_input_random_bits;
let random_input = thread_rng().gen_range(0..random_input_upper_bound);
(random_input * excluded_upper_bound) >> num_input_random_bits
}
#[test]
fn test_uniformity_generate_oblivious_pseudo_random_custom_range() {
let base_sample_count: usize = 10_000;
let p_value_limit: f64 = 0.001;
let params = PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(params).build();
let (cks, sks) = generate_keys(config);
rayon::broadcast(|_| set_server_key(sks.clone()));
let message_modulus = params.message_modulus;
// [0.7, 0.1] for `max_distance` chosen to have `num_input_random_bits` be [2, 4]
// for any of the listed `excluded_upper_bound`
for (expected_num_input_random_bits, max_distance, excluded_upper_bounds) in
[(2, 0.7, [3, 5, 6, 7]), (4, 0.1, [3, 5, 6, 7])]
{
for excluded_upper_bound in excluded_upper_bounds {
let sample_count = base_sample_count * excluded_upper_bound as usize;
let excluded_upper_bound = NonZeroU64::new(excluded_upper_bound).unwrap();
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
assert_eq!(num_input_random_bits, expected_num_input_random_bits);
test_uniformity_generate_oblivious_pseudo_random_custom_range2(
sample_count,
p_value_limit,
message_modulus,
&cks,
excluded_upper_bound,
max_distance,
);
}
}
}
fn test_uniformity_generate_oblivious_pseudo_random_custom_range2(
sample_count: usize,
p_value_limit: f64,
message_modulus: MessageModulus,
cks: &ClientKey,
excluded_upper_bound: NonZeroU64,
max_distance: f64,
) {
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
let real_values: Vec<u64> = (0..sample_count)
.into_par_iter()
.map(|_| {
let img = FheUint8::generate_oblivious_pseudo_random_custom_range(
Seed(rand::thread_rng().gen::<u128>()),
&range,
Some(max_distance),
);
img.decrypt(cks)
})
.collect();
let excluded_upper_bound = excluded_upper_bound.get();
let uniform_values: Vec<u64> = (0..sample_count)
.into_par_iter()
.map(|_| thread_rng().gen_range(0..excluded_upper_bound))
.collect();
let clear_oprf_value_lower_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits - 1))
.collect();
let clear_oprf_value_same_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits))
.collect();
let clear_oprf_value_higher_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits + 1))
.collect();
for (values, should_have_low_p_value) in [
(&real_values, false),
// to test that the same distribution passes
(&clear_oprf_value_same_num_input_random_bits, false),
// to test that other distribution don't pass
// (makes sure the test is statistically powerful)
(&uniform_values, true),
(&clear_oprf_value_lower_num_input_random_bits, true),
(&clear_oprf_value_higher_num_input_random_bits, true),
] {
let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
values,
num_input_random_bits,
excluded_upper_bound,
);
println!("p_value_upper_bound: {p_value_upper_bound}");
if should_have_low_p_value {
assert!(
p_value_upper_bound < p_value_limit,
"p_value_upper_bound (={p_value_upper_bound}) expected to be smaller than {p_value_limit}"
);
} else {
assert!(
p_value_limit < p_value_upper_bound ,
"p_value_upper_bound (={p_value_upper_bound}) expected to be bigger than {p_value_limit}"
);
}
}
}
}
#[cfg(test)]
#[cfg(feature = "gpu")]
#[allow(unused_imports)]
mod test_gpu {
mod test {
use crate::prelude::*;
use crate::{
generate_keys, set_server_key, ConfigBuilder, FheInt128, FheUint32, FheUint64, GpuIndex,

View File

@@ -386,12 +386,6 @@ fn test_if_then_else() {
super::test_case_if_then_else(&client_key);
}
#[test]
fn test_if_then_zero() {
let client_key = setup_default_cpu();
super::test_case_if_then_zero(&client_key);
}
#[test]
fn test_flip() {
let client_key = setup_default_cpu();

View File

@@ -89,12 +89,6 @@ fn test_case_if_then_else_hpu() {
super::test_case_if_then_else(&client_key);
}
#[test]
fn test_case_if_then_zero_hpu() {
let client_key = setup_default_hpu();
super::test_case_if_then_zero(&client_key);
}
#[test]
fn test_case_flip_hpu() {
let client_key = setup_default_hpu();

View File

@@ -568,28 +568,6 @@ fn test_case_if_then_else(client_key: &ClientKey) {
);
}
fn test_case_if_then_zero(client_key: &ClientKey) {
let clear_a = 42u8;
let clear_b = 128u8;
let a = FheUint8::encrypt(clear_a, client_key);
let b = FheUint8::encrypt(clear_b, client_key);
let result = a.le(&b).if_then_zero(&a);
let decrypted_result: u8 = result.decrypt(client_key);
assert_eq!(
decrypted_result,
if clear_a <= clear_b { clear_a } else { 0 }
);
let result = a.ge(&b).if_then_zero(&a);
let decrypted_result: u8 = result.decrypt(client_key);
assert_eq!(
decrypted_result,
if clear_a >= clear_b { clear_a } else { 0 }
);
}
fn test_case_flip(client_key: &ClientKey) {
let clear_a = rand::random::<u32>();
let clear_b = rand::random::<u32>();

View File

@@ -48,7 +48,6 @@ macro_rules! export_concrete_array_types {
}
pub use crate::core_crypto::commons::math::random::{Seed, XofSeed};
pub use crate::high_level_api::integers::oprf::RangeForRandom;
pub use crate::integer::server_key::MatchValues;
use crate::{error, Error, Versionize};
use backward_compatibility::compressed_ciphertext_list::SquashedNoiseCiphertextStateVersions;

View File

@@ -9,9 +9,9 @@
pub use crate::high_level_api::traits::{
BitSlice, CiphertextList, DivRem, FheDecrypt, FheEncrypt, FheEq, FheKeyswitch, FheMax, FheMin,
FheOrd, FheTrivialEncrypt, FheTryEncrypt, FheTryTrivialEncrypt, FheWait, Flip, IfThenElse,
IfThenZero, OverflowingAdd, OverflowingMul, OverflowingNeg, OverflowingSub, ReRandomize,
RotateLeft, RotateLeftAssign, RotateRight, RotateRightAssign, ScalarIfThenElse, SquashNoise,
Tagged,
OverflowingAdd, OverflowingMul, OverflowingNeg, OverflowingSub, ReRandomize, RotateLeft,
RotateLeftAssign, RotateRight, RotateRightAssign, ScalarIfThenElse, SquashNoise, Tagged,
IfThenZero,
};
#[cfg(feature = "hpu")]
pub use crate::high_level_api::traits::{FheHpu, HpuHandle};

View File

@@ -1,5 +1,4 @@
use crate::core_crypto::gpu::entities::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
@@ -17,8 +16,7 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
use crate::integer::gpu::{
cuda_backend_compress, cuda_backend_decompress, cuda_backend_get_compression_size_on_gpu,
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, extract_glwe_async,
PBSType,
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, PBSType,
};
use crate::prelude::CastInto;
use crate::shortint::ciphertext::{
@@ -199,30 +197,6 @@ impl<T: UnsignedInteger> CudaPackedGlweCiphertextList<T> {
meta: self.meta,
}
}
pub fn extract_glwe(
&self,
glwe_index: usize,
streams: &CudaStreams,
) -> CudaGlweCiphertextList<T> {
let meta = self
.meta
.as_ref()
.expect("CudaPackedGlweCiphertextList meta must be set to extract GLWE");
let mut output_cuda_glwe_list = CudaGlweCiphertextList::new(
meta.glwe_dimension,
meta.polynomial_size,
GlweCiphertextCount(1),
meta.ciphertext_modulus,
streams,
);
unsafe {
extract_glwe_async(streams, &mut output_cuda_glwe_list, self, glwe_index as u32);
}
streams.synchronize();
output_cuda_glwe_list
}
}
impl<T: UnsignedInteger> Clone for CudaPackedGlweCiphertextList<T> {

View File

@@ -7,7 +7,6 @@ pub mod server_key;
#[cfg(feature = "zk-pok")]
pub mod zk;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_compact_ciphertext_list::CudaLweCompactCiphertextList;
@@ -10424,44 +10423,3 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
carry_modulus.0 as u32,
);
}
#[allow(clippy::too_many_arguments)]
/// Asynchronously copies the GLWE at `glwe_index` out of `glwe_list` into
/// `glwe_array_out`, dispatching to the 64-bit or 128-bit CUDA backend entry
/// point depending on `T::BITS`.
///
/// # Panics
/// Panics if the output or input data does not reside on the stream's first
/// GPU, or if `T::BITS` is neither 64 nor 128.
///
/// # Safety
///
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
/// is required
pub unsafe fn extract_glwe_async<T: UnsignedInteger>(
streams: &CudaStreams,
glwe_array_out: &mut CudaGlweCiphertextList<T>,
glwe_list: &CudaPackedGlweCiphertextList<T>,
glwe_index: u32,
) {
assert_eq!(
streams.gpu_indexes[0],
glwe_array_out.0.d_vec.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
assert_eq!(
streams.gpu_indexes[0],
glwe_list.data.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
let packed_glwe_list_ffi = prepare_cuda_packed_glwe_ct_ffi(glwe_list);
// Select the backend kernel matching the integer width of `T`.
if T::BITS == 128 {
cuda_integer_extract_glwe_128(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else if T::BITS == 64 {
cuda_integer_extract_glwe_64(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else {
panic!("Unsupported integer size for CUDA GLWE extraction");
}
}

View File

@@ -1,757 +0,0 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertext};
use crate::integer::compression_keys::CompressionPrivateKeys;
use crate::integer::gpu::list_compression::server_keys::CudaCompressionKey;
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::cuda_glwe_list_to_glwe_ciphertext;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
use crate::integer::gpu::CudaServerKey;
use crate::integer::{ClientKey, CompressedServerKey, IntegerCiphertext};
use crate::shortint::ciphertext::{Ciphertext, Degree, NoiseLevel};
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::engine::ShortintEngine;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{CompressionParameters, MetaParameters, Variance};
use crate::shortint::server_key::tests::noise_distribution::br_dp_packingks_ms::br_dp_packing_ks_ms;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLwePackingKeyswitchKey, NoiseSimulationModulus,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
expected_pfail_for_precision, mean_and_variance_check, normality_check, pfail_check,
precision_with_padding, update_ap_params_msg_and_carry_moduli, DecryptionAndNoiseResult,
NoiseSample, PfailAndPrecision, PfailTestMeta, PfailTestResult,
};
use crate::shortint::server_key::tests::noise_distribution::{
should_run_short_pfail_tests_debug, should_use_single_key_debug,
};
use crate::shortint::{
AtomicPatternParameters, CarryModulus, MessageModulus, ShortintEncoding, ShortintParameterSet,
};
use crate::GpuIndex;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
pub const SAMPLES_PER_MSG_PACKING_KS_NOISE: usize = 1000;
/// Sanity check: runs the blind-rotate -> dot-product -> packing-keyswitch ->
/// modulus-switch pipeline once through the noise-tooling helper
/// (`br_dp_packing_ks_ms`) and once through the production GPU compression
/// path, then asserts both yield the same GLWE (unfilled body slots zeroed).
fn sanity_check_encrypt_br_dp_packing_ks_ms(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
// The multiplication done in the compression is made to move the message up at the top of the
// carry space, multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
// Noiseless encryptions of zero: one per slot of the packed output GLWE.
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
// Reference run through the noise-tooling pipeline.
let (d_before_packing, _after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
// Rebuild CPU shortint ciphertexts from the PBS results so the production
// GPU compression key can consume them as radix ciphertexts.
let compression_inputs: Vec<_> = d_before_packing
.into_iter()
.map(|(_input, pbs_result, _dp_result)| {
let pbs_result_list_cpu = pbs_result.as_lwe_64().to_lwe_ciphertext_list(&streams);
let pbs_result_cpu = LweCiphertext::from_container(
pbs_result_list_cpu.clone().into_container(),
pbs_result_list_cpu.ciphertext_modulus(),
);
let cpu_ct = Ciphertext::new(
pbs_result_cpu,
Degree::new(params.message_modulus().0 - 1),
NoiseLevel::NOMINAL,
params.message_modulus(),
params.carry_modulus(),
params.atomic_pattern(),
);
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cpu_ct]);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
d_ct.ciphertext
})
.collect();
// Production path: compress, then extract the first packed GLWE back.
let gpu_compressed =
cuda_compression_key.compress_ciphertexts_into_list(&compression_inputs, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_ms_list = d_after_ms.to_glwe_ciphertext_list(&streams);
let mut after_ms = GlweCiphertext::from_container(
after_ms_list.clone().into_container(),
after_ms_list.polynomial_size(),
after_ms_list.ciphertext_modulus(),
);
// Bodies that were not filled are discarded
after_ms.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_ms.as_view(), extracted_glwe.as_view());
}
// Registers the sanity check above as a GPU-parameterized test over the
// listed meta-parameter set.
create_gpu_parameterized_test!(sanity_check_encrypt_br_dp_packing_ks_ms {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
/// Runs one BR -> DP -> packing-KS -> MS pass on GPU for `lwe_per_glwe` copies
/// of `msg` and decrypts every intermediate.
///
/// Returns, per input slot, the (input, after-PBS, after-DP) decryption/noise
/// results, plus the after-packing and after-modulus-switch GLWE results.
///
/// When `should_use_single_key_debug()` is set the shared `single_*` keys are
/// used; otherwise a fresh keyset is generated for this call.
fn encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
)>,
Vec<DecryptionAndNoiseResult>,
Vec<DecryptionAndNoiseResult>,
) {
let mut engine = ShortintEngine::new();
// Thread-local key storage so the borrows below can outlive the `else` arm.
let thread_cks: crate::integer::ClientKey;
let thread_cuda_sks: CudaServerKey;
let thread_compression_private_key;
let thread_cuda_compression_key;
let (cks, cuda_sks, compression_private_key, cuda_compression_key) =
if should_use_single_key_debug() {
(
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
)
} else {
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, streams);
thread_compression_private_key = thread_cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) = thread_cks
.new_compressed_compression_decompression_keys(&thread_compression_private_key);
thread_cuda_compression_key = compressed_compression_key.decompress_to_cuda(streams);
(
&thread_cks,
&thread_cuda_sks,
&thread_compression_private_key,
&thread_cuda_compression_key,
)
};
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
// Noiseless encryptions so measured noise is attributable to the pipeline.
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key.encrypt_noiseless_pbs_input_dyn_lwe_with_engine(
br_input_modulus_log,
msg,
&mut engine,
)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(streams, cuda_block_info))
.collect();
let dp_scalar = params.carry_modulus().0;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let (d_before_packing, d_after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
let compute_large_lwe_secret_key = cks.key.encryption_key();
let compression_glwe_secret_key = &compression_private_key.key.post_packing_ks_key;
let compute_encoding = cuda_sks.encoding();
// After the dot product the carry space is consumed, so decrypt those
// results with a carry modulus of 1.
let compression_encoding = ShortintEncoding {
carry_modulus: CarryModulus(1),
..compute_encoding
};
let after_packing = cuda_glwe_list_to_glwe_ciphertext(&d_after_packing, streams);
let after_ms = cuda_glwe_list_to_glwe_ciphertext(&d_after_ms, streams);
(
d_before_packing
.into_iter()
.map(|(d_input, d_pbs_result, d_dp_result)| {
let input = d_input.as_ct_64_cpu(streams);
let pbs_result = d_pbs_result.as_ct_64_cpu(streams);
let dp_result = d_dp_result.as_ct_64_cpu(streams);
(
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
DecryptionAndNoiseResult::new_from_lwe(
&input,
&standard_atomic_pattern_client_key.lwe_secret_key,
msg,
&compute_encoding,
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KS32 Atomic Pattern not supported on GPU tests yet");
}
},
DecryptionAndNoiseResult::new_from_lwe(
&pbs_result,
&compute_large_lwe_secret_key,
msg,
&compute_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&dp_result,
&compute_large_lwe_secret_key,
msg,
&compression_encoding,
),
)
})
.collect(),
DecryptionAndNoiseResult::new_from_glwe(
&after_packing,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
DecryptionAndNoiseResult::new_from_glwe(
&after_ms,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
/// Noise-measurement wrapper around the inner helper: converts every
/// `DecryptionAndNoiseResult` into a `NoiseSample`, panicking if any
/// decryption failed (noise statistics are only meaningful on correct
/// decryptions).
fn encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(NoiseSample, NoiseSample, NoiseSample)>,
Vec<NoiseSample>,
Vec<NoiseSample>,
) {
let (before_packing, after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
(
before_packing
.into_iter()
.map(|(input, after_pbs, after_dp)| {
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_pbs
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_dp
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
})
.collect(),
after_packing
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
after_ms
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
/// Pfail-measurement wrapper around the inner helper: keeps only the
/// after-modulus-switch decryption results. Unlike the noise helper, failures
/// are not unwrapped here — they are counted by the caller to estimate pfail.
fn encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> Vec<DecryptionAndNoiseResult> {
let (_before_packing, _after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
after_ms
}
/// Statistical noise check for the BR -> DP -> packing-KS -> MS pipeline on
/// GPU: runs the pipeline symbolically on noise simulations to predict the
/// output variance, samples real noise in parallel, then checks normality
/// before modulus switch and mean/variance after modulus switch against the
/// prediction.
fn noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let compression_key = compressed_compression_key.decompress();
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
// Sanity: the simulation key dimensions must match the actual keys.
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
assert!(noise_simulation_packing_key.matches_actual_shortint_comp_key(&compression_key.key));
// The multiplication done in the compression is made to move the message up at the top of the
// carry space, multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
// Symbolic run: propagate noise variances through the same pipeline.
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
cks.parameters().lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(cks.parameters().ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
// Check that the circuit is correct with respect to core implementation, i.e. does not crash on
// dimension checks
let (expected_glwe_size_out, expected_polynomial_size_out, expected_modulus_f64_out) = {
let (_before_packing_sim, _after_packing, after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
(
after_ms.glwe_dimension().to_glwe_size(),
after_ms.polynomial_size(),
after_ms.ciphertext_modulus().raw_modulus_float(),
)
};
assert_eq!(after_ms_sim.glwe_size(), expected_glwe_size_out);
assert_eq!(after_ms_sim.polynomial_size(), expected_polynomial_size_out);
assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);
let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
let mut noise_samples_before_ms = vec![];
let mut noise_samples_after_ms = vec![];
// Sample in parallel chunks, one dedicated CUDA stream per worker slot.
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
for _ in 0..cleartext_modulus {
let (current_noise_samples_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) =
(0..SAMPLES_PER_MSG_PACKING_KS_NOISE)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_before_packing, after_packing, after_ms) =
encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_stream,
);
(after_packing, after_ms)
})
.collect::<Vec<_>>()
})
.unzip();
noise_samples_before_ms.extend(current_noise_samples_before_ms);
noise_samples_after_ms.extend(current_noise_samples_after_ms);
}
let noise_samples_before_ms_flattened: Vec<_> = noise_samples_before_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let noise_samples_after_ms_flattened: Vec<_> = noise_samples_after_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let before_ms_normality =
normality_check(&noise_samples_before_ms_flattened, "before ms", 0.01);
let after_ms_is_ok = mean_and_variance_check(
&noise_samples_after_ms_flattened,
"after_ms",
0.0,
after_ms_sim.variance_per_occupied_slot(),
comp_params.packing_ks_key_noise_distribution(),
after_ms_sim
.glwe_dimension()
.to_equivalent_lwe_dimension(after_ms_sim.polynomial_size()),
after_ms_sim.modulus().as_f64(),
);
assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
}
// Registers the noise check above as a GPU-parameterized test over the
// listed meta-parameter set.
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
/// Pfail check for the compression pipeline on GPU: inflates the message
/// modulus so that decryption failures after the compression modulus switch
/// become observable at a practical sample count, then measures the failure
/// rate in parallel and compares it against the predicted pfail.
fn noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu(meta_params: MetaParameters) {
// First derive the test metadata (expected pfail before/after inflating the
// message modulus) from a symbolic noise-simulation run.
let (pfail_test_meta, params, comp_params) = {
let (mut params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let original_message_modulus = params.message_modulus();
let original_carry_modulus = params.carry_modulus();
// For now only allow 2_2 parameters, and see later for heuristics to use
assert_eq!(original_message_modulus.0, 4);
assert_eq!(original_carry_modulus.0, 4);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
// The multiplication done in the compression is made to move the message up at the top of
// the carry space, multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = comp_params.lwe_per_glwe();
let storage_modulus_log = comp_params.storage_log_modulus();
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
params.lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(params.ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
let expected_variance_after_storage = after_ms_sim.variance_per_occupied_slot();
let compression_carry_mod = CarryModulus(1);
let compression_message_mod = original_message_modulus;
let compression_precision_with_padding =
precision_with_padding(compression_message_mod, compression_carry_mod);
let expected_pfail_for_storage = expected_pfail_for_precision(
compression_precision_with_padding,
expected_variance_after_storage,
);
let original_pfail_and_precision = PfailAndPrecision::new(
expected_pfail_for_storage,
compression_message_mod,
compression_carry_mod,
);
// Here we update the message modulus only:
// - because the message modulus matches for the compression encoding and compute encoding
// - so that the carry modulus stays the same and we apply the same dot product as normal
// for 2_2
// - so that the effective encoding after the storage is the one we used to evaluate the
// pfail
let updated_message_mod = MessageModulus(1 << 6);
let updated_carry_mod = compression_carry_mod;
update_ap_params_msg_and_carry_moduli(&mut params, updated_message_mod, updated_carry_mod);
assert!(
(params.message_modulus().0 * params.carry_modulus().0).ilog2()
<= comp_params.storage_log_modulus().0 as u32,
"Compression storage modulus cannot store enough bits for pfail estimation"
);
let updated_precision_with_padding =
precision_with_padding(updated_message_mod, updated_carry_mod);
let new_expected_pfail_for_storage = expected_pfail_for_precision(
updated_precision_with_padding,
expected_variance_after_storage,
);
let new_expected_pfail_and_precision = PfailAndPrecision::new(
new_expected_pfail_for_storage,
updated_message_mod,
updated_carry_mod,
);
let pfail_test_meta = if should_run_short_pfail_tests_debug() {
// To have the same amount of keys generated as the case where a single run is a single
// sample
let expected_fails = 200 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_desired_expected_fails(
original_pfail_and_precision,
new_expected_pfail_and_precision,
expected_fails,
)
} else {
// To guarantee 1_000_000 keysets are generated
let total_runs = 1_000_000 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_total_runs(
original_pfail_and_precision,
new_expected_pfail_and_precision,
total_runs,
)
};
(pfail_test_meta, params, comp_params)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
// Each run yields `lwe_per_glwe` samples, so fewer runs are needed.
let total_runs_for_expected_fails = pfail_test_meta
.total_runs_for_expected_fails()
.div_ceil(lwe_per_glwe.0.try_into().unwrap());
// Measure failures in parallel chunks, one CUDA stream per worker slot.
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
let measured_fails: f64 = (0..total_runs_for_expected_fails)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_streams = &vec_local_streams[*i as usize % chunk_size];
let after_ms_decryption_result = encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_streams,
);
after_ms_decryption_result
.into_iter()
.map(|result| result.failure_as_f64())
.sum::<f64>()
})
.collect::<Vec<_>>()
})
.sum();
let test_result = PfailTestResult { measured_fails };
pfail_check(&pfail_test_meta, test_result);
}
// Registers the pfail check above as a GPU-parameterized test over the
// listed meta-parameter set.
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -1,869 +0,0 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::commons::noise_formulas::noise_simulation::{
NoiseSimulationLweFourier128Bsk, NoiseSimulationLwePackingKeyswitchKey,
};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertextCount};
use crate::integer::gpu::CudaServerKey;
use crate::integer::noise_squashing::NoiseSquashingPrivateKey;
use crate::integer::CompressedServerKey;
use crate::core_crypto::commons::parameters::CiphertextModulusLog;
use crate::core_crypto::prelude::generate_programmable_bootstrap_glwe_lut;
use crate::integer::ciphertext::NoiseSquashingCompressionPrivateKey;
use crate::integer::gpu::list_compression::server_keys::CudaNoiseSquashingCompressionKey;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::{CudaNoiseSquashingKey, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::unchecked_small_scalar_mul_integer_async;
use crate::integer::IntegerCiphertext;
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::parameters::noise_squashing::NoiseSquashingParameters;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{
AtomicPatternParameters, MetaParameters, NoiseSquashingCompressionParameters, Variance,
};
use crate::shortint::server_key::tests::noise_distribution::dp_ks_pbs128_packingks::{
dp_ks_any_ms_standard_pbs128, dp_ks_any_ms_standard_pbs128_packing_ks,
};
use crate::shortint::server_key::tests::noise_distribution::should_use_single_key_debug;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
mean_and_variance_check, DecryptionAndNoiseResult, NoiseSample,
};
use crate::shortint::{PaddingBit, ShortintEncoding, ShortintParameterSet};
use crate::GpuIndex;
use rayon::prelude::*;
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
///
/// Runs the dot-product -> keyswitch -> 128-bit PBS -> packing-keyswitch
/// pipeline once through the noise-tooling helper
/// (`dp_ks_any_ms_standard_pbs128_packing_ks`) and once through the production
/// noise-squashing + compression path, then asserts both yield the same
/// packed GLWE (unfilled body slots zeroed).
fn sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu(meta_params: MetaParameters) {
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
meta_noise_squashing_params.compression_parameters.unwrap(),
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = atomic_params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let noise_squashing_compression_key = noise_squashing_private_key
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
let cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&noise_squashing_compression_key,
&streams,
);
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
// Encoding for the 128-bit noise-squashed domain.
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let max_scalar_mul = cuda_sks.max_noise_level.get();
// Identity LUT built for the 128-bit PBS dimensions.
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
noise_squashing_key.key.polynomial_size(),
noise_squashing_key.key.glwe_size(),
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(atomic_params.message_modulus().0 - 1),
message_modulus: atomic_params.message_modulus(),
carry_modulus: atomic_params.carry_modulus(),
atomic_pattern: atomic_params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
// Reference run through the noise-tooling pipeline.
let (_before_packing, d_after_packing) = dp_ks_any_ms_standard_pbs128_packing_ks(
input_zero_as_lwe,
max_scalar_mul,
&cuda_sks,
modulus_switch_config,
&cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
&cuda_noise_squashing_compression_key.packing_key_switching_key,
&mut cuda_side_resources,
);
// Production path: scalar-mul then noise-squash each input on GPU.
let cuda_noise_squashed_cts: Vec<_> = input_zeros
.into_par_iter()
.map(|ct| {
let cloned_ct = ct;
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cloned_ct]);
let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
// SAFETY: the async scalar multiplication is followed by a stream
// synchronize below, as its contract requires.
unsafe {
unchecked_small_scalar_mul_integer_async(
&streams,
&mut d_ct.ciphertext,
max_scalar_mul,
atomic_params.message_modulus(),
atomic_params.carry_modulus(),
);
}
streams.synchronize();
cuda_noise_squashing_key.unchecked_squash_ciphertext_noise(
&d_ct.ciphertext,
&cuda_sks,
&streams,
)
})
.collect();
// Compress via the production key and extract the first packed GLWE back.
let gpu_compressed = cuda_noise_squashing_compression_key
.compress_noise_squashed_ciphertexts_into_list(&cuda_noise_squashed_cts, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_packing_list = d_after_packing.to_glwe_ciphertext_list(&streams);
let mut after_packing = GlweCiphertext::from_container(
after_packing_list.clone().into_container(),
after_packing_list.polynomial_size(),
after_packing_list.ciphertext_modulus(),
);
// Bodies that were not filled are discarded
after_packing.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_packing.as_view(), extracted_glwe.as_view());
}
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
///
/// Encrypts 128 zeros, then pushes them through the same DP -> KS -> (optional drift) ->
/// MS -> 128-bit PBS pipeline twice:
/// 1. step-by-step via the noise-tooling path (`dp_ks_any_ms_standard_pbs128`), and
/// 2. via the production entry point (`squash_radix_ciphertext_noise`),
/// and asserts that both paths produce identical post-PBS128 LWE ciphertexts on the CPU.
fn sanity_check_encrypt_dp_ks_standard_pbs128_gpu(meta_params: MetaParameters) {
    // Split the meta parameters into compute params and noise-squashing params.
    let (params, noise_squashing_params) = {
        let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
        (
            meta_params.compute_parameters,
            meta_noise_squashing_params.parameters,
        )
    };
    let gpu_index = 0;
    let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
    // Key setup: client key on CPU, server key decompressed onto the GPU.
    let block_params: ShortintParameterSet = params.into();
    let cks = crate::integer::ClientKey::new(block_params);
    let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
    let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
    // Noise-squashing key is decompressed twice: once for CPU-side metadata queries,
    // once onto the GPU for the actual squashing operations.
    let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
    let compressed_noise_squashing_compression_key =
        cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
    let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
    let cuda_noise_squashing_key =
        compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
    let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
    let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
    // Encoding used by the 128-bit (post-squashing) ciphertexts.
    let u128_encoding = ShortintEncoding {
        ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
        message_modulus: noise_squashing_params.message_modulus(),
        carry_modulus: noise_squashing_params.carry_modulus(),
        padding_bit: PaddingBit::Yes,
    };
    let max_scalar_mul = cuda_sks.max_noise_level.get();
    // Identity LUT (|x| x) for the 128-bit PBS, built on CPU then uploaded to GPU.
    let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
        noise_squashing_key.key.polynomial_size(),
        noise_squashing_key.key.glwe_size(),
        u128_encoding
            .cleartext_space_without_padding()
            .try_into()
            .unwrap(),
        u128_encoding.ciphertext_modulus,
        u128_encoding.delta(),
        |x| x,
    );
    let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
    // Sample count for this sanity check (no compression key here, so a fixed count is used).
    let lwe_per_glwe = LweCiphertextCount(128);
    let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
    let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
        degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
        message_modulus: params.message_modulus(),
        carry_modulus: params.carry_modulus(),
        atomic_pattern: params.atomic_pattern(),
        noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
    };
    // One side-resource bundle per input so the parallel map below has independent state.
    let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
        .map(|_| CudaSideResources::new(&streams, cuda_block_info))
        .collect();
    // Wrap each encrypted block as a single-block radix ciphertext on GPU, then as a u64 LWE.
    let input_zero_as_lwe: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
                &crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
                &streams,
            );
            CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
        })
        .collect();
    // Path 1: step-by-step noise-tooling pipeline; keeps every intermediate result.
    let res: Vec<_> = input_zero_as_lwe
        .into_par_iter()
        .zip(cuda_side_resources.par_iter_mut())
        .map(|(input, side_resources)| {
            let (input, after_dp, ks_result, drift_technique_result, ms_result, pbs_result) =
                dp_ks_any_ms_standard_pbs128(
                    input,
                    max_scalar_mul,
                    &cuda_sks,
                    modulus_switch_config,
                    &cuda_noise_squashing_key,
                    br_input_modulus_log,
                    &id_lut_gpu,
                    side_resources,
                );
            (
                input,
                after_dp,
                ks_result,
                drift_technique_result,
                ms_result,
                pbs_result,
            )
        })
        .collect();
    // Path 2: the same inputs, re-uploaded fresh for the production entry point.
    let input_zeros_non_pattern: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            CudaUnsignedRadixCiphertext::from_radix_ciphertext(
                &crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
                &streams,
            )
        })
        .collect();
    let vector_non_pattern: Vec<_> = input_zeros_non_pattern
        .into_par_iter()
        .map(|mut d_ct_input2| {
            // Apply the same scalar multiplication (dot product step) as the tooling path
            // before squashing, so both paths start from the same noise level.
            unsafe {
                unchecked_small_scalar_mul_integer_async(
                    &streams,
                    &mut d_ct_input2.ciphertext,
                    max_scalar_mul,
                    params.message_modulus(),
                    params.carry_modulus(),
                );
            }
            streams.synchronize();
            cuda_noise_squashing_key
                .squash_radix_ciphertext_noise(&cuda_sks, &d_ct_input2.ciphertext, &streams)
                .unwrap()
        })
        .collect();
    // Bring both paths' post-PBS128 results back to the CPU for comparison.
    let vector_pattern_cpu: Vec<_> = res
        .into_iter()
        .map(
            |(_input, _after_dp, _ks_result, _drift_technique_result, _ms_result, pbs_result)| {
                pbs_result.as_ct_128_cpu(&streams)
            },
        )
        .collect();
    let vector_non_pattern_cpu: Vec<_> = vector_non_pattern
        .into_par_iter()
        .map(|cuda_squashed_radix_ct| {
            let squashed_noise_ct_cpu =
                cuda_squashed_radix_ct.to_squashed_noise_radix_ciphertext(&streams);
            squashed_noise_ct_cpu.packed_blocks()[0]
                .lwe_ciphertext()
                .clone()
        })
        .collect();
    // Compare that all the results are equivalent
    assert_eq!(vector_pattern_cpu, vector_non_pattern_cpu);
}
/// Runs one GPU sample of the DP -> KS -> (optional drift) -> MS -> PBS128 -> packing-KS
/// pipeline on `lwe_per_glwe` encryptions of `msg`, and decrypts every intermediate
/// ciphertext to collect per-step noise measurements.
///
/// Returns:
/// - one 6-tuple of `DecryptionAndNoiseResult` per input (input, after dot product,
///   after keyswitch, before mod switch, after mod switch, after 128-bit PBS), and
/// - one `DecryptionAndNoiseResult` per packed slot of the post-packing GLWE.
///
/// When `should_use_single_key_debug()` is set, the shared `single_*` keys are reused;
/// otherwise a fresh, thread-local key set is generated so samples are key-independent.
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
    params: AtomicPatternParameters,
    noise_squashing_params: NoiseSquashingParameters,
    noise_squashing_compression_params: NoiseSquashingCompressionParameters,
    single_cks: &crate::integer::ClientKey,
    single_cuda_sks: &CudaServerKey,
    single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
    single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
    single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
    single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
    single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
    msg: u64,
    scalar_for_multiplication: u64,
    br_input_modulus_log: CiphertextModulusLog,
    streams: &CudaStreams,
) -> (
    Vec<(
        DecryptionAndNoiseResult,
        DecryptionAndNoiseResult,
        DecryptionAndNoiseResult,
        DecryptionAndNoiseResult,
        DecryptionAndNoiseResult,
        DecryptionAndNoiseResult,
    )>,
    Vec<DecryptionAndNoiseResult>,
) {
    // Thread-local key storage; only initialized on the fresh-keys branch below.
    // Declared out here so the borrowed tuple can outlive the `if` expression.
    let thread_cks: crate::integer::ClientKey;
    let thread_cuda_sks: CudaServerKey;
    let thread_noise_squashing_private_key: NoiseSquashingPrivateKey;
    let thread_noise_squashing_key: crate::integer::noise_squashing::NoiseSquashingKey;
    let thread_cuda_noise_squashing_key: CudaNoiseSquashingKey;
    let thread_noise_squashing_compression_private_key: NoiseSquashingCompressionPrivateKey;
    let thread_cuda_noise_squashing_compression_key: CudaNoiseSquashingCompressionKey;
    let (
        cks,
        cuda_sks,
        noise_squashing_private_key,
        noise_squashing_key,
        cuda_noise_squashing_key,
        noise_squashing_compression_private_key,
        cuda_noise_squashing_compression_key,
    ) = if should_use_single_key_debug() {
        // Debug mode: reuse the shared keys passed in by the caller.
        (
            single_cks,
            single_cuda_sks,
            single_noise_squashing_private_key,
            single_noise_squashing_key,
            single_cuda_noise_squashing_key,
            single_noise_squashing_compression_private_key,
            single_cuda_noise_squashing_compression_key,
        )
    } else {
        // Normal mode: generate a fresh key set for this sample.
        let block_params: ShortintParameterSet = params.into();
        thread_cks = crate::integer::ClientKey::new(block_params);
        let thread_compressed_server_key =
            CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
        thread_cuda_sks =
            CudaServerKey::decompress_from_cpu(&thread_compressed_server_key, streams);
        thread_noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
        let thread_compressed_noise_squashing_compression_key =
            thread_cks.new_compressed_noise_squashing_key(&thread_noise_squashing_private_key);
        thread_noise_squashing_key = thread_compressed_noise_squashing_compression_key.decompress();
        thread_cuda_noise_squashing_key =
            thread_compressed_noise_squashing_compression_key.decompress_to_cuda(streams);
        thread_noise_squashing_compression_private_key =
            NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
        let thread_noise_squashing_compression_key = thread_noise_squashing_private_key
            .new_noise_squashing_compression_key(&thread_noise_squashing_compression_private_key);
        thread_cuda_noise_squashing_compression_key =
            CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
                &thread_noise_squashing_compression_key,
                streams,
            );
        (
            &thread_cks,
            &thread_cuda_sks,
            &thread_noise_squashing_private_key,
            &thread_noise_squashing_key,
            &thread_cuda_noise_squashing_key,
            &thread_noise_squashing_compression_private_key,
            &thread_cuda_noise_squashing_compression_key,
        )
    };
    let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
    let bsk_polynomial_size = noise_squashing_key.key.polynomial_size();
    let bsk_glwe_size = noise_squashing_key.key.glwe_size();
    // Encoding used by the 128-bit (post-squashing) ciphertexts.
    let u128_encoding = ShortintEncoding {
        ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
        message_modulus: noise_squashing_params.message_modulus(),
        carry_modulus: noise_squashing_params.carry_modulus(),
        padding_bit: PaddingBit::Yes,
    };
    // Identity LUT (|x| x) for the 128-bit PBS, built on CPU then uploaded to GPU.
    let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
        bsk_polynomial_size,
        bsk_glwe_size,
        u128_encoding
            .cleartext_space_without_padding()
            .try_into()
            .unwrap(),
        u128_encoding.ciphertext_modulus,
        u128_encoding.delta(),
        |x| x,
    );
    let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, streams);
    // Encrypt exactly enough inputs to fill one packed GLWE.
    let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
    let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(msg)).collect();
    let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
        degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
        message_modulus: params.message_modulus(),
        carry_modulus: params.carry_modulus(),
        atomic_pattern: params.atomic_pattern(),
        noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
    };
    let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
        .map(|_| CudaSideResources::new(streams, cuda_block_info))
        .collect();
    // Wrap each encrypted block as a single-block radix ciphertext on GPU, then as a u64 LWE.
    let input_zero_as_lwe: Vec<_> = input_zeros
        .iter()
        .map(|ct| {
            let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
                &crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
                streams,
            );
            CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
        })
        .collect();
    // Full pipeline: per-step intermediates plus the final packed GLWE.
    let (before_packing_gpu, after_packing_gpu) = dp_ks_any_ms_standard_pbs128_packing_ks(
        input_zero_as_lwe,
        scalar_for_multiplication,
        cuda_sks,
        modulus_switch_config,
        cuda_noise_squashing_key,
        br_input_modulus_log,
        &id_lut_gpu,
        &cuda_noise_squashing_compression_key.packing_key_switching_key,
        &mut cuda_side_resources,
    );
    // Decrypt every intermediate under the appropriate secret key / encoding.
    let before_packing: Vec<_> = before_packing_gpu
        .into_iter()
        .map(
            |(
                input_gpu,
                after_dp_gpu,
                after_ks_gpu,
                after_drift_gpu,
                after_ms_gpu,
                after_pbs128_gpu,
            )| {
                match &cks.key.atomic_pattern {
                    AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
                        let params = standard_atomic_pattern_client_key.parameters;
                        let u64_encoding = ShortintEncoding {
                            ciphertext_modulus: params.ciphertext_modulus(),
                            message_modulus: params.message_modulus(),
                            carry_modulus: params.carry_modulus(),
                            padding_bit: PaddingBit::Yes,
                        };
                        let large_lwe_secret_key =
                            standard_atomic_pattern_client_key.large_lwe_secret_key();
                        let small_lwe_secret_key =
                            standard_atomic_pattern_client_key.small_lwe_secret_key();
                        let input_ct = input_gpu.as_ct_64_cpu(streams);
                        let after_dp_ct = after_dp_gpu.as_ct_64_cpu(streams);
                        let after_ks_ct = after_ks_gpu.as_ct_64_cpu(streams);
                        // Drift technique is optional: when absent, the pre-MS ciphertext
                        // is simply the keyswitch output.
                        let before_ms_gpu: &CudaDynLwe =
                            after_drift_gpu.as_ref().unwrap_or(&after_ks_gpu);
                        let before_ms_ct = before_ms_gpu.as_ct_64_cpu(streams);
                        let after_ms_ct = after_ms_gpu.as_ct_64_cpu(streams);
                        let after_pbs128_ct = after_pbs128_gpu.as_ct_128_cpu(streams);
                        (
                            DecryptionAndNoiseResult::new_from_lwe(
                                &input_ct,
                                &large_lwe_secret_key,
                                msg,
                                &u64_encoding,
                            ),
                            DecryptionAndNoiseResult::new_from_lwe(
                                &after_dp_ct,
                                &large_lwe_secret_key,
                                msg,
                                &u64_encoding,
                            ),
                            DecryptionAndNoiseResult::new_from_lwe(
                                &after_ks_ct,
                                &small_lwe_secret_key,
                                msg,
                                &u64_encoding,
                            ),
                            DecryptionAndNoiseResult::new_from_lwe(
                                &before_ms_ct,
                                &small_lwe_secret_key,
                                msg,
                                &u64_encoding,
                            ),
                            DecryptionAndNoiseResult::new_from_lwe(
                                &after_ms_ct,
                                &small_lwe_secret_key,
                                msg,
                                &u64_encoding,
                            ),
                            // Post-PBS128 result lives under the squashing key, u128 encoding.
                            DecryptionAndNoiseResult::new_from_lwe(
                                &after_pbs128_ct,
                                &noise_squashing_private_key
                                    .key
                                    .post_noise_squashing_lwe_secret_key(),
                                msg.into(),
                                &u128_encoding,
                            ),
                        )
                    }
                    AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
                        panic!("KS32 atomic pattern not supported for GPU yet");
                    }
                }
            },
        )
        .collect();
    // Download the packed GLWE and measure the noise of each packed slot.
    let after_packing_list = after_packing_gpu.to_glwe_ciphertext_list(streams);
    let after_packing = GlweCiphertext::from_container(
        after_packing_list.clone().into_container(),
        after_packing_list.polynomial_size(),
        after_packing_list.ciphertext_modulus(),
    );
    let after_packing = DecryptionAndNoiseResult::new_from_glwe(
        &after_packing,
        noise_squashing_compression_private_key
            .key
            .post_packing_ks_key(),
        lwe_per_glwe,
        msg.into(),
        &u128_encoding,
    );
    // Sanity: one noise result per packed input.
    assert_eq!(after_packing.len(), lwe_per_glwe.0);
    (before_packing, after_packing)
}
/// Thin wrapper over `encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu` that
/// unwraps every `DecryptionAndNoiseResult` into a raw `NoiseSample`, panicking
/// ("Decryption Failed") if any step decrypted incorrectly.
///
/// Returns the same structure as the inner helper: per-input 6-tuples of noise samples
/// (input, after DP, after KS, after drift, after MS, after PBS128) and one sample per
/// packed slot of the post-packing GLWE.
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
    params: AtomicPatternParameters,
    noise_squashing_params: NoiseSquashingParameters,
    noise_squashing_compression_params: NoiseSquashingCompressionParameters,
    single_cks: &crate::integer::ClientKey,
    single_cuda_sks: &CudaServerKey,
    single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
    single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
    single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
    single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
    single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
    msg: u64,
    scalar_for_multiplication: u64,
    br_input_modulus_log: CiphertextModulusLog,
    streams: &CudaStreams,
) -> (
    Vec<(
        NoiseSample,
        NoiseSample,
        NoiseSample,
        NoiseSample,
        NoiseSample,
        NoiseSample,
    )>,
    Vec<NoiseSample>,
) {
    // Delegate the full pipeline + per-step decryption to the inner helper.
    let (before_compression, after_compression) =
        encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
            params,
            noise_squashing_params,
            noise_squashing_compression_params,
            single_cks,
            single_cuda_sks,
            single_noise_squashing_private_key,
            single_noise_squashing_key,
            single_cuda_noise_squashing_key,
            single_noise_squashing_compression_private_key,
            single_cuda_noise_squashing_compression_key,
            msg,
            scalar_for_multiplication,
            br_input_modulus_log,
            streams,
        );
    // Every step must have decrypted correctly, otherwise the noise sample is meaningless.
    (
        before_compression
            .into_iter()
            .map(
                |(input, after_dp, after_ks, after_drift, after_ms, after_pbs)| {
                    (
                        input
                            .get_noise_if_decryption_was_correct()
                            .expect("Decryption Failed"),
                        after_dp
                            .get_noise_if_decryption_was_correct()
                            .expect("Decryption Failed"),
                        after_ks
                            .get_noise_if_decryption_was_correct()
                            .expect("Decryption Failed"),
                        after_drift
                            .get_noise_if_decryption_was_correct()
                            .expect("Decryption Failed"),
                        after_ms
                            .get_noise_if_decryption_was_correct()
                            .expect("Decryption Failed"),
                        after_pbs
                            .get_noise_if_decryption_was_correct()
                            .expect("Decryption Failed"),
                    )
                },
            )
            .collect(),
        after_compression
            .into_iter()
            .map(|after_compression| {
                after_compression
                    .get_noise_if_decryption_was_correct()
                    .expect("Decryption Failed")
            })
            .collect(),
    )
}
/// Statistical noise check for the GPU DP -> KS -> MS -> PBS128 -> packing-KS pipeline.
///
/// Builds a `NoiseSimulation*` model of every key in the pipeline, runs the simulated
/// pipeline to predict the post-packing variance, then gathers ~1000 real noise samples
/// from the GPU (in parallel, over several CUDA streams) and asserts via
/// `mean_and_variance_check` that the measured mean/variance match the prediction.
fn noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu(meta_params: MetaParameters) {
    // Split the meta parameters into compute, squashing, and squashing-compression params.
    let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
        let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
        (
            meta_params.compute_parameters,
            meta_noise_squashing_params.parameters,
            meta_noise_squashing_params.compression_parameters.unwrap(),
        )
    };
    let gpu_index = 0;
    let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
    // Shared "single key" set, used when single-key debug mode is enabled in the helper.
    let block_params: ShortintParameterSet = atomic_params.into();
    let cks = crate::integer::ClientKey::new(block_params);
    let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
    let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
    let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
    let compressed_noise_squashing_compression_key =
        cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
    let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
    let cuda_noise_squashing_key =
        compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
    let noise_squashing_compression_private_key =
        NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
    let noise_squashing_compression_key = noise_squashing_private_key
        .new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
    let cuda_noise_squashing_compression_key =
        CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
            &noise_squashing_compression_key,
            &streams,
        );
    // Noise-simulation counterparts of every key used in the pipeline.
    let noise_simulation_ksk =
        NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(atomic_params);
    let noise_simulation_bsk =
        NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(atomic_params);
    let noise_simulation_modulus_switch_config =
        NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(atomic_params);
    let noise_simulation_bsk128 =
        NoiseSimulationLweFourier128Bsk::new_from_parameters(atomic_params, noise_squashing_params);
    let noise_simulation_packing_key =
        NoiseSimulationLwePackingKeyswitchKey::new_from_noise_squashing_parameters(
            noise_squashing_params,
            noise_squashing_compression_params,
        );
    // The simulated keys must structurally match the actual keys they model.
    assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
    assert!(noise_simulation_bsk128
        .matches_actual_shortint_noise_squashing_key(&noise_squashing_key.key));
    assert!(noise_simulation_packing_key.matches_actual_pksk(
        noise_squashing_compression_key
            .key
            .packing_key_switching_key()
    ));
    let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
    let max_scalar_mul = cuda_sks.max_noise_level.get();
    // Noiseless accumulator stand-in for the PBS128 LUT in the simulation.
    let noise_simulation_accumulator = NoiseSimulationGlwe::new(
        noise_simulation_bsk128
            .output_glwe_size()
            .to_glwe_dimension(),
        noise_simulation_bsk128.output_polynomial_size(),
        Variance(0.0),
        noise_simulation_bsk128.modulus(),
    );
    // Run the simulated pipeline once to obtain the predicted post-packing noise.
    let (_before_packing_sim, after_packing_sim) = {
        let noise_simulation = NoiseSimulationLwe::encrypt(&cks.key, 0);
        dp_ks_any_ms_standard_pbs128_packing_ks(
            vec![noise_simulation; cuda_noise_squashing_compression_key.lwe_per_glwe.0],
            max_scalar_mul,
            &noise_simulation_ksk,
            noise_simulation_modulus_switch_config.as_ref(),
            &noise_simulation_bsk128,
            br_input_modulus_log,
            &noise_simulation_accumulator,
            &noise_simulation_packing_key,
            &mut vec![(); cuda_noise_squashing_compression_key.lwe_per_glwe.0],
        )
    };
    let after_packing_sim = after_packing_sim.into_lwe();
    // Check that the circuit is correct with respect to core implementation, i.e. does not crash on
    // dimension checks
    let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
        let pksk = noise_squashing_compression_key
            .key
            .packing_key_switching_key();
        let out_glwe_dim = pksk.output_key_glwe_dimension();
        let out_poly_size = pksk.output_key_polynomial_size();
        (
            out_glwe_dim.to_equivalent_lwe_dimension(out_poly_size),
            pksk.ciphertext_modulus().raw_modulus_float(),
        )
    };
    assert_eq!(
        after_packing_sim.lwe_dimension(),
        expected_lwe_dimension_out
    );
    assert_eq!(
        after_packing_sim.modulus().as_f64(),
        expected_modulus_f64_out
    );
    let cleartext_modulus = atomic_params.message_modulus().0 * atomic_params.carry_modulus().0;
    let mut noise_samples_after_packing = vec![];
    // Each helper call yields `lwe_per_glwe` samples; aim for ~1000 samples per message.
    let sample_count_per_msg =
        1000usize.div_ceil(cuda_noise_squashing_compression_key.lwe_per_glwe.0);
    // Run up to `chunk_size` helper calls in parallel, each on its own CUDA stream.
    let chunk_size = 4;
    let vec_local_streams = (0..chunk_size)
        .map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
        .collect::<Vec<_>>();
    for _i in 0..cleartext_modulus {
        let current_noise_samples_after_packing: Vec<_> = (0..sample_count_per_msg)
            .collect::<Vec<_>>()
            .chunks(chunk_size)
            .flat_map(|chunk| {
                chunk
                    .into_par_iter()
                    .map(|i| {
                        // Round-robin the local streams across the parallel workers.
                        let local_stream = &vec_local_streams[*i % chunk_size];
                        let (_before_packing, after_packing) =
                            encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
                                atomic_params,
                                noise_squashing_params,
                                noise_squashing_compression_params,
                                &cks,
                                &cuda_sks,
                                &noise_squashing_private_key,
                                &noise_squashing_key,
                                &cuda_noise_squashing_key,
                                &noise_squashing_compression_private_key,
                                &cuda_noise_squashing_compression_key,
                                0,
                                max_scalar_mul,
                                br_input_modulus_log,
                                local_stream,
                            );
                        after_packing
                    })
                    .collect::<Vec<_>>()
            })
            .collect();
        noise_samples_after_packing.extend(current_noise_samples_after_packing);
    }
    // Flatten per-call sample vectors into one series of raw noise values.
    let noise_samples_after_packing_flattened: Vec<_> = noise_samples_after_packing
        .into_iter()
        .flatten()
        .map(|x| x.value)
        .collect();
    // Measured mean/variance must match the simulation's predicted variance
    // (expected mean is 0.0 since the inputs encrypt 0).
    let after_packing_is_ok = mean_and_variance_check(
        &noise_samples_after_packing_flattened,
        "after_packing",
        0.0,
        after_packing_sim.variance(),
        noise_squashing_compression_params.packing_ks_key_noise_distribution,
        after_packing_sim.lwe_dimension(),
        after_packing_sim.modulus().as_f64(),
    );
    assert!(after_packing_is_ok);
}
// Instantiate the GPU noise/sanity tests above for the supported meta-parameter set.
create_gpu_parameterized_test!(
    noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu {
        TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
    }
);
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_gpu {
    TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -1,5 +1,3 @@
pub mod br_dp_ks_ms;
pub mod br_dp_packingks_ms;
pub mod dp_ks_ms;
pub mod dp_ks_pbs_128_packingks;
pub mod utils;

View File

@@ -1,7 +1,7 @@
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
AllocateCenteredBinaryShiftedStandardModSwitchResult,
AllocateDriftTechniqueStandardModSwitchResult, AllocateLweBootstrapResult,
AllocateLweKeyswitchResult, AllocateLwePackingKeyswitchResult, AllocateStandardModSwitchResult,
AllocateLweKeyswitchResult, AllocateStandardModSwitchResult,
CenteredBinaryShiftedStandardModSwitch, DriftTechniqueStandardModSwitch,
LweClassicFftBootstrap, LweKeyswitch, ScalarMul, StandardModSwitch,
};
@@ -13,7 +13,6 @@ use crate::core_crypto::gpu::cuda_modulus_switch_ciphertext;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::prelude::*;
use crate::integer::gpu::ciphertext::info::CudaBlockInfo;
@@ -26,7 +25,7 @@ use crate::integer::gpu::{
cuda_centered_modulus_switch_64, unchecked_small_scalar_mul_integer_async, CudaStreams,
};
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::NoiseSimulationModulusSwitchConfig;
use crate::shortint::server_key::tests::noise_distribution::utils::traits::LwePackingKeyswitch;
/// Side resources for CUDA operations in noise simulation
#[derive(Clone)]
pub struct CudaSideResources {
@@ -129,19 +128,6 @@ impl CudaDynLwe {
}
}
pub fn as_ct_128_cpu(&self, streams: &CudaStreams) -> LweCiphertext<Vec<u128>> {
match self {
Self::U32(_) => panic!("Tried getting a u32 CudaLweCiphertextList as u128."),
Self::U64(_) => panic!("Tried getting a u64 CudaLweCiphertextList as u128."),
Self::U128(_cuda_lwe) => {
let cpu_lwe_list = self.as_lwe_128().to_lwe_ciphertext_list(streams);
LweCiphertext::from_container(
cpu_lwe_list.clone().into_container(),
cpu_lwe_list.ciphertext_modulus(),
)
}
}
}
pub fn from_lwe_32(cuda_lwe: CudaLweCiphertextList<u32>) -> Self {
Self::U32(cuda_lwe)
}
@@ -155,19 +141,6 @@ impl CudaDynLwe {
}
}
/// Converts a CudaGlweCiphertextList<u64> to a GlweCiphertext<Vec<u64>>
pub fn cuda_glwe_list_to_glwe_ciphertext(
cuda_glwe_list: &CudaGlweCiphertextList<u64>,
streams: &CudaStreams,
) -> GlweCiphertext<Vec<u64>> {
let cpu_glwe_list = cuda_glwe_list.to_glwe_ciphertext_list(streams);
GlweCiphertext::from_container(
cpu_glwe_list.clone().into_container(),
cpu_glwe_list.polynomial_size(),
cpu_glwe_list.ciphertext_modulus(),
)
}
impl ScalarMul<u64> for CudaDynLwe {
type Output = Self;
type SideResources = CudaSideResources;
@@ -340,14 +313,13 @@ impl StandardModSwitch<Self> for CudaDynLwe {
panic!("U32 modulus switch not implemented for CudaDynLwe - only U64 is supported");
}
(Self::U64(input), Self::U64(output_cuda_lwe)) => {
let mut internal_output = input.duplicate(&side_resources.streams);
let internal_output = input.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut internal_output.0.d_vec,
&mut output_cuda_lwe.0.d_vec,
output_modulus_log.0 as u32,
&side_resources.streams,
);
let mut cpu_lwe = internal_output.to_lwe_ciphertext_list(&side_resources.streams);
let shift_to_map_to_native = u64::BITS - output_modulus_log.0 as u32;
for val in cpu_lwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
@@ -741,193 +713,3 @@ impl AllocateLweBootstrapResult for CudaGlweCiphertextList<u128> {
CudaDynLwe::U128(cuda_lwe)
}
}
// Implement LweClassicFft128Bootstrap for CudaNoiseSquashingKey using 128-bit PBS CUDA function
impl
crate::core_crypto::commons::noise_formulas::noise_simulation::traits::LweClassicFft128Bootstrap<
CudaDynLwe,
CudaDynLwe,
CudaGlweCiphertextList<u128>,
> for crate::integer::gpu::noise_squashing::keys::CudaNoiseSquashingKey
{
type SideResources = CudaSideResources;
fn lwe_classic_fft_128_pbs(
&self,
input: &CudaDynLwe,
output: &mut CudaDynLwe,
accumulator: &CudaGlweCiphertextList<u128>,
side_resources: &mut Self::SideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_programmable_bootstrapping::cuda_programmable_bootstrap_128_lwe_ciphertext_async;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
match (input, output) {
(CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U128(output_cuda_lwe)) => {
// Get the bootstrap key from self - it's already u128 type
let bsk = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => d_bsk,
CudaBootstrappingKey::MultiBit(_) => {
panic!("MultiBit bootstrapping keys are not supported for 128-bit PBS");
}
};
unsafe {
cuda_programmable_bootstrap_128_lwe_ciphertext_async(
input_cuda_lwe,
output_cuda_lwe,
accumulator,
bsk,
&side_resources.streams,
);
side_resources.streams.synchronize();
}
}
_ => panic!("128-bit PBS expects U64 input and U128 output for CudaDynLwe"),
}
}
}
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u64> {
type Output = CudaGlweCiphertextList<u64>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u64>>
for CudaLwePackingKeyswitchKey<u64>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u64>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64;
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_64()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}
// Implement StandardModSwitch traits for CudaGlweCiphertextList<u64>
impl AllocateStandardModSwitchResult for CudaGlweCiphertextList<u64> {
type Output = Self;
type SideResources = CudaSideResources;
fn allocate_standard_mod_switch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
Self::new(
self.glwe_dimension(),
self.polynomial_size(),
self.glwe_ciphertext_count(),
self.ciphertext_modulus(),
&side_resources.streams,
)
}
}
impl StandardModSwitch<Self> for CudaGlweCiphertextList<u64> {
type SideResources = CudaSideResources;
fn standard_mod_switch(
&self,
storage_log_modulus: CiphertextModulusLog,
output: &mut Self,
side_resources: &mut CudaSideResources,
) {
let mut internal_output = self.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut internal_output.0.d_vec,
storage_log_modulus.0 as u32,
&side_resources.streams,
);
side_resources.streams.synchronize();
let mut cpu_glwe = internal_output.to_glwe_ciphertext_list(&side_resources.streams);
let shift_to_map_to_native = u64::BITS - storage_log_modulus.0 as u32;
for val in cpu_glwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
}
let d_after_ms = Self::from_glwe_ciphertext_list(&cpu_glwe, &side_resources.streams);
*output = d_after_ms;
}
}
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u128> {
type Output = CudaGlweCiphertextList<u128>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u128>>
for CudaLwePackingKeyswitchKey<u128>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u128>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128;
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_128()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}

View File

@@ -2,7 +2,6 @@ use super::{RadixCiphertext, ServerKey, SignedRadixCiphertext};
use crate::core_crypto::commons::generators::DeterministicSeeder;
use crate::core_crypto::prelude::DefaultRandomGenerator;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use std::num::NonZeroU64;
pub use tfhe_csprng::seeders::{Seed, Seeder};
@@ -164,7 +163,6 @@ impl ServerKey {
/// as `num_input_random_bits`
///
/// ```rust
/// use std::num::NonZeroU64;
/// use tfhe::integer::gen_keys_radix;
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
/// use tfhe::Seed;
@@ -175,7 +173,7 @@ impl ServerKey {
/// let (cks, sks) = gen_keys_radix(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128, size);
///
/// let num_input_random_bits = 5;
/// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
/// let excluded_upper_bound = 3;
/// let num_blocks_output = 8;
///
/// let ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_custom_range(
@@ -188,17 +186,15 @@ impl ServerKey {
/// // Decrypt:
/// let dec_result: u64 = cks.decrypt(&ct_res);
///
/// assert!(dec_result < excluded_upper_bound.get());
/// assert!(dec_result < excluded_upper_bound);
/// ```
pub fn par_generate_oblivious_pseudo_random_unsigned_custom_range(
&self,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: NonZeroU64,
excluded_upper_bound: u64,
num_blocks_output: u64,
) -> RadixCiphertext {
let excluded_upper_bound = excluded_upper_bound.get();
assert!(self.message_modulus().0.is_power_of_two());
let message_bits_count = self.message_modulus().0.ilog2() as u64;

View File

@@ -10,7 +10,6 @@ use crate::integer::{BooleanBlock, IntegerKeyKind, RadixCiphertext, RadixClientK
use crate::shortint::parameters::*;
use crate::{ClientKey, CompressedServerKey, MatchValues, Seed, Tag};
use std::cmp::{max, min};
use std::num::NonZeroU64;
use std::sync::Arc;
create_parameterized_test!(random_op_sequence {
@@ -499,18 +498,7 @@ where
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_integer_bounded,
);
let oprf_custom_range_executor = OpSequenceCpuFunctionExecutor::new(
&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap_or(NonZeroU64::new(1).unwrap()),
num_blocks_output,
)
},
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
);
let mut oprf_ops: Vec<(OprfExecutor, String)> = vec![(

View File

@@ -9,7 +9,6 @@ use crate::integer::{IntegerKeyKind, RadixCiphertext, RadixClientKey, ServerKey}
use crate::shortint::parameters::*;
use statrs::distribution::ContinuousCDF;
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::sync::Arc;
use tfhe_csprng::seeders::Seed;
@@ -37,19 +36,9 @@ fn oprf_any_range_unsigned<P>(param: P)
where
P: Into<TestParameters>,
{
let executor =
CpuFunctionExecutor::new(&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap(),
num_blocks_output,
)
});
let executor = CpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
);
oprf_any_range_test(param, executor);
}
@@ -57,19 +46,9 @@ fn oprf_almost_uniformity_unsigned<P>(param: P)
where
P: Into<TestParameters>,
{
let executor =
CpuFunctionExecutor::new(&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap(),
num_blocks_output,
)
});
let executor = CpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
);
oprf_almost_uniformity_test(param, executor);
}
@@ -110,7 +89,7 @@ where
);
}
pub(crate) fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
pub fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64), RadixCiphertext>,
@@ -134,7 +113,7 @@ where
});
}
pub(crate) fn oprf_any_range_test<P, E>(param: P, mut executor: E)
pub fn oprf_any_range_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -170,7 +149,7 @@ where
}
}
pub(crate) fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
pub fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -186,70 +165,40 @@ where
let num_input_random_bits: u64 = 4;
let num_blocks_output = 64;
let excluded_upper_bound = 10;
let random_input_upper_bound = 1 << num_input_random_bits;
let mut density = vec![0_usize; excluded_upper_bound as usize];
for i in 0..random_input_upper_bound {
let index = ((i * excluded_upper_bound) as f64 / random_input_upper_bound as f64) as usize;
density[index] += 1;
}
let theoretical_pdf: Vec<f64> = density
.iter()
.map(|count| *count as f64 / random_input_upper_bound as f64)
.collect();
let values: Vec<u64> = (0..sample_count)
.map(|seed| {
let img = executor.execute((
Seed(seed as u128),
num_input_random_bits,
excluded_upper_bound,
excluded_upper_bound as u64,
num_blocks_output,
));
cks.decrypt(&img)
})
.collect();
let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
&values,
num_input_random_bits,
excluded_upper_bound,
);
assert!(p_value_limit < p_value_upper_bound);
}
pub(crate) fn p_value_upper_bound_oprf_almost_uniformity_from_values(
values: &[u64],
num_input_random_bits: u64,
excluded_upper_bound: u64,
) -> f64 {
let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);
let theoretical_pdf = probability_density_function_from_density(&density);
let mut bins = vec![0_u64; excluded_upper_bound as usize];
for value in values.iter().copied() {
for value in values {
bins[value as usize] += 1;
}
let cumulative_bins = cumulate(&bins);
let theoretical_cdf = cumulate(&theoretical_pdf);
let sup_diff = sup_diff(&cumulative_bins, &theoretical_cdf);
let p_value_upper_bound = dkw_alpha_from_epsilon(sample_count as f64, sup_diff);
dkw_alpha_from_epsilon(values.len() as f64, sup_diff)
}
pub(crate) fn oprf_density_function(
excluded_upper_bound: u64,
num_input_random_bits: u64,
) -> Vec<usize> {
let random_input_upper_bound = 1 << num_input_random_bits;
let mut density = vec![0_usize; excluded_upper_bound as usize];
for i in 0..random_input_upper_bound {
let output = ((i * excluded_upper_bound) >> num_input_random_bits) as usize;
density[output] += 1;
}
density
}
pub(crate) fn probability_density_function_from_density(density: &[usize]) -> Vec<f64> {
let total_count: usize = density.iter().copied().sum();
density
.iter()
.map(|count| *count as f64 / total_count as f64)
.collect()
assert!(p_value_limit < p_value_upper_bound);
}

View File

@@ -475,12 +475,8 @@ pub(crate) mod test {
}
}
pub(crate) fn test_uniformity<F>(
sample_count: usize,
p_value_limit: f64,
distinct_values: u64,
f: F,
) where
pub fn test_uniformity<F>(sample_count: usize, p_value_limit: f64, distinct_values: u64, f: F)
where
F: Sync + Fn(usize) -> u64,
{
let p_value = uniformity_p_value(f, sample_count, distinct_values);
@@ -491,7 +487,7 @@ pub(crate) mod test {
);
}
pub(crate) fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
where
F: Sync + Fn(usize) -> u64,
{
@@ -499,11 +495,8 @@ pub(crate) mod test {
let mut values_count = HashMap::new();
for i in values.iter().copied() {
assert!(
i < distinct_values,
"i (={i}) is supposed to be smaller than distinct_values (={distinct_values})",
);
for i in &values {
assert!(*i < distinct_values, "i {} dv{}", *i, distinct_values);
*values_count.entry(i).or_insert(0) += 1;
}

View File

@@ -27,7 +27,7 @@ use crate::shortint::server_key::ServerKey;
use rayon::prelude::*;
#[allow(clippy::too_many_arguments)]
pub fn dp_ks_any_ms_standard_pbs128<
fn dp_ks_any_ms_standard_pbs128<
InputCt,
ScalarMulResult,
KsResult,
@@ -111,7 +111,7 @@ where
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
pub fn dp_ks_any_ms_standard_pbs128_packing_ks<
fn dp_ks_any_ms_standard_pbs128_packing_ks<
InputCt,
ScalarMulResult,
KsResult,

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.2 KiB

After

Width:  |  Height:  |  Size: 252 KiB

View File

@@ -12,22 +12,14 @@ function setButtonsDisabledState(buttonIds, state) {
async function setup() {
let supportsThreads = await threads();
// This variable is set to true if we are using the `serve.multithreaded.json` config
if (crossOriginIsolated) {
if (supportsThreads) {
console.info("Running in multithreaded mode");
} else {
console.error("This browser does not support threads");
return;
}
} else {
console.warn("Running in unsafe coop mode");
if (!supportsThreads) {
console.error("This browser does not support threads");
return;
}
const worker = new Worker(new URL("worker.js", import.meta.url), {
type: "module",
});
const demos = await Comlink.wrap(worker).demos;
const demoNames = [

View File

@@ -5,9 +5,7 @@
"main": "index.js",
"scripts": {
"build": "cp -r ../../tfhe/pkg ./ && webpack build ./index.js --mode production -o dist --output-filename index.js && cp index.html dist/ && cp favicon.ico dist/",
"server": "npm run server:multithreaded",
"server:multithreaded": "serve --config ../serve.multithreaded.json dist/",
"server:unsafe-coop": "serve --config ../serve.unsafe-coop.json dist/",
"server": "serve --config ../serve.json dist/",
"format": "prettier . --write",
"check-format": "prettier . --check"
},

View File

@@ -1,11 +0,0 @@
{
"headers": [
{
"source": "**/*.@(js|html)",
"headers": [
{ "key": "Cross-Origin-Embedder-Policy", "value": "unsafe-none" },
{ "key": "Cross-Origin-Opener-Policy", "value": "unsafe-none" }
]
}
]
}

View File

@@ -1,5 +1,4 @@
import * as Comlink from "comlink";
import { threads } from "wasm-feature-detect";
import init, {
initThreadPool,
init_panic_hook,
@@ -727,15 +726,8 @@ async function compactPublicKeyZeroKnowledgeBench() {
serialized_size = list.safe_serialize(BigInt(10000000)).length;
}
const mean = timing / bench_loops;
let base_bench_str = "compact_fhe_uint_proven_encryption_";
let supportsThreads = await threads();
if (!supportsThreads) {
base_bench_str += "unsafe_coop_";
}
const common_bench_str =
base_bench_str +
"compact_fhe_uint_proven_encryption_" +
params.zk_scheme +
"_" +
bits_to_encrypt +
@@ -761,10 +753,7 @@ async function compactPublicKeyZeroKnowledgeBench() {
async function main() {
await init();
let supportsThreads = await threads();
if (supportsThreads) {
await initThreadPool(navigator.hardwareConcurrency);
}
await initThreadPool(navigator.hardwareConcurrency);
await init_panic_hook();
return Comlink.proxy({

Some files were not shown because too many files have changed in this diff Show More