Compare commits

...

18 Commits

Author SHA1 Message Date
Agnes Leroy
155e810376 Put back device synchronize, change active_gpu_count 2024-12-16 11:07:55 +01:00
Agnes Leroy
aa16aaf3a0 DO NOT MERGE: remove device synchronization in drop for CudaVec to check the effect on multi-gpu throughput benchmarks 2024-12-16 10:29:10 +01:00
Arthur Meyre
3a99ee9718 chore: remove aliases for gaussian parameters for compact PK
- we are TUniform by default so no more aliases for gaussian parameters
2024-12-16 09:50:12 +01:00
Agnes Leroy
86f07045fe chore(gpu): run pbs in parallel in difference_check 2024-12-16 09:23:41 +01:00
Mayeul@Zama
b1ce34f8a7 chore(hlapi): stabilize FheTypes 2024-12-13 18:31:30 +01:00
Agnes Leroy
4388a3dc99 chore(gpu): add sxm5 vm target 2024-12-13 17:25:55 +01:00
Arthur Meyre
805436839d fix(shortint): fix compression encoding change not being taken into account
- this maps better to what was optimized and will dramatically diminish the
pfail as we now have 2 more bits for the LUT redundancy
2024-12-13 16:41:13 +01:00
Arthur Meyre
bdbec55e84 chore: do not crash when ark-ff or wasm_bindgen macros have cfg issues 2024-12-13 16:31:25 +01:00
Arthur Meyre
33131c664a chore(ci): toolchain update 2024-12-13 16:31:25 +01:00
Arthur Meyre
1151bb267e chore: update dependencies 2024-12-13 16:31:25 +01:00
Agnes Leroy
ce9679f1ee doc(gpu): add an example to use arrays on GPU 2024-12-13 10:46:28 +01:00
Agnes Leroy
23b43c33c7 fix(gpu): fix scalar ne 2024-12-12 11:26:51 +01:00
Agnes Leroy
6feaf49906 chore(gpu): remove stream sync in broadcast lut 2024-12-12 10:19:02 +01:00
Agnes Leroy
25f4e5f279 fix(gpu): fix equal 2024-12-12 09:21:44 +01:00
David Testé
c1f05cbf85 chore(ci): use composite action to setup hyperstack instance 2024-12-12 09:18:33 +01:00
Mayeul@Zama
382f44766c chore(strings): remove string_ prefix on tests 2024-12-11 10:25:31 +01:00
Mayeul@Zama
c47b37c0e1 chore(strings): remove string_ prefix on methods 2024-12-11 10:25:31 +01:00
Mayeul@Zama
76d1f05e6a refactor(strings): add server and client key wrappers 2024-12-11 10:25:31 +01:00
143 changed files with 2173 additions and 1904 deletions

View File

@@ -0,0 +1,53 @@
name: Setup Cuda
description: Setup Cuda on Hyperstack instance

inputs:
  cuda-version:
    description: Version of Cuda to use
    required: true
  gcc-version:
    description: Version of GCC to use
    required: true
  cmake-version:
    description: Version of cmake to use
    default: 3.29.6

runs:
  using: "composite"
  steps:
    # Mandatory on hyperstack since a bootable volume is not re-usable yet.
    - name: Install dependencies
      shell: bash
      run: |
        sudo apt update
        sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
        wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
        tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
        cd cmake-${{ inputs.cmake-version }}
        ./bootstrap
        make -j"$(nproc)"
        sudo make install

    - name: Export CUDA variables
      shell: bash
      run: |
        CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
        echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
        echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
        echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
        echo "CUDACXX=/usr/local/cuda-${{ inputs.cuda-version }}/bin/nvcc" >> "${GITHUB_ENV}"

    # Specify the correct host compilers
    - name: Export gcc and g++ variables
      shell: bash
      run: |
        {
          echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
          echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
          echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
          echo "HOME=/home/ubuntu";
        } >> "${GITHUB_ENV}"

    - name: Check device is detected
      shell: bash
      run: nvidia-smi

View File

@@ -48,28 +48,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -88,27 +79,6 @@ jobs:
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Run benchmarks with AVX512
run: |
make bench_pbs_gpu

View File

@@ -13,6 +13,8 @@ on:
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
jobs:
parse-inputs:

View File

@@ -75,28 +75,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -115,30 +106,6 @@ jobs:
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run benchmarks
run: |
make bench_hlapi_erc20_gpu

View File

@@ -15,6 +15,7 @@ on:
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
- "multi-a100-nvlink (n3-A100x8-NVLink)"
command:
description: "Benchmark command to run"

View File

@@ -145,28 +145,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -185,37 +176,6 @@ jobs:
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
@@ -245,6 +205,13 @@ jobs:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash
run: |

View File

@@ -94,28 +94,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -125,29 +116,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto and internal CUDA backend tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu

View File

@@ -92,28 +92,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -123,29 +114,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto and internal CUDA backend tests
run: |
make test_core_crypto_gpu

View File

@@ -49,9 +49,6 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
@@ -71,6 +68,12 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -80,29 +83,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto, integer and internal CUDA backend tests
run: |
make test_gpu

View File

@@ -94,28 +94,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -125,29 +116,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run multi-bit CUDA integer compression tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu

View File

@@ -53,25 +53,16 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -81,29 +72,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run tests
run: |
make test_integer_long_run_gpu

View File

@@ -94,26 +94,16 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -123,29 +113,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci

View File

@@ -94,26 +94,16 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -123,29 +113,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci

View File

@@ -101,29 +101,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -133,25 +123,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -160,10 +131,6 @@ jobs:
echo "NIGHTLY_TESTS=TRUE";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer multi-bit tests
run: |
make test_signed_integer_multi_bit_gpu_ci

View File

@@ -94,26 +94,16 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -123,29 +113,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci

View File

@@ -94,26 +94,16 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -123,29 +113,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci

View File

@@ -100,26 +100,16 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
@@ -129,25 +119,6 @@ jobs:
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -156,10 +127,6 @@ jobs:
echo "NIGHTLY_TESTS=TRUE";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer multi-bit tests
run: |
make test_unsigned_integer_multi_bit_gpu_ci

View File

@@ -19,11 +19,14 @@ exclude = [
"utils/cargo-tfhe-lints"
]
[workspace.dependencies]
aligned-vec = { version = "0.5", default-features = false }
aligned-vec = { version = "0.6", default-features = false }
bytemuck = "1.14.3"
dyn-stack = { version = "0.10", default-features = false }
dyn-stack = { version = "0.11", default-features = false }
itertools = "0.13"
num-complex = "0.4"
pulp = { version = "0.18.22", default-features = false }
pulp = { version = "0.20.0", default-features = false }
rand = "0.8"
rayon = "1"
serde = { version = "1.0", default-features = false }
wasm-bindgen = ">=0.2.86,<0.2.94"

View File

@@ -25,6 +25,7 @@ BACKWARD_COMPAT_DATA_BRANCH?=v0.4
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
TFHE_SPEC:=tfhe
WASM_PACK_VERSION="0.13.1"
# We are kind of hacking the cut here, the version cannot contain a quote '"'
WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
WEB_RUNNER_DIR=web-test-runner
@@ -116,8 +117,8 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
install_wasm_pack: install_rs_build_toolchain
@wasm-pack --version > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.0 || \
@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
.PHONY: install_node # Install last version of NodeJS via nvm
@@ -1305,7 +1306,9 @@ sha256_bool: install_rs_check_toolchain
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
clippy_all tfhe_lints check_compile_tests
clippy_all check_compile_tests
# TFHE lints deactivated as it's incompatible with 1.83 - temporary
# tfhe_lints
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change

View File

@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rayon = { version = "1.7.0"}
rayon = { workspace = true }
[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
path = "../../tfhe"

View File

@@ -479,7 +479,6 @@ template <typename Torus> struct int_radix_lut {
cuda_memcpy_async_gpu_to_gpu(dst_lut_indexes, src_lut_indexes,
num_blocks * sizeof(Torus), streams[i],
gpu_indexes[i]);
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
}
}
}
@@ -3063,7 +3062,7 @@ template <typename Torus> struct int_are_all_block_true_buffer {
// This map store LUTs that checks the equality between some input and values
// of interest in are_all_block_true(), as with max_value (the maximum message
// value).
std::unordered_map<int, int_radix_lut<Torus> *> is_equal_to_lut_map;
int_radix_lut<Torus> *is_max_value;
int_are_all_block_true_buffer(cudaStream_t const *streams,
uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -3084,16 +3083,26 @@ template <typename Torus> struct int_are_all_block_true_buffer {
tmp_out = (Torus *)cuda_malloc_async((params.big_lwe_dimension + 1) *
num_radix_blocks * sizeof(Torus),
streams[0], gpu_indexes[0]);
is_max_value =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 2,
num_radix_blocks, allocate_gpu_memory);
auto is_max_value_f = [max_value](Torus x) -> Torus {
return x == max_value;
};
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], is_max_value->get_lut(0, 0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, is_max_value_f);
is_max_value->broadcast_lut(streams, gpu_indexes, 0);
}
}
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count) {
for (auto &lut : is_equal_to_lut_map) {
lut.second->release(streams, gpu_indexes, gpu_count);
delete (lut.second);
}
is_equal_to_lut_map.clear();
is_max_value->release(streams, gpu_indexes, gpu_count);
delete (is_max_value);
cuda_drop_async(tmp_block_accumulated, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_out, streams[0], gpu_indexes[0]);
@@ -3270,8 +3279,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
int_radix_params params;
COMPARISON_TYPE op;
Torus *tmp_packed_left;
Torus *tmp_packed_right;
Torus *tmp_packed;
std::function<Torus(Torus)> operator_f;
@@ -3308,11 +3316,8 @@ template <typename Torus> struct int_comparison_diff_buffer {
Torus big_size = (params.big_lwe_dimension + 1) * sizeof(Torus);
tmp_packed_left = (Torus *)cuda_malloc_async(
big_size * (num_radix_blocks / 2), streams[0], gpu_indexes[0]);
tmp_packed_right = (Torus *)cuda_malloc_async(
big_size * (num_radix_blocks / 2), streams[0], gpu_indexes[0]);
tmp_packed = (Torus *)cuda_malloc_async(big_size * num_radix_blocks,
streams[0], gpu_indexes[0]);
tree_buffer = new int_tree_sign_reduction_buffer<Torus>(
streams, gpu_indexes, gpu_count, operator_f, params, num_radix_blocks,
@@ -3335,8 +3340,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
reduce_signs_lut->release(streams, gpu_indexes, gpu_count);
delete reduce_signs_lut;
cuda_drop_async(tmp_packed_left, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_packed_right, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_packed, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_signs_a, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_signs_b, streams[0], gpu_indexes[0]);
}
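
The hunks above replace the separate tmp_packed_left / tmp_packed_right allocations with a single tmp_packed buffer whose right half starts at num_radix_blocks / 2 * big_lwe_size. A minimal host-side sketch of that layout, with illustrative sizes and a std::vector standing in for cuda_malloc_async, not the backend's actual allocation path:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  using Torus = uint64_t;
  const uint32_t num_radix_blocks = 8;     // must be even (see the PANIC checks added below)
  const uint32_t big_lwe_dimension = 2048; // illustrative value
  const size_t big_lwe_size = big_lwe_dimension + 1;

  // One contiguous allocation replaces tmp_packed_left + tmp_packed_right.
  std::vector<Torus> tmp_packed(big_lwe_size * num_radix_blocks);

  Torus *packed_left = tmp_packed.data();
  Torus *packed_right =
      tmp_packed.data() + (num_radix_blocks / 2) * big_lwe_size;

  const uint32_t packed_num_radix_blocks = num_radix_blocks / 2;
  // Because the two halves are contiguous, a single univariate LUT pass over
  // 2 * packed_num_radix_blocks blocks covers both operands at once.
  assert(packed_right == packed_left + packed_num_radix_blocks * big_lwe_size);
  assert(packed_left + 2 * packed_num_radix_blocks * big_lwe_size ==
         tmp_packed.data() + tmp_packed.size());
  return 0;
}
```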

View File

@@ -58,6 +58,9 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
case GE:
case LT:
case LE:
if (num_radix_blocks % 2 != 0)
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
"even.")
host_integer_radix_difference_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -68,6 +71,8 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
break;
case MAX:
case MIN:
if (num_radix_blocks % 2 != 0)
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
host_integer_radix_maxmin_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),

View File

@@ -85,16 +85,19 @@ __host__ void are_all_comparisons_block_true(
while (remaining_blocks > 0) {
// Split in max_value chunks
uint32_t chunk_length = std::min(max_value, remaining_blocks);
int num_chunks = remaining_blocks / chunk_length;
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
// as in the worst case we will be adding `max_value` ones
auto input_blocks = tmp_out;
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
auto is_equal_to_num_blocks_map =
&are_all_block_true_buffer->is_equal_to_lut_map;
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
uint32_t chunk_lengths[num_chunks];
auto begin_remaining_blocks = remaining_blocks;
for (int i = 0; i < num_chunks; i++) {
uint32_t chunk_length =
std::min(max_value, begin_remaining_blocks - i * max_value);
chunk_lengths[i] = chunk_length;
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
input_blocks, big_lwe_dimension,
chunk_length);
@@ -111,29 +114,31 @@ __host__ void are_all_comparisons_block_true(
// is_non_zero_lut_buffer LUT
lut = mem_ptr->eq_buffer->is_non_zero_lut;
} else {
if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
(*is_equal_to_num_blocks_map).end()) {
// The LUT is already computed
lut = (*is_equal_to_num_blocks_map)[chunk_length];
} else {
if (chunk_lengths[num_chunks - 1] != max_value) {
// LUT needs to be computed
auto new_lut =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
max_value, num_radix_blocks, true);
uint32_t chunk_length = chunk_lengths[num_chunks - 1];
auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
return x == chunk_length;
};
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], new_lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
is_equal_to_num_blocks_lut_f);
new_lut->broadcast_lut(streams, gpu_indexes, 0);
(*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
lut = new_lut;
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
for (int index = 0; index < num_chunks; index++) {
if (index == num_chunks - 1) {
h_lut_indexes[index] = 1;
} else {
h_lut_indexes[index] = 0;
}
}
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
h_lut_indexes, num_chunks * sizeof(Torus),
streams[0], gpu_indexes[0]);
is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
}
lut = is_max_value_lut;
}
// Applies the LUT
@@ -182,14 +187,18 @@ __host__ void is_at_least_one_comparisons_block_true(
uint32_t remaining_blocks = num_radix_blocks;
while (remaining_blocks > 0) {
// Split in max_value chunks
uint32_t chunk_length = std::min(max_value, remaining_blocks);
int num_chunks = remaining_blocks / chunk_length;
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
// as in the worst case we will be adding `max_value` ones
auto input_blocks = mem_ptr->tmp_lwe_array_out;
auto accumulator = buffer->tmp_block_accumulated;
uint32_t chunk_lengths[num_chunks];
auto begin_remaining_blocks = remaining_blocks;
for (int i = 0; i < num_chunks; i++) {
uint32_t chunk_length =
std::min(max_value, begin_remaining_blocks - i * max_value);
chunk_lengths[i] = chunk_length;
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
input_blocks, big_lwe_dimension,
chunk_length);
@@ -481,8 +490,9 @@ __host__ void host_integer_radix_difference_check_kb(
if (carry_modulus >= message_modulus) {
// Packing is possible
// Pack inputs
Torus *packed_left = diff_buffer->tmp_packed_left;
Torus *packed_right = diff_buffer->tmp_packed_right;
Torus *packed_left = diff_buffer->tmp_packed;
Torus *packed_right =
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
// In case the ciphertext is signed, the sign block and the one before it
// are handled separately
if (mem_ptr->is_signed) {
@@ -501,10 +511,7 @@ __host__ void host_integer_radix_difference_check_kb(
auto identity_lut = mem_ptr->identity_lut;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
packed_num_radix_blocks, identity_lut);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
packed_num_radix_blocks, identity_lut);
2 * packed_num_radix_blocks, identity_lut);
lhs = packed_left;
rhs = packed_right;
@@ -533,11 +540,13 @@ __host__ void host_integer_radix_difference_check_kb(
// Compare the last block before the sign block separately
auto identity_lut = mem_ptr->identity_lut;
Torus *packed_left = diff_buffer->tmp_packed;
Torus *packed_right =
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
Torus *last_left_block_before_sign_block =
diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
packed_left + packed_num_radix_blocks * big_lwe_size;
Torus *last_right_block_before_sign_block =
diff_buffer->tmp_packed_right +
packed_num_radix_blocks * big_lwe_size;
packed_right + packed_num_radix_blocks * big_lwe_size;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
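
The chunking change above switches num_chunks to a ceiling division over max_value and records each chunk's length; a single two-slot is_max_value LUT is reused, with slot 1 regenerated only when the final chunk is shorter than max_value, instead of building one LUT per chunk length. A small standalone sketch of that index arithmetic, with illustrative input values:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const uint32_t remaining_blocks = 7; // example number of boolean blocks
  const uint32_t max_value = 3;        // example: at most 3 blocks summed per chunk

  const uint32_t num_chunks = (remaining_blocks + max_value - 1) / max_value; // ceiling division
  std::vector<uint32_t> chunk_lengths(num_chunks);
  std::vector<uint32_t> lut_indexes(num_chunks);
  for (uint32_t i = 0; i < num_chunks; i++) {
    chunk_lengths[i] = std::min(max_value, remaining_blocks - i * max_value);
    // Slot 0 checks "x == max_value"; slot 1 is regenerated for the last,
    // shorter chunk ("x == chunk_lengths.back()") when one exists.
    lut_indexes[i] =
        (i == num_chunks - 1 && chunk_lengths[i] != max_value) ? 1 : 0;
  }

  for (uint32_t i = 0; i < num_chunks; i++)
    std::printf("chunk %u: length=%u lut_index=%u\n", i, chunk_lengths[i],
                lut_indexes[i]);
  // Prints: chunk 0: length=3 lut_index=0
  //         chunk 1: length=3 lut_index=0
  //         chunk 2: length=1 lut_index=1
}
```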

View File

@@ -22,6 +22,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
case GE:
case LT:
case LE:
if (lwe_ciphertext_count % 2 != 0)
PANIC("Cuda error (scalar comparisons): the number of radix blocks has "
"to be even.")
host_integer_radix_scalar_difference_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -32,6 +35,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
break;
case MAX:
case MIN:
if (lwe_ciphertext_count % 2 != 0)
PANIC("Cuda error (scalar max/min): the number of radix blocks has to be "
"even.")
host_integer_radix_scalar_maxmin_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),

View File

@@ -141,8 +141,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
//////////////
// lsb
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -210,8 +211,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
uint32_t num_lsb_radix_blocks = total_num_radix_blocks;
uint32_t num_scalar_blocks = total_num_scalar_blocks;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -358,8 +360,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
//////////////
// lsb
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -458,8 +461,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
auto lwe_array_ct_out = mem_ptr->tmp_lwe_array_out;
auto lwe_array_sign_out =
lwe_array_ct_out + (num_lsb_radix_blocks / 2) * big_lwe_size;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks - 1,

View File

@@ -38,7 +38,7 @@ int32_t cuda_setup_multi_gpu() {
int get_active_gpu_count(int num_inputs, int gpu_count) {
int active_gpu_count = gpu_count;
if (gpu_count > num_inputs) {
active_gpu_count = num_inputs;
active_gpu_count = 1;
}
return active_gpu_count;
}
@@ -56,8 +56,8 @@ int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count) {
// If there are fewer inputs than GPUs, not all GPUs are active and GPU 0
// handles everything
if (gpu_count > total_num_inputs) {
if (gpu_index < total_num_inputs) {
num_inputs = 1;
if (gpu_index == 0) {
num_inputs = total_num_inputs;
}
} else {
// If there are more inputs than GPUs, all GPUs are active and compute over
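
Taken together, the two hunks above change the distribution policy when there are fewer inputs than GPUs: only GPU 0 is counted as active and it receives every input, instead of spreading one input per GPU. A consolidated sketch of just that branch (the helper name below other than get_active_gpu_count is mine, and the more-inputs-than-GPUs branch is not shown in this diff, so it is omitted):

```cpp
#include <cstdio>

// Revised policy: with more GPUs than inputs, a single GPU is active.
int get_active_gpu_count(int num_inputs, int gpu_count) {
  int active_gpu_count = gpu_count;
  if (gpu_count > num_inputs) {
    active_gpu_count = 1; // previously: num_inputs
  }
  return active_gpu_count;
}

// Hypothetical helper covering only the gpu_count > total_num_inputs case:
// GPU 0 now handles everything, the other GPUs get nothing.
int num_inputs_when_gpus_exceed_inputs(int total_num_inputs, int gpu_index) {
  return gpu_index == 0 ? total_num_inputs : 0;
}

int main() {
  const int total_num_inputs = 3, gpu_count = 8;
  std::printf("active GPUs: %d\n",
              get_active_gpu_count(total_num_inputs, gpu_count));
  for (int g = 0; g < gpu_count; g++)
    std::printf("gpu %d -> %d inputs\n", g,
                num_inputs_when_gpus_exceed_inputs(total_num_inputs, g));
}
```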

View File

@@ -58,6 +58,12 @@ environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-H100x8-NVLink"
[backend.hyperstack.multi-h100-sxm5]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-H100-SXM5x8"
[backend.hyperstack.multi-a100-nvlink]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"

View File

@@ -101,7 +101,7 @@ pub fn check_tfhe_docs_are_tested() -> Result<(), Error> {
.into_iter()
.filter_map(|entry| {
let path = entry.path().canonicalize().ok()?;
if path.is_file() && path.extension().map_or(false, |e| e == "md") {
if path.is_file() && path.extension().is_some_and(|e| e == "md") {
let file_content = std::fs::read_to_string(&path).ok()?;
if file_content.contains("```rust") {
Some(path.to_path_buf())

View File

@@ -13,13 +13,13 @@ rust-version = "1.72"
[dependencies]
aes = "0.8.2"
rayon = { version = "1.5.0", optional = true }
rayon = { workspace = true , optional = true }
[target.'cfg(target_os = "macos")'.dependencies]
libc = "0.2.133"
[dev-dependencies]
rand = "0.8.3"
rand = { workspace = true }
criterion = "0.5.1"
clap = "=4.4.4"

View File

@@ -29,7 +29,7 @@ serde = ["dep:serde", "num-complex/serde"]
[dev-dependencies]
rustfft = "6.0"
rand = "0.8"
rand = { workspace = true }
bincode = "1.3"
more-asserts = "0.3.1"
serde_json = "1.0.96"

View File

@@ -40,7 +40,7 @@ Additionally, an optional 128-bit negacyclic FFT module is provided.
```rust
use tfhe_fft::c64;
use tfhe_fft::ordered::{Method, Plan};
use dyn_stack::{GlobalPodBuffer, PodStack, ReborrowMut};
use dyn_stack::{GlobalPodBuffer, PodStack};
use num_complex::ComplexFloat;
use std::time::Duration;
@@ -48,7 +48,7 @@ fn main() {
const N: usize = 4;
let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
let mut scratch_memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
let mut stack = PodStack::new(&mut scratch_memory);
let stack = PodStack::new(&mut scratch_memory);
let data = [
c64::new(1.0, 0.0),
@@ -58,10 +58,10 @@ fn main() {
];
let mut transformed_fwd = data;
plan.fwd(&mut transformed_fwd, stack.rb_mut());
plan.fwd(&mut transformed_fwd, stack);
let mut transformed_inv = transformed_fwd;
plan.inv(&mut transformed_inv, stack.rb_mut());
plan.inv(&mut transformed_inv, stack);
for (actual, expected) in transformed_inv.iter().map(|z| z / N as f64).zip(data) {
assert!((expected - actual).abs() < 1e-9);

View File

@@ -1,6 +1,6 @@
use core::ptr::NonNull;
use criterion::{criterion_group, criterion_main, Criterion};
use dyn_stack::{PodStack, ReborrowMut, StackReq};
use dyn_stack::{PodStack, StackReq};
use serde::Serialize;
use std::{fs, path::PathBuf};
use tfhe_fft::c64;
@@ -129,7 +129,7 @@ pub fn bench_ffts(c: &mut Criterion) {
StackReq::new_aligned::<c64>(n, 256), // src
StackReq::new_aligned::<c64>(n, 256), // dst
]));
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
let z = c64::new(0.0, 0.0);
use rustfft::FftPlannerAvx;
@@ -139,8 +139,8 @@ pub fn bench_ffts(c: &mut Criterion) {
let unordered =
tfhe_fft::unordered::Plan::new(n, tfhe_fft::unordered::Method::Measure(bench_duration));
let (dst, stack) = stack.rb_mut().make_aligned_with::<c64, _>(n, 64, |_| z);
let (src, mut stack) = stack.make_aligned_with::<c64, _>(n, 64, |_| z);
let (dst, stack) = stack.make_aligned_with::<c64>(n, 64, |_| z);
let (src, stack) = stack.make_aligned_with::<c64>(n, 64, |_| z);
let bench_id = format!("rustfft-fwd-{n}");
c.bench_function(&bench_id, |b| {
@@ -164,19 +164,19 @@ pub fn bench_ffts(c: &mut Criterion) {
tfhe_fft::ordered::Plan::new(n, tfhe_fft::ordered::Method::Measure(bench_duration));
let bench_id = format!("tfhe-ordered-fwd-{n}");
c.bench_function(&bench_id, |b| b.iter(|| ordered.fwd(dst, stack.rb_mut())));
c.bench_function(&bench_id, |b| b.iter(|| ordered.fwd(dst, stack)));
write_to_json(&bench_id, "tfhe-ordered-fwd", n);
}
let bench_id = format!("tfhe-unordered-fwd-{n}");
c.bench_function(&bench_id, |b| {
b.iter(|| unordered.fwd(dst, stack.rb_mut()));
b.iter(|| unordered.fwd(dst, stack));
});
write_to_json(&bench_id, "tfhe-unordered-fwd", n);
let bench_id = format!("tfhe-unordered-inv-{n}");
c.bench_function(&bench_id, |b| {
b.iter(|| unordered.inv(dst, stack.rb_mut()));
b.iter(|| unordered.inv(dst, stack));
});
write_to_json(&bench_id, "tfhe-unordered-inv", n);

View File

@@ -645,7 +645,7 @@ pub mod x86 {
#[inline(always)]
pub(crate) fn two_diff_f64x4(simd: V3, a: f64x4, b: f64x4) -> (f64x4, f64x4) {
two_sum_f64x4(simd, a, simd.f64s_neg(b))
two_sum_f64x4(simd, a, simd.neg_f64s(b))
}
#[inline(always)]
@@ -677,7 +677,7 @@ pub mod x86 {
#[inline(always)]
#[cfg(feature = "nightly")]
pub(crate) fn two_diff_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
two_sum_f64x8(simd, a, simd.f64s_neg(b))
two_sum_f64x8(simd, a, simd.neg_f64s(b))
}
#[cfg(feature = "nightly")]
@@ -714,8 +714,8 @@ pub mod x86 {
simd,
a,
f64x16 {
lo: simd.f64s_neg(b.lo),
hi: simd.f64s_neg(b.hi),
lo: simd.neg_f64s(b.lo),
hi: simd.neg_f64s(b.hi),
},
)
}

View File

@@ -36,14 +36,14 @@
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
//! use tfhe_fft::c64;
//! use tfhe_fft::ordered::{Plan, Method};
//! use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
//! use dyn_stack::{PodStack, GlobalPodBuffer};
//! use num_complex::ComplexFloat;
//! use std::time::Duration;
//!
//! const N: usize = 4;
//! let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
//! let mut scratch_memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
//! let mut stack = PodStack::new(&mut scratch_memory);
//! let stack = PodStack::new(&mut scratch_memory);
//!
//! let data = [
//! c64::new(1.0, 0.0),
@@ -53,10 +53,10 @@
//! ];
//!
//! let mut transformed_fwd = data;
//! plan.fwd(&mut transformed_fwd, stack.rb_mut());
//! plan.fwd(&mut transformed_fwd, stack);
//!
//! let mut transformed_inv = transformed_fwd;
//! plan.inv(&mut transformed_inv, stack.rb_mut());
//! plan.inv(&mut transformed_inv, stack);
//!
//! for (actual, expected) in transformed_inv.iter().map(|z| z / N as f64).zip(data) {
//! assert!((expected - actual).abs() < 1e-9);

View File

@@ -16,7 +16,7 @@ use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
#[cfg(feature = "std")]
use core::time::Duration;
#[cfg(feature = "std")]
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use dyn_stack::{PodStack, SizeOverflow, StackReq};
/// Internal FFT algorithm.
@@ -65,7 +65,7 @@ fn measure_n_runs(
buf: &mut [c64],
twiddles_init: &[c64],
twiddles: &[c64],
stack: PodStack,
stack: &mut PodStack,
) -> Duration {
let n = buf.len();
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
@@ -99,7 +99,7 @@ pub(crate) fn measure_fastest_scratch(n: usize) -> StackReq {
pub(crate) fn measure_fastest(
min_bench_duration_per_algo: Duration,
n: usize,
stack: PodStack,
stack: &mut PodStack,
) -> (FftAlgo, Duration) {
const N_ALGOS: usize = 8;
const MIN_DURATION: Duration = if cfg!(target_arch = "wasm32") {
@@ -116,14 +116,14 @@ pub(crate) fn measure_fastest(
let f = |_| c64 { re: 0.0, im: 0.0 };
let (twiddles, stack) = stack.make_aligned_with::<c64, _>(2 * n, align, f);
let (twiddles, stack) = stack.make_aligned_with::<c64>(2 * n, align, f);
let twiddles_init = &twiddles[..n];
let twiddles = &twiddles[n..];
let (buf, mut stack) = stack.make_aligned_with::<c64, _>(n, align, f);
let (buf, stack) = stack.make_aligned_with::<c64>(n, align, f);
{
// initialize scratch to load it in the cpu cache
drop(stack.rb_mut().make_aligned_with::<c64, _>(n, align, f));
drop(stack.make_aligned_with::<c64>(n, align, f));
}
let mut avg_durations = [Duration::ZERO; N_ALGOS];
@@ -149,8 +149,7 @@ pub(crate) fn measure_fastest(
let mut n_runs: u128 = 1;
loop {
let duration =
measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack.rb_mut());
let duration = measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack);
if duration < MIN_DURATION {
n_runs *= 2;
@@ -165,8 +164,7 @@ pub(crate) fn measure_fastest(
*avg = if n_runs <= init_n_runs {
approx_duration
} else {
let duration =
measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack.rb_mut());
let duration = measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack);
duration_div_f64(duration, n_runs as f64)
};
}
@@ -339,7 +337,7 @@ impl Plan {
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack);
/// ```
pub fn fwd(&self, buf: &mut [c64], stack: PodStack) {
pub fn fwd(&self, buf: &mut [c64], stack: &mut PodStack) {
let n = self.fft_size();
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
let (w_init, w) = split_2(&self.twiddles);
@@ -353,19 +351,19 @@ impl Plan {
#[cfg_attr(not(feature = "std"), doc = " ```ignore")]
/// use tfhe_fft::c64;
/// use tfhe_fft::ordered::{Method, Plan};
/// use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
/// use dyn_stack::{PodStack, GlobalPodBuffer};
/// use core::time::Duration;
///
/// let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
///
/// let mut memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
/// let mut stack = PodStack::new(&mut memory);
/// let stack = PodStack::new(&mut memory);
///
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack.rb_mut());
/// plan.fwd(&mut buf, stack);
/// plan.inv(&mut buf, stack);
/// ```
pub fn inv(&self, buf: &mut [c64], stack: PodStack) {
pub fn inv(&self, buf: &mut [c64], stack: &mut PodStack) {
let n = self.fft_size();
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
let (w_init, w) = split_2(&self.twiddles_inv);

View File

@@ -18,7 +18,7 @@ use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
#[cfg(feature = "std")]
use core::time::Duration;
#[cfg(feature = "std")]
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use dyn_stack::{PodStack, SizeOverflow, StackReq};
#[inline(always)]
@@ -553,7 +553,7 @@ fn measure_fastest_scratch(n: usize) -> StackReq {
fn measure_fastest(
mut min_bench_duration_per_algo: Duration,
n: usize,
mut stack: PodStack,
stack: &mut PodStack,
) -> (FftAlgo, usize, Duration) {
const MIN_DURATION: Duration = Duration::from_millis(1);
min_bench_duration_per_algo = min_bench_duration_per_algo.max(MIN_DURATION);
@@ -581,11 +581,8 @@ fn measure_fastest(
n_algos += 1;
// we'll measure the corresponding plan
let (base_algo, duration) = crate::ordered::measure_fastest(
min_bench_duration_per_algo,
base_n,
stack.rb_mut(),
);
let (base_algo, duration) =
crate::ordered::measure_fastest(min_bench_duration_per_algo, base_n, stack);
algos[i] = Some(base_algo);
@@ -599,11 +596,9 @@ fn measure_fastest(
let f = |_| c64 { re: 0.0, im: 0.0 };
let align = CACHELINE_ALIGN;
let (w, stack) = stack
.rb_mut()
.make_aligned_with::<c64, _>(n + base_n, align, f);
let (scratch, stack) = stack.make_aligned_with::<c64, _>(base_n, align, f);
let (z, _) = stack.make_aligned_with::<c64, _>(n, align, f);
let (w, stack) = stack.make_aligned_with::<c64>(n + base_n, align, f);
let (scratch, stack) = stack.make_aligned_with::<c64>(base_n, align, f);
let (z, _) = stack.make_aligned_with::<c64>(n, align, f);
let n_runs = min_bench_duration_per_algo.as_secs_f64()
/ (duration.as_secs_f64() * (n / base_n) as f64);
@@ -823,7 +818,7 @@ impl Plan {
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack);
/// ```
pub fn fwd(&self, buf: &mut [c64], stack: PodStack) {
pub fn fwd(&self, buf: &mut [c64], stack: &mut PodStack) {
assert_eq!(self.fft_size(), buf.len());
let (scratch, _) = stack.make_aligned_raw::<c64>(self.algo().1, CACHELINE_ALIGN);
fwd_depth(
@@ -912,19 +907,19 @@ impl Plan {
#[cfg_attr(not(feature = "std"), doc = " ```ignore")]
/// use tfhe_fft::c64;
/// use tfhe_fft::unordered::{Method, Plan};
/// use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
/// use dyn_stack::{PodStack, GlobalPodBuffer};
/// use core::time::Duration;
///
/// let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
///
/// let mut memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
/// let mut stack = PodStack::new(&mut memory);
/// let stack = PodStack::new(&mut memory);
///
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack.rb_mut());
/// plan.fwd(&mut buf, stack);
/// plan.inv(&mut buf, stack);
/// ```
pub fn inv(&self, buf: &mut [c64], stack: PodStack) {
pub fn inv(&self, buf: &mut [c64], stack: &mut PodStack) {
assert_eq!(self.fft_size(), buf.len());
let (scratch, _) = stack.make_aligned_raw::<c64>(self.algo().1, CACHELINE_ALIGN);
inv_depth(
@@ -995,7 +990,7 @@ impl Plan {
base_n: usize,
}
impl<'de, 'a> Visitor<'de> for SeqVisitor<'a> {
impl<'de> Visitor<'de> for SeqVisitor<'_> {
type Value = ();
fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
@@ -1062,7 +1057,7 @@ fn bit_rev_twice_inv(nbits: u32, base_nbits: u32, i: usize) -> usize {
mod tests {
use super::*;
use alloc::vec;
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use num_complex::ComplexFloat;
use rand::random;
@@ -1157,8 +1152,8 @@ mod tests {
},
);
let mut mem = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
let mut stack = PodStack::new(&mut mem);
plan.fwd(&mut z, stack.rb_mut());
let stack = PodStack::new(&mut mem);
plan.fwd(&mut z, stack);
plan.inv(&mut z, stack);
for z in &mut z {
@@ -9400,7 +9395,7 @@ mod tests {
mod tests_serde {
use super::*;
use alloc::{vec, vec::Vec};
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use num_complex::ComplexFloat;
use rand::random;
@@ -9440,9 +9435,9 @@ mod tests_serde {
.unwrap()
.or(plan2.fft_scratch().unwrap()),
);
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
plan1.fwd(&mut z, stack.rb_mut());
plan1.fwd(&mut z, stack);
let mut buf = Vec::<u8>::new();
let mut serializer = bincode::Serializer::new(&mut buf, bincode::options());
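Editor's note: the hunks above all apply the same `dyn_stack` calling-convention change — scratch space is now threaded as `&mut PodStack` instead of an owned `PodStack<'_>`, so the `ReborrowMut`/`rb_mut()` plumbing disappears and call sites rely on ordinary `&mut` reborrowing (with an explicit `let stack = &mut *stack;` inside loops, as seen later in this diff). Below is a minimal, self-contained sketch of the new convention, not code from the crate: the helper function, buffer size, and `StackReq::new` sizing are illustrative, while `GlobalPodBuffer`, `PodStack::new`, and `make_aligned_raw` are the entry points already visible in this diff.

```rust
use dyn_stack::{GlobalPodBuffer, PodStack, StackReq};

// Illustrative helper: carve a temporary f64 buffer out of caller-provided scratch.
fn sum_with_scratch(values: &[f64], stack: &mut PodStack) -> f64 {
    let (tmp, _rest) = stack.make_aligned_raw::<f64>(values.len(), 64);
    tmp.copy_from_slice(values);
    tmp.iter().sum()
}

fn main() {
    let mut mem = GlobalPodBuffer::new(StackReq::new::<f64>(1024));
    // `PodStack::new` now hands back `&mut PodStack`.
    let stack = PodStack::new(&mut mem);
    // No `stack.rb_mut()` between calls: the mutable reference is implicitly
    // reborrowed at each call site.
    let a = sum_with_scratch(&[1.0, 2.0, 3.0], stack);
    let b = sum_with_scratch(&[4.0, 5.0], stack);
    assert_eq!(a + b, 15.0);
}
```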

View File

@@ -23,7 +23,7 @@ nightly = ["pulp/nightly"]
[dev-dependencies]
criterion = "0.4"
rand = "0.8"
rand = { workspace = true }
serde = "1.0.163"
serde_json = "1.0.96"

View File

@@ -16,8 +16,8 @@ ark-bls12-381 = "0.5.0"
ark-ec = { version = "0.5.0", features = ["parallel"] }
ark-ff = { version = "0.5.0", features = ["parallel"] }
ark-poly = { version = "0.5.0", features = ["parallel"] }
rand = "0.8.5"
rayon = "1.8.0"
rand = { workspace = true }
rayon = { workspace = true }
sha3 = "0.10.8"
serde = { workspace = true, features = ["default", "derive"] }
zeroize = "1.7.0"
@@ -26,7 +26,7 @@ tfhe-versionable = { version = "0.3.2", path = "../utils/tfhe-versionable" }
[dev-dependencies]
serde_json = "~1.0"
itertools = "0.11.0"
itertools = { workspace = true }
bincode = "1.3.3"
criterion = "0.5.1"

View File

@@ -1,3 +1,6 @@
#![allow(unexpected_cfgs)]
// This is a bug/unwanted behavior from ark-ff macro, for now warn instead of erroring
use ark_ec::bls12::{Bls12, Bls12Config, TwistType};
use ark_ff::fields::*;
use ark_ff::MontFp;

View File

@@ -17,12 +17,12 @@ exclude = [
"/js_on_wasm_tests/",
"/web_wasm_parallel_tests/",
]
rust-version = "1.81"
rust-version = "1.83"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dev-dependencies]
rand = "0.8.5"
rand = { workspace = true }
rand_distr = "0.4.3"
lazy_static = { version = "1.4.0" }
criterion = "0.5.1"
@@ -33,7 +33,6 @@ serde_json = "1.0.94"
clap = { version = "=4.4.4", features = ["derive"] }
# Used in user documentation
fs2 = { version = "0.4.3" }
itertools = "0.11.0"
statrs = "0.16"
# For erf and normality test
libm = "0.2.6"
@@ -49,6 +48,7 @@ ron = "0.8"
tfhe-backward-compat-data = { git = "https://github.com/zama-ai/tfhe-backward-compat-data.git", branch = "v0.4", default-features = false, features = [
"load",
] }
strum = { version = "0.26", features = ["derive"] }
[build-dependencies]
cbindgen = { version = "0.26.0", optional = true }
@@ -60,7 +60,7 @@ tfhe-csprng = { version = "0.4.1", path = "../tfhe-csprng", features = [
] }
lazy_static = { version = "1.4.0", optional = true }
serde = { workspace = true, features = ["default", "derive"] }
rayon = { version = "1.5.0" }
rayon = { workspace = true }
bincode = "1.3.3"
tfhe-fft = { version = "0.6.0", path = "../tfhe-fft", features = [
"serde",
@@ -75,8 +75,7 @@ paste = "1.0.7"
fs2 = { version = "0.4.3", optional = true }
# Used for OPRF in shortint
sha3 = { version = "0.10", optional = true }
# While we wait for repeat_n in rust standard library
itertools = "0.11.0"
itertools = { workspace = true }
rand_core = { version = "0.6.4", features = ["std"] }
tfhe-zk-pok = { version = "0.3.1", path = "../tfhe-zk-pok", optional = true }
tfhe-versionable = { version = "0.3.2", path = "../utils/tfhe-versionable" }
@@ -157,6 +156,13 @@ x86_64-unix = ["x86_64", "seeder_unix"]
aarch64 = ["generator_aarch64_aes"]
aarch64-unix = ["aarch64", "seeder_unix"]
# Cover several profiles as we cannot have a wildcard it seems
[package.metadata.wasm-pack.profile.dev.wasm-bindgen]
split-linked-modules = true
[package.metadata.wasm-pack.profile.release.wasm-bindgen]
split-linked-modules = true
[package.metadata.docs.rs]
# TODO: manage builds for docs.rs based on their documentation https://docs.rs/about
features = ["x86_64-unix", "boolean", "shortint", "integer", "gpu", "zk-pok"]
@@ -342,4 +348,6 @@ unexpected_cfgs = { level = "warn", check-cfg = [
'cfg(bench)',
'cfg(tarpaulin)',
'cfg(tfhe_lints)',
# This is a bug/unwanted behavior from wasm_bindgen macro, for now warn instead of erroring
'cfg(wasm_bindgen_unstable_test_coverage)',
] }

View File

@@ -215,7 +215,7 @@ int main(void) {
config_builder_default(&builder);
config_builder_use_custom_parameters(&builder,
SHORTINT_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS);
SHORTINT_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64);
config_builder_build(builder, &config);
ClientKey *client_key = NULL;
@@ -243,7 +243,7 @@ int main(void) {
config_builder_default(&builder);
config_builder_use_custom_parameters(&builder,
SHORTINT_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS);
SHORTINT_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64);
config_builder_build(builder, &config);
ClientKey *client_key = NULL;

View File

@@ -214,7 +214,7 @@ use tfhe::{
fn main() {
let config = ConfigBuilder::default()
.use_custom_parameters(
tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
)
.build();
let (client_key, _) = generate_keys(config);

View File

@@ -34,7 +34,7 @@ function fhe_uint32_example() {
// the error message will be displayed in the console
init_panic_hook();
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS);
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64);
let config = TfheConfigBuilder.default()
.build();
@@ -79,7 +79,7 @@ async function example() {
await initThreadPool(navigator.hardwareConcurrency);
await init_panic_hook();
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS);
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64);
// ....
}
```

View File

@@ -45,7 +45,7 @@ use tfhe::{
fn main() {
let config = ConfigBuilder::default()
.use_custom_parameters(
tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
)
.build();
let (client_key, _) = generate_keys(config);

View File

@@ -256,3 +256,78 @@ fn main() {
}
```
## Array types
It is possible to use array types on GPU, just as [on CPU](array.md). Here is an example showing how to do it:
```rust
use tfhe::{ConfigBuilder, set_server_key, ClearArray, ClientKey, CompressedServerKey};
use tfhe::array::GpuFheUint32Array;
use tfhe::prelude::*;
fn main() {
let config = ConfigBuilder::default().build();
let cks = ClientKey::generate(config);
let compressed_server_key = CompressedServerKey::new(&cks);
let gpu_key = compressed_server_key.decompress_to_gpu();
set_server_key(gpu_key);
let num_elems = 4 * 4;
let clear_xs = (0..num_elems as u32).collect::<Vec<_>>();
let clear_ys = vec![1u32; num_elems];
// Encrypted 2D array with values
// [[ 0, 1, 2, 3]
// [ 4, 5, 6, 7]
// [ 8, 9, 10, 11]
// [ 12, 13, 14, 15]]
let xs = GpuFheUint32Array::try_encrypt((clear_xs.as_slice(), vec![4, 4]), &cks).unwrap();
// Encrypted 2D array with values
// [[ 1, 1, 1, 1]
// [ 1, 1, 1, 1]
// [ 1, 1, 1, 1]
// [ 1, 1, 1, 1]]
let ys = GpuFheUint32Array::try_encrypt((clear_ys.as_slice(), vec![4, 4]), &cks).unwrap();
assert_eq!(xs.num_dim(), 2);
assert_eq!(xs.shape(), &[4, 4]);
assert_eq!(ys.num_dim(), 2);
assert_eq!(ys.shape(), &[4, 4]);
// Take a sub slice
// [[ 10, 11]
// [ 14, 15]]
let xss = xs.slice(&[2..4, 2..4]);
// Take a sub slice
// [[ 1, 1]
// [ 1, 1]]
let yss = ys.slice(&[2..4, 2..4]);
assert_eq!(xss.num_dim(), 2);
assert_eq!(xss.shape(), &[2, 2]);
assert_eq!(yss.num_dim(), 2);
assert_eq!(yss.shape(), &[2, 2]);
let r = &xss + &yss;
// Result is
// [[ 11, 12]
// [ 15, 16]]
let result: Vec<u32> = r.decrypt(&cks);
assert_eq!(result, vec![11, 12, 15, 16]);
// Clear 2D array with values
// [[ 10, 20]
// [ 30, 40]]
let clear_array = ClearArray::new(vec![10u32, 20u32, 30u32, 40u32], vec![2, 2]);
let r = &xss + &clear_array;
// Result is
// [[ 20, 31]
// [ 44, 55]]
let r: Vec<u32> = r.decrypt(&cks);
assert_eq!(r, vec![20, 31, 44, 55]);
}
```

View File

@@ -7,9 +7,10 @@ use tfhe::shortint::keycache::KEY_CACHE_WOPBS;
use tfhe::shortint::keycache::{KEY_CACHE, KEY_CACHE_KSK};
#[cfg(tarpaulin)]
use tfhe::shortint::parameters::coverage_parameters::{
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS, COVERAGE_PARAM_MESSAGE_2_CARRY_2_KS_PBS,
COVERAGE_PARAM_MESSAGE_2_CARRY_3_KS_PBS, COVERAGE_PARAM_MESSAGE_5_CARRY_1_KS_PBS,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_KS_PBS, COVERAGE_PARAM_MESSAGE_2_CARRY_3_KS_PBS,
COVERAGE_PARAM_MESSAGE_5_CARRY_1_KS_PBS,
COVERAGE_PARAM_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_2_KS_PBS,
};
use tfhe::shortint::parameters::key_switching::p_fail_2_minus_64::ks_pbs::PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS;
@@ -82,8 +83,8 @@ fn client_server_keys() {
COVERAGE_PARAM_MESSAGE_2_CARRY_2_KS_PBS,
COVERAGE_PARAM_MESSAGE_2_CARRY_3_KS_PBS,
COVERAGE_PARAM_MESSAGE_5_CARRY_1_KS_PBS,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
COVERAGE_PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
];
generate_pbs_keys(&PBS_PARAMS);

View File

@@ -10,7 +10,8 @@ use tfhe::integer::U256;
use tfhe::keycache::NamedParam;
use tfhe::prelude::*;
use tfhe::shortint::parameters::classic::compact_pk::{
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS, PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
};
use tfhe::shortint::PBSParameters;
use tfhe::{
@@ -39,7 +40,7 @@ pub fn cpk_and_cctl_sizes(results_file: &Path) {
let operator = OperatorType::Atomic;
{
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS;
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64;
let config = ConfigBuilder::default()
.use_custom_parameters(params)
.build();
@@ -97,7 +98,7 @@ pub fn cpk_and_cctl_sizes(results_file: &Path) {
}
{
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS;
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64;
let config = ConfigBuilder::default()
.use_custom_parameters(params)
.build();
@@ -156,7 +157,7 @@ pub fn cpk_and_cctl_sizes(results_file: &Path) {
// 256 bits
{
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS;
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64;
let config = ConfigBuilder::default()
.use_custom_parameters(params)
.build();
@@ -204,7 +205,7 @@ pub fn cpk_and_cctl_sizes(results_file: &Path) {
}
{
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS;
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64;
let config = ConfigBuilder::default()
.use_custom_parameters(params)
.build();

View File

@@ -11,12 +11,14 @@ use std::path::Path;
use tfhe::keycache::NamedParam;
use tfhe::shortint::keycache::{
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64_NAME,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_NAME, PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_NAME,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64_NAME,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64_NAME,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64_NAME,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64_NAME,
};
use tfhe::shortint::parameters::classic::compact_pk::{
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS, PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
};
use tfhe::shortint::parameters::classic::tuniform::p_fail_2_minus_64::ks_pbs::PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
use tfhe::shortint::parameters::{
@@ -34,8 +36,12 @@ struct Args {
fn params_from_name(name: &str) -> ClassicPBSParameters {
match name.to_uppercase().as_str() {
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_NAME => PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_NAME => PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64_NAME => {
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64
}
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64_NAME => {
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64
}
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64_NAME => {
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64
}

View File

@@ -401,7 +401,7 @@ function hlapi_compact_public_key_encrypt_decrypt_int32_single(config) {
}
test('hlapi_compact_public_key_encrypt_decrypt_int32_big_single', (t) => {
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS);
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64);
let config = TfheConfigBuilder.default()
.use_custom_parameters(block_params)
.build();
@@ -410,7 +410,7 @@ test('hlapi_compact_public_key_encrypt_decrypt_int32_big_single', (t) => {
});
test('hlapi_compact_public_key_encrypt_decrypt_int32_small_single', (t) => {
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS);
const block_params = new ShortintParameters(ShortintParametersName.PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64);
let config = TfheConfigBuilder.default()
.use_custom_parameters(block_params)
.build();

View File

@@ -126,6 +126,7 @@ impl BooleanKeySwitchingParameters {
/// Parameter sets given in TFHE-lib:
/// <https://github.com/tfhe/tfhe/blob/bc71bfae7ad9d5f8ce5f29bdfd691189bfe207f3/src/libtfhe/tfhe_gate_bootstrapping.cpp#L51>
///
/// Original security in 2020 was 129 bits, while it is currently around 120 bits.
pub const TFHE_LIB_PARAMETERS: BooleanParameters = BooleanParameters {
lwe_dimension: LweDimension(630),

View File

@@ -22,36 +22,36 @@ mod zk;
#[repr(C)]
#[allow(non_camel_case_types)]
pub enum FheTypes {
Type_FheBool,
Type_FheUint2,
Type_FheUint4,
Type_FheUint6,
Type_FheUint8,
Type_FheUint10,
Type_FheUint12,
Type_FheUint14,
Type_FheUint16,
Type_FheUint32,
Type_FheUint64,
Type_FheUint128,
Type_FheUint160,
Type_FheUint256,
Type_FheUint512,
Type_FheUint1024,
Type_FheUint2048,
Type_FheInt2,
Type_FheInt4,
Type_FheInt6,
Type_FheInt8,
Type_FheInt10,
Type_FheInt12,
Type_FheInt14,
Type_FheInt16,
Type_FheInt32,
Type_FheInt64,
Type_FheInt128,
Type_FheInt160,
Type_FheInt256,
Type_FheBool = 0,
Type_FheUint4 = 1,
Type_FheUint8 = 2,
Type_FheUint16 = 3,
Type_FheUint32 = 4,
Type_FheUint64 = 5,
Type_FheUint128 = 6,
Type_FheUint160 = 7,
Type_FheUint256 = 8,
Type_FheUint512 = 9,
Type_FheUint1024 = 10,
Type_FheUint2048 = 11,
Type_FheUint2 = 12,
Type_FheUint6 = 13,
Type_FheUint10 = 14,
Type_FheUint12 = 15,
Type_FheUint14 = 16,
Type_FheInt2 = 17,
Type_FheInt4 = 18,
Type_FheInt6 = 19,
Type_FheInt8 = 20,
Type_FheInt10 = 21,
Type_FheInt12 = 22,
Type_FheInt14 = 23,
Type_FheInt16 = 24,
Type_FheInt32 = 25,
Type_FheInt64 = 26,
Type_FheInt128 = 27,
Type_FheInt160 = 28,
Type_FheInt256 = 29,
}
impl From<crate::FheTypes> for FheTypes {
@@ -90,3 +90,14 @@ impl From<crate::FheTypes> for FheTypes {
}
}
}
#[test]
fn fhe_types_enum_to_int_compatible() {
use strum::IntoEnumIterator;
for rust_value in crate::FheTypes::iter() {
let c_value = FheTypes::from(rust_value);
assert_eq!(rust_value as i32, c_value as i32)
}
}
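Editor's note: the reordering above pins every `FheTypes` variant to an explicit discriminant so the C-visible integer values stay stable as new types are added. A small self-contained demo of that design choice follows — this is not the crate's actual enum, just a toy with the same values for a few variants.

```rust
// Demo enum (not the tfhe-rs one): explicit discriminants freeze the C ABI values,
// so later additions take the next free number instead of shifting existing ones.
#[repr(C)]
#[allow(non_camel_case_types, dead_code)]
#[derive(Clone, Copy, Debug, PartialEq)]
enum DemoFheTypes {
    Type_FheBool = 0,
    Type_FheUint4 = 1,
    Type_FheUint8 = 2,
    // Added after stabilization: gets a fresh value, earlier ones are untouched.
    Type_FheUint2 = 12,
}

fn main() {
    assert_eq!(DemoFheTypes::Type_FheBool as i32, 0);
    assert_eq!(DemoFheTypes::Type_FheUint8 as i32, 2);
    assert_eq!(DemoFheTypes::Type_FheUint2 as i32, 12);
}
```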

View File

@@ -326,39 +326,39 @@ expose_as_shortint_pbs_parameters!(
PARAM_MESSAGE_3_CARRY_3_PBS_KS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_4_PBS_KS_GAUSSIAN_2M64,
// CPK
PARAM_MESSAGE_1_CARRY_1_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_1_CARRY_2_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_1_CARRY_3_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_1_CARRY_4_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_1_CARRY_5_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_1_CARRY_6_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_1_CARRY_7_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_2_CARRY_1_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_2_CARRY_3_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_2_CARRY_4_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_2_CARRY_5_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_2_CARRY_6_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_3_CARRY_1_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_3_CARRY_2_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_3_CARRY_3_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_3_CARRY_4_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_3_CARRY_5_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_4_CARRY_1_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_4_CARRY_2_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_4_CARRY_3_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_4_CARRY_4_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_5_CARRY_1_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_5_CARRY_2_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_5_CARRY_3_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_6_CARRY_1_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_6_CARRY_2_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_7_CARRY_1_COMPACT_PK_KS_PBS,
PARAM_MESSAGE_1_CARRY_1_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_1_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_1_CARRY_3_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_1_CARRY_4_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_1_CARRY_5_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_1_CARRY_6_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_1_CARRY_7_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_1_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_3_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_4_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_5_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_6_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_1_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_4_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_5_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_1_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_3_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_4_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_5_CARRY_1_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_5_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_5_CARRY_3_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_6_CARRY_1_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_6_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_7_CARRY_1_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
// CPK SMALL
PARAM_MESSAGE_1_CARRY_1_COMPACT_PK_PBS_KS,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS,
PARAM_MESSAGE_3_CARRY_3_COMPACT_PK_PBS_KS,
PARAM_MESSAGE_4_CARRY_4_COMPACT_PK_PBS_KS,
PARAM_MESSAGE_1_CARRY_1_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_4_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
// TUniform
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
);

View File

@@ -49,7 +49,7 @@ pub fn convert_standard_ggsw_ciphertext_to_fourier_mem_optimized<Scalar, InputCo
input_ggsw: &GgswCiphertext<InputCont>,
output_ggsw: &mut FourierGgswCiphertext<OutputCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
InputCont: Container<Element = Scalar>,

View File

@@ -46,7 +46,7 @@ pub fn convert_standard_lwe_bootstrap_key_to_fourier_mem_optimized<Scalar, Input
input_bsk: &LweBootstrapKey<InputCont>,
output_bsk: &mut FourierLweBootstrapKey<OutputCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
InputCont: Container<Element = Scalar>,

View File

@@ -8,7 +8,7 @@ use crate::core_crypto::entities::*;
use crate::core_crypto::fft_impl::fft64::math::fft::{
par_convert_polynomials_list_to_fourier, Fft, FftView,
};
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_fft::c64;
/// Convert an [`LWE multi_bit bootstrap key`](`LweMultiBitBootstrapKey`) with standard
@@ -50,7 +50,7 @@ pub fn convert_standard_lwe_multi_bit_bootstrap_key_to_fourier_mem_optimized<
input_bsk: &LweMultiBitBootstrapKey<InputCont>,
output_bsk: &mut FourierLweMultiBitBootstrapKey<OutputCont>,
fft: FftView<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
InputCont: Container<Element = Scalar>,
@@ -69,7 +69,7 @@ pub fn convert_standard_lwe_multi_bit_bootstrap_key_to_fourier_mem_optimized<
.zip(input_bsk_as_polynomial_list.iter())
{
// SAFETY: forward_as_torus doesn't write any uninitialized values into its output
fft.forward_as_torus(fourier_poly, coef_poly, stack.rb_mut());
fft.forward_as_torus(fourier_poly, coef_poly, stack);
}
}

View File

@@ -77,10 +77,9 @@ pub struct StandardMultiBitModulusSwitchedCt<
}
impl<
'a,
Scalar: UnsignedInteger + CastInto<usize> + CastFrom<usize>,
C: Container<Element = Scalar> + Sync,
> MultiBitModulusSwitchedCt for StandardMultiBitModulusSwitchedCt<'a, Scalar, C>
> MultiBitModulusSwitchedCt for StandardMultiBitModulusSwitchedCt<'_, Scalar, C>
{
fn lwe_dimension(&self) -> LweDimension {
self.input.lwe_size().to_lwe_dimension()

View File

@@ -233,7 +233,7 @@ pub fn programmable_bootstrap_f128_lwe_ciphertext_mem_optimized<
accumulator: &GlweCiphertext<AccCont>,
fourier_bsk: &Fourier128LweBootstrapKey<KeyCont>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
Scalar: UnsignedTorus + CastInto<usize>,

View File

@@ -233,7 +233,7 @@ pub fn blind_rotate_assign_mem_optimized<
lut: &mut GlweCiphertext<OutputCont>,
fourier_bsk: &FourierLweBootstrapKey<KeyCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
InputScalar: UnsignedTorus + CastInto<usize>,
@@ -455,7 +455,7 @@ pub fn add_external_product_assign_mem_optimized<Scalar, OutputGlweCont, InputGl
ggsw: &FourierGgswCiphertext<GgswCont>,
glwe: &GlweCiphertext<InputGlweCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
OutputGlweCont: ContainerMut<Element = Scalar>,
@@ -746,7 +746,7 @@ pub fn cmux_assign_mem_optimized<Scalar, Cont0, Cont1, GgswCont>(
ct1: &mut GlweCiphertext<Cont1>,
ggsw: &FourierGgswCiphertext<GgswCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
Cont0: ContainerMut<Element = Scalar>,
@@ -1020,7 +1020,7 @@ pub fn programmable_bootstrap_lwe_ciphertext_mem_optimized<
accumulator: &GlweCiphertext<AccCont>,
fourier_bsk: &FourierLweBootstrapKey<KeyCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
InputScalar: UnsignedTorus + CastInto<usize>,
@@ -1091,7 +1091,7 @@ pub fn batch_programmable_bootstrap_lwe_ciphertext_mem_optimized<
accumulator: &GlweCiphertextList<AccCont>,
fourier_bsk: &FourierLweBootstrapKey<KeyCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
InputScalar: UnsignedTorus + CastInto<usize>,

View File

@@ -18,7 +18,7 @@ use crate::core_crypto::commons::traits::*;
use crate::core_crypto::commons::utils::izip;
use crate::core_crypto::entities::*;
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
/// Perform a blind rotation given an input [`LWE ciphertext`](`LweCiphertext`), modifying a look-up
/// table passed as a [`GLWE ciphertext`](`GlweCiphertext`) and an [`LWE bootstrap
@@ -209,7 +209,7 @@ pub fn blind_rotate_ntt64_assign_mem_optimized<InputCont, OutputCont, KeyCont>(
lut: &mut GlweCiphertext<OutputCont>,
bsk: &NttLweBootstrapKey<KeyCont>,
ntt: Ntt64View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
InputCont: Container<Element = u64>,
OutputCont: ContainerMut<Element = u64>,
@@ -220,7 +220,7 @@ pub fn blind_rotate_ntt64_assign_mem_optimized<InputCont, OutputCont, KeyCont>(
mut lut: GlweCiphertextMutView<'_, u64>,
lwe: &[u64],
ntt: Ntt64View<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) {
let (lwe_body, lwe_mask) = lwe.split_last().unwrap();
let modulus = ntt.custom_modulus();
@@ -248,7 +248,7 @@ pub fn blind_rotate_ntt64_assign_mem_optimized<InputCont, OutputCont, KeyCont>(
for (lwe_mask_element, bootstrap_key_ggsw) in izip!(lwe_mask.iter(), bsk.into_ggsw_iter()) {
if *lwe_mask_element != 0u64 {
let stack = stack.rb_mut();
let stack = &mut *stack;
// We copy ct_0 to ct_1
let (ct1, stack) =
stack.collect_aligned(CACHELINE_ALIGN, ct0.as_ref().iter().copied());
@@ -479,7 +479,7 @@ pub fn programmable_bootstrap_ntt64_lwe_ciphertext_mem_optimized<
accumulator: &GlweCiphertext<AccCont>,
bsk: &NttLweBootstrapKey<KeyCont>,
ntt: Ntt64View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
InputCont: Container<Element = u64>,
OutputCont: ContainerMut<Element = u64>,
@@ -492,7 +492,7 @@ pub fn programmable_bootstrap_ntt64_lwe_ciphertext_mem_optimized<
lwe_in: LweCiphertextView<'_, u64>,
accumulator: GlweCiphertextView<'_, u64>,
ntt: Ntt64View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert_eq!(lwe_out.ciphertext_modulus(), lwe_in.ciphertext_modulus());
debug_assert_eq!(
@@ -544,7 +544,7 @@ pub(crate) fn add_external_product_ntt64_assign<InputGlweCont>(
ggsw: NttGgswCiphertextView<'_, u64>,
glwe: &GlweCiphertext<InputGlweCont>,
ntt: Ntt64View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
InputGlweCont: Container<Element = u64>,
{
@@ -565,7 +565,7 @@ pub(crate) fn add_external_product_ntt64_assign<InputGlweCont>(
out.ciphertext_modulus(),
);
let (output_fft_buffer, mut substack0) =
let (output_fft_buffer, substack0) =
stack.make_aligned_raw::<u64>(poly_size * ggsw.glwe_size().0, align);
// output_fft_buffer is initially uninitialized, considered to be implicitly zero, to avoid
// the cost of filling it up with zeros. `is_output_uninit` is set to `false` once
@@ -576,18 +576,18 @@ pub(crate) fn add_external_product_ntt64_assign<InputGlweCont>(
// ------------------------------------------------------ EXTERNAL PRODUCT IN FOURIER DOMAIN
// In this section, we perform the external product in the ntt domain, and accumulate
// the result in the output_fft_buffer variable.
let (mut decomposition, mut substack1) = TensorSignedDecompositionLendingIterNonNative::new(
let (mut decomposition, substack1) = TensorSignedDecompositionLendingIterNonNative::new(
&decomposer,
glwe.as_ref(),
ntt.custom_modulus(),
substack0.rb_mut(),
substack0,
);
// We loop through the levels (we reverse to match the order of the decomposition iterator.)
ggsw.into_levels().for_each(|ggsw_decomp_matrix| {
// We retrieve the decomposition of this level.
let (glwe_level, glwe_decomp_term, mut substack2) =
decomposition.collect_next_term(&mut substack1, align);
let (glwe_level, glwe_decomp_term, substack2) =
decomposition.collect_next_term(substack1, align);
let glwe_decomp_term = GlweCiphertextView::from_container(
&*glwe_decomp_term,
ggsw.polynomial_size(),
@@ -612,7 +612,7 @@ pub(crate) fn add_external_product_ntt64_assign<InputGlweCont>(
glwe_decomp_term.as_polynomial_list().iter()
)
.for_each(|(ggsw_row, glwe_poly)| {
let (ntt_poly, _) = substack2.rb_mut().make_aligned_raw::<u64>(poly_size, align);
let (ntt_poly, _) = substack2.make_aligned_raw::<u64>(poly_size, align);
// We perform the forward ntt transform for the glwe polynomial
ntt.forward(PolynomialMutView::from_container(ntt_poly), glwe_poly);
// Now we loop through the polynomials of the output, and add the
@@ -657,7 +657,7 @@ pub(crate) fn cmux_ntt64_assign(
mut ct1: GlweCiphertextMutView<'_, u64>,
ggsw: NttGgswCiphertextView<'_, u64>,
ntt: Ntt64View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
izip!(ct1.as_mut(), ct0.as_ref(),).for_each(|(c1, c0)| {
*c1 = c1.wrapping_sub_custom_mod(*c0, ntt.custom_modulus());

View File

@@ -327,7 +327,7 @@ pub fn extract_bits_from_lwe_ciphertext_mem_optimized<
delta_log: DeltaLog,
number_of_bits_to_extract: ExtractedBitsCount,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
Scalar: UnsignedTorus + CastInto<usize>,
@@ -661,7 +661,7 @@ pub fn circuit_bootstrap_boolean_vertical_packing_lwe_ciphertext_list_mem_optimi
base_log_cbs: DecompositionBaseLog,
level_cbs: DecompositionLevelCount,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
Scalar: UnsignedTorus + CastInto<usize>,

View File

@@ -23,7 +23,7 @@ impl ComputationBuffers {
/// Return a `PodStack` borrowing from the managed memory buffer for use with optimized fft
/// primitives or other functions using `PodStack` to manage temporary memory.
pub fn stack(&mut self) -> PodStack<'_> {
pub fn stack(&mut self) -> &mut PodStack {
PodStack::new(&mut self.memory)
}
}
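Editor's note: the `stack()` change above is the caller-facing end of the same migration — `ComputationBuffers` now hands out `&mut PodStack` directly. A hedged sketch of typical caller code follows, assuming the usual `ComputationBuffers::new`/`resize` API and its re-export from `core_crypto::prelude`; the scratch requirement is a made-up placeholder, where real callers would take it from the relevant primitive's `*_requirement`/`*_scratch` helper.

```rust
use dyn_stack::StackReq;
use tfhe::core_crypto::prelude::ComputationBuffers;

fn main() {
    // Placeholder scratch requirement for the sketch.
    let req = StackReq::new::<u64>(1 << 12);

    let mut buffers = ComputationBuffers::new();
    buffers.resize(req.unaligned_bytes_required());

    // `stack()` now returns `&mut PodStack`, ready to pass to a mem-optimized primitive.
    let stack = buffers.stack();
    let (scratch, _) = stack.make_aligned_raw::<u64>(16, 64);
    scratch.fill(0);
    assert!(scratch.iter().all(|&x| x == 0));
}
```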

View File

@@ -4,7 +4,7 @@ use crate::core_crypto::commons::math::decomposition::{
};
use crate::core_crypto::commons::numeric::UnsignedInteger;
use crate::core_crypto::commons::parameters::{DecompositionBaseLog, DecompositionLevelCount};
use dyn_stack::{PodStack, ReborrowMut};
use dyn_stack::PodStack;
/// An iterator that yields the terms of the signed decomposition of an integer.
///
@@ -318,8 +318,8 @@ impl<'buffers> TensorSignedDecompositionLendingIterNonNative<'buffers> {
decomposer: &SignedDecomposerNonNative<u64>,
input: &[u64],
modulus: u64,
stack: PodStack<'buffers>,
) -> (Self, PodStack<'buffers>) {
stack: &'buffers mut PodStack,
) -> (Self, &'buffers mut PodStack) {
let shift = modulus.ceil_ilog2() as usize - decomposer.base_log * decomposer.level_count;
let input_size = input.len();
let (states, stack) =
@@ -409,10 +409,9 @@ impl<'buffers> TensorSignedDecompositionLendingIterNonNative<'buffers> {
&mut self,
substack1: &'a mut PodStack,
align: usize,
) -> (DecompositionLevel, &'a mut [u64], PodStack<'a>) {
) -> (DecompositionLevel, &'a mut [u64], &'a mut PodStack) {
let (glwe_level, _, glwe_decomp_term) = self.next_term().unwrap();
let (glwe_decomp_term, substack2) =
substack1.rb_mut().collect_aligned(align, glwe_decomp_term);
let (glwe_decomp_term, substack2) = substack1.collect_aligned(align, glwe_decomp_term);
(glwe_level, glwe_decomp_term, substack2)
}
}

View File

@@ -15,7 +15,7 @@ type WrappingFunction<'data, Element, WrappingType> = fn(
type ChunksWrappingLendingIterator<'data, Element, WrappingType> = std::iter::Map<
std::iter::Zip<
std::slice::Chunks<'data, Element>,
itertools::RepeatN<<WrappingType as CreateFrom<&'data [Element]>>::Metadata>,
core::iter::RepeatN<<WrappingType as CreateFrom<&'data [Element]>>::Metadata>,
>,
WrappingFunction<'data, Element, WrappingType>,
>;
@@ -23,7 +23,7 @@ type ChunksWrappingLendingIterator<'data, Element, WrappingType> = std::iter::Ma
type ChunksExactWrappingLendingIterator<'data, Element, WrappingType> = std::iter::Map<
std::iter::Zip<
std::slice::ChunksExact<'data, Element>,
itertools::RepeatN<<WrappingType as CreateFrom<&'data [Element]>>::Metadata>,
core::iter::RepeatN<<WrappingType as CreateFrom<&'data [Element]>>::Metadata>,
>,
WrappingFunction<'data, Element, WrappingType>,
>;
@@ -54,7 +54,7 @@ type WrappingFunctionMut<'data, Element, WrappingType> = fn(
type ChunksWrappingLendingIteratorMut<'data, Element, WrappingType> = std::iter::Map<
std::iter::Zip<
std::slice::ChunksMut<'data, Element>,
itertools::RepeatN<<WrappingType as CreateFrom<&'data mut [Element]>>::Metadata>,
core::iter::RepeatN<<WrappingType as CreateFrom<&'data mut [Element]>>::Metadata>,
>,
WrappingFunctionMut<'data, Element, WrappingType>,
>;
@@ -62,7 +62,7 @@ type ChunksWrappingLendingIteratorMut<'data, Element, WrappingType> = std::iter:
type ChunksExactWrappingLendingIteratorMut<'data, Element, WrappingType> = std::iter::Map<
std::iter::Zip<
std::slice::ChunksExactMut<'data, Element>,
itertools::RepeatN<<WrappingType as CreateFrom<&'data mut [Element]>>::Metadata>,
core::iter::RepeatN<<WrappingType as CreateFrom<&'data mut [Element]>>::Metadata>,
>,
WrappingFunctionMut<'data, Element, WrappingType>,
>;
@@ -130,7 +130,7 @@ pub trait ContiguousEntityContainer: AsRef<[Self::Element]> {
let entity_view_pod_size = self.get_entity_view_pod_size();
self.as_ref()
.chunks_exact(entity_view_pod_size)
.zip(itertools::repeat_n(meta, entity_count))
.zip(core::iter::repeat_n(meta, entity_count))
.map(|(elt, meta)| Self::EntityView::<'_>::create_from(elt, meta))
}
@@ -219,7 +219,7 @@ pub trait ContiguousEntityContainer: AsRef<[Self::Element]> {
let meta = self.get_self_view_creation_metadata();
self.as_ref()
.chunks(pod_chunk_size)
.zip(itertools::repeat_n(meta, entity_count))
.zip(core::iter::repeat_n(meta, entity_count))
.map(|(elt, meta)| Self::SelfView::<'_>::create_from(elt, meta))
}
@@ -240,7 +240,7 @@ pub trait ContiguousEntityContainer: AsRef<[Self::Element]> {
let meta = self.get_self_view_creation_metadata();
self.as_ref()
.chunks_exact(pod_chunk_size)
.zip(itertools::repeat_n(meta, entity_count))
.zip(core::iter::repeat_n(meta, entity_count))
.map(|(elt, meta)| Self::SelfView::<'_>::create_from(elt, meta))
}
@@ -341,7 +341,7 @@ pub trait ContiguousEntityContainerMut: ContiguousEntityContainer + AsMut<[Self:
let entity_view_pod_size = self.get_entity_view_pod_size();
self.as_mut()
.chunks_exact_mut(entity_view_pod_size)
.zip(itertools::repeat_n(meta, entity_count))
.zip(core::iter::repeat_n(meta, entity_count))
.map(|(elt, meta)| Self::EntityMutView::<'_>::create_from(elt, meta))
}
@@ -417,7 +417,7 @@ pub trait ContiguousEntityContainerMut: ContiguousEntityContainer + AsMut<[Self:
let meta = self.get_self_view_creation_metadata();
self.as_mut()
.chunks_mut(pod_chunk_size)
.zip(itertools::repeat_n(meta, entity_count))
.zip(core::iter::repeat_n(meta, entity_count))
.map(|(elt, meta)| Self::SelfMutView::<'_>::create_from(elt, meta))
}
@@ -439,7 +439,7 @@ pub trait ContiguousEntityContainerMut: ContiguousEntityContainer + AsMut<[Self:
let meta = self.get_self_view_creation_metadata();
self.as_mut()
.chunks_exact_mut(pod_chunk_size)
.zip(itertools::repeat_n(meta, entity_count))
.zip(core::iter::repeat_n(meta, entity_count))
.map(|(elt, meta)| Self::SelfMutView::<'_>::create_from(elt, meta))
}
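Editor's note: the swap from `itertools::repeat_n` to `core::iter::repeat_n` above is enabled by the `rust-version = "1.83"` bump earlier in this diff (`iter::repeat_n` was stabilized in Rust 1.82), which is also why the "while we wait for repeat_n" comment disappears from the manifest. A minimal standard-library sketch of the same zip-with-repeated-metadata pattern — the data and `meta` value here are made up for illustration.

```rust
fn main() {
    // Stand-in for the per-entity metadata that the container traits pair with
    // each chunk of the underlying slice.
    let meta = ("polynomial_size", 4usize);
    let data = [1u64, 2, 3, 4, 5, 6, 7, 8];

    let entities: Vec<(&[u64], (&str, usize))> = data
        .chunks_exact(4)
        .zip(core::iter::repeat_n(meta, data.len() / 4))
        .collect();

    assert_eq!(entities.len(), 2);
    assert!(entities.iter().all(|&(chunk, (_, n))| chunk.len() == n));
}
```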

View File

@@ -15,7 +15,7 @@ use crate::core_crypto::fft_impl::fft64::math::polynomial::{
FourierPolynomialMutView, FourierPolynomialView,
};
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_fft::c64;
/// The caller must provide a properly configured [`FftView`] object and a `PodStack` used as a
@@ -156,7 +156,7 @@ pub fn glwe_fast_keyswitch<Scalar, OutputGlweCont, InputGlweCont, GgswCont>(
pseudo_ggsw: &PseudoFourierGgswCiphertext<GgswCont>,
glwe: &GlweCiphertext<InputGlweCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
OutputGlweCont: ContainerMut<Element = Scalar>,
@@ -174,7 +174,7 @@ pub fn glwe_fast_keyswitch<Scalar, OutputGlweCont, InputGlweCont, GgswCont>(
ggsw: PseudoFourierGgswCiphertextView<'_>,
glwe: &GlweCiphertext<InputGlweCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
InputGlweCont: Container<Element = Scalar>,
@@ -193,7 +193,7 @@ pub fn glwe_fast_keyswitch<Scalar, OutputGlweCont, InputGlweCont, GgswCont>(
ggsw.decomposition_base_log(),
ggsw.decomposition_level_count(),
);
let (output_fft_buffer, mut substack0) =
let (output_fft_buffer, substack0) =
stack.make_aligned_raw::<c64>(fourier_poly_size * ggsw.glwe_size_out().0, align);
// output_fft_buffer is initially uninitialized, considered to be implicitly zero, to avoid
// the cost of filling it up with zeros. `is_output_uninit` is set to `false` once
@@ -204,21 +204,21 @@ pub fn glwe_fast_keyswitch<Scalar, OutputGlweCont, InputGlweCont, GgswCont>(
// ------------ EXTERNAL PRODUCT IN FOURIER DOMAIN
// In this section, we perform the external product in the fourier
// domain, and accumulate the result in the output_fft_buffer variable.
let (mut decomposition, mut substack1) = TensorSignedDecompositionLendingIter::new(
let (mut decomposition, substack1) = TensorSignedDecompositionLendingIter::new(
glwe.as_ref()
.iter()
.map(|s| decomposer.init_decomposer_state(*s)),
DecompositionBaseLog(decomposer.base_log),
DecompositionLevelCount(decomposer.level_count),
substack0.rb_mut(),
substack0,
);
// We loop through the levels (we reverse to match the order of the decomposition
// iterator.)
ggsw.into_levels().for_each(|ggsw_decomp_matrix| {
// We retrieve the decomposition of this level.
let (glwe_level, glwe_decomp_term, mut substack2) =
collect_next_term(&mut decomposition, &mut substack1, align);
let (glwe_level, glwe_decomp_term, substack2) =
collect_next_term(&mut decomposition, substack1, align);
let glwe_decomp_term = GlweCiphertextView::from_container(
&*glwe_decomp_term,
ggsw.polynomial_size(),
@@ -243,9 +243,8 @@ pub fn glwe_fast_keyswitch<Scalar, OutputGlweCont, InputGlweCont, GgswCont>(
glwe_decomp_term.get_mask().as_polynomial_list().iter()
)
.for_each(|(ggsw_row, glwe_poly)| {
let (fourier, substack3) = substack2
.rb_mut()
.make_aligned_raw::<c64>(fourier_poly_size, align);
let (fourier, substack3) =
substack2.make_aligned_raw::<c64>(fourier_poly_size, align);
// We perform the forward fft transform for the glwe polynomial
let fourier = fft
@@ -285,7 +284,7 @@ pub fn glwe_fast_keyswitch<Scalar, OutputGlweCont, InputGlweCont, GgswCont>(
.map(|slice| FourierPolynomialView { data: slice }),
)
.for_each(|(out, fourier)| {
fft.add_backward_as_torus(out, fourier, substack0.rb_mut());
fft.add_backward_as_torus(out, fourier, substack0);
});
}

View File

@@ -52,7 +52,7 @@ pub fn convert_standard_pseudo_ggsw_ciphertext_to_fourier_mem_optimized<
input_ggsw: &PseudoGgswCiphertext<InputCont>,
output_ggsw: &mut PseudoFourierGgswCiphertext<OutputCont>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
InputCont: Container<Element = Scalar>,

View File

@@ -11,7 +11,7 @@ use crate::core_crypto::fft_impl::fft64::math::decomposition::DecompositionLevel
use crate::core_crypto::fft_impl::fft64::math::fft::{FftView, FourierPolynomialList};
use crate::core_crypto::fft_impl::fft64::math::polynomial::FourierPolynomialMutView;
use aligned_vec::{avec, ABox};
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_fft::c64;
/// A pseudo GGSW ciphertext in the Fourier domain.
@@ -263,7 +263,7 @@ pub fn fill_with_forward_fourier_scratch(fft: FftView<'_>) -> Result<StackReq, S
fft.forward_scratch()
}
impl<'a> PseudoFourierGgswCiphertextMutView<'a> {
impl PseudoFourierGgswCiphertextMutView<'_> {
/// Fill a GGSW ciphertext with the Fourier transform of a GGSW ciphertext in the standard
/// domain.
pub fn fill_with_forward_fourier<
@@ -273,7 +273,7 @@ impl<'a> PseudoFourierGgswCiphertextMutView<'a> {
self,
coef_ggsw: &PseudoGgswCiphertext<InputCont>,
fft: FftView<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert_eq!(coef_ggsw.polynomial_size(), self.polynomial_size());
let fourier_poly_size = coef_ggsw.polynomial_size().to_fourier_polynomial_size().0;
@@ -285,7 +285,7 @@ impl<'a> PseudoFourierGgswCiphertextMutView<'a> {
fft.forward_as_torus(
FourierPolynomialMutView { data: fourier_poly },
coef_poly,
stack.rb_mut(),
stack,
);
}
}

View File

@@ -43,7 +43,7 @@ pub trait FourierBootstrapKey<Scalar: UnsignedInteger> {
&mut self,
coef_bsk: &LweBootstrapKey<ContBsk>,
fft: &Self::Fft,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContBsk: Container<Element = Scalar>;
@@ -59,7 +59,7 @@ pub trait FourierBootstrapKey<Scalar: UnsignedInteger> {
lwe_in: &LweCiphertext<ContLweIn>,
accumulator: &GlweCiphertext<ContAcc>,
fft: &Self::Fft,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContLweOut: ContainerMut<Element = Scalar>,
ContLweIn: Container<Element = Scalar>,

View File

@@ -20,7 +20,7 @@ use crate::core_crypto::prelude::ContainerMut;
use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
use core::any::TypeId;
use core::mem::transmute;
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_versionable::Versionize;
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize, Versionize)]
@@ -250,7 +250,7 @@ where
lut: &mut GlweCiphertext<ContLut>,
lwe: &LweCiphertext<ContLwe>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus + CastInto<usize>,
ContLut: ContainerMut<Element = Scalar>,
@@ -261,7 +261,7 @@ where
mut lut: GlweCiphertext<&mut [Scalar]>,
lwe: LweCiphertext<&[Scalar]>,
fft: Fft128View<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) {
let lwe = lwe.as_ref();
let (lwe_body, lwe_mask) = lwe.split_last().unwrap();
@@ -287,7 +287,7 @@ where
izip!(lwe_mask.iter(), this.into_ggsw_iter())
{
if *lwe_mask_element != Scalar::ZERO {
let stack = stack.rb_mut();
let stack = &mut *stack;
// We copy ct_0 to ct_1
let (ct1, stack) =
stack.collect_aligned(CACHELINE_ALIGN, ct0.as_ref().iter().copied());
@@ -335,7 +335,7 @@ where
lwe_in: &LweCiphertext<ContLweIn>,
accumulator: &GlweCiphertext<ContAcc>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
Scalar: UnsignedTorus + CastInto<usize>,
@@ -349,7 +349,7 @@ where
lwe_in: LweCiphertext<&[Scalar]>,
accumulator: GlweCiphertext<&[Scalar]>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
// We type check dynamically with TypeId
#[allow(clippy::transmute_undefined_repr)]
@@ -417,7 +417,7 @@ where
&mut self,
coef_bsk: &LweBootstrapKey<ContBsk>,
fft: &Self::Fft,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContBsk: Container<Element = Scalar>,
{
@@ -440,7 +440,7 @@ where
lwe_in: &LweCiphertext<ContLweIn>,
accumulator: &GlweCiphertext<ContAcc>,
fft: &Self::Fft,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContLweOut: ContainerMut<Element = Scalar>,
ContLweIn: Container<Element = Scalar>,

View File

@@ -16,7 +16,7 @@ use crate::core_crypto::entities::glwe_ciphertext::{GlweCiphertext, GlweCipherte
use crate::core_crypto::fft_impl::fft64::math::decomposition::TensorSignedDecompositionLendingIter;
use crate::core_crypto::prelude::ContainerMut;
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_fft::fft128::f128;
use tfhe_versionable::Versionize;
@@ -365,7 +365,7 @@ pub fn add_external_product_assign<Scalar, ContOut, ContGgsw, ContGlwe>(
ggsw: &Fourier128GgswCiphertext<ContGgsw>,
glwe: &GlweCiphertext<ContGlwe>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
ContOut: ContainerMut<Element = Scalar>,
@@ -377,7 +377,7 @@ pub fn add_external_product_assign<Scalar, ContOut, ContGgsw, ContGlwe>(
ggsw: Fourier128GgswCiphertext<&[f64]>,
glwe: GlweCiphertext<&[Scalar]>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
// we check that the polynomial sizes match
debug_assert_eq!(ggsw.polynomial_size(), glwe.polynomial_size());
@@ -404,7 +404,7 @@ pub fn add_external_product_assign<Scalar, ContOut, ContGgsw, ContGlwe>(
stack.make_aligned_raw::<f64>(fourier_poly_size * ggsw.glwe_size().0, align);
let (output_fft_buffer_im0, stack) =
stack.make_aligned_raw::<f64>(fourier_poly_size * ggsw.glwe_size().0, align);
let (output_fft_buffer_im1, mut substack0) =
let (output_fft_buffer_im1, substack0) =
stack.make_aligned_raw::<f64>(fourier_poly_size * ggsw.glwe_size().0, align);
// output_fft_buffer is initially uninitialized, considered to be implicitly zero, to avoid
@@ -416,21 +416,21 @@ pub fn add_external_product_assign<Scalar, ContOut, ContGgsw, ContGlwe>(
// ------------------------------------------------------ EXTERNAL PRODUCT IN FOURIER
// DOMAIN In this section, we perform the external product in the fourier
// domain, and accumulate the result in the output_fft_buffer variable.
let (mut decomposition, mut substack1) = TensorSignedDecompositionLendingIter::new(
let (mut decomposition, substack1) = TensorSignedDecompositionLendingIter::new(
glwe.as_ref()
.iter()
.map(|s| decomposer.init_decomposer_state(*s)),
DecompositionBaseLog(decomposer.base_log),
DecompositionLevelCount(decomposer.level_count),
substack0.rb_mut(),
substack0,
);
// We loop through the levels (we reverse to match the order of the decomposition
// iterator.)
for ggsw_decomp_matrix in ggsw.into_levels() {
// We retrieve the decomposition of this level.
let (glwe_level, glwe_decomp_term, mut substack2) =
collect_next_term(&mut decomposition, &mut substack1, align);
let (glwe_level, glwe_decomp_term, substack2) =
collect_next_term(&mut decomposition, substack1, align);
let glwe_decomp_term = GlweCiphertextView::from_container(
&*glwe_decomp_term,
ggsw.polynomial_size(),
@@ -455,7 +455,7 @@ pub fn add_external_product_assign<Scalar, ContOut, ContGgsw, ContGlwe>(
glwe_decomp_term.as_polynomial_list().iter()
) {
let len = fourier_poly_size;
let stack = substack2.rb_mut();
let stack = &mut *substack2;
let (fourier_re0, stack) = stack.make_aligned_raw::<f64>(len, align);
let (fourier_re1, stack) = stack.make_aligned_raw::<f64>(len, align);
let (fourier_im0, stack) = stack.make_aligned_raw::<f64>(len, align);
@@ -509,7 +509,7 @@ pub fn add_external_product_assign<Scalar, ContOut, ContGgsw, ContGlwe>(
fourier_re1,
fourier_im0,
fourier_im1,
substack0.rb_mut(),
substack0,
);
}
}
@@ -528,9 +528,9 @@ fn collect_next_term<'a, Scalar: UnsignedTorus>(
decomposition: &mut TensorSignedDecompositionLendingIter<'_, Scalar>,
substack1: &'a mut PodStack,
align: usize,
) -> (DecompositionLevel, &'a mut [Scalar], PodStack<'a>) {
) -> (DecompositionLevel, &'a mut [Scalar], &'a mut PodStack) {
let (glwe_level, _, glwe_decomp_term) = decomposition.next_term().unwrap();
let (glwe_decomp_term, substack2) = substack1.rb_mut().collect_aligned(align, glwe_decomp_term);
let (glwe_decomp_term, substack2) = substack1.collect_aligned(align, glwe_decomp_term);
(glwe_level, glwe_decomp_term, substack2)
}
@@ -767,7 +767,7 @@ pub fn cmux<Scalar, ContCt0, ContCt1, ContGgsw>(
ct1: &mut GlweCiphertext<ContCt1>,
ggsw: &Fourier128GgswCiphertext<ContGgsw>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
ContCt0: ContainerMut<Element = Scalar>,
@@ -779,7 +779,7 @@ pub fn cmux<Scalar, ContCt0, ContCt1, ContGgsw>(
mut ct1: GlweCiphertext<&mut [Scalar]>,
ggsw: Fourier128GgswCiphertext<&[f64]>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
for (c1, c0) in izip!(ct1.as_mut(), ct0.as_ref()) {
*c1 = c1.wrapping_sub(*c0);

View File

@@ -346,7 +346,7 @@ fn convert_backward_torus<Scalar: UnsignedTorus>(
}
}
impl<'a> Fft128View<'a> {
impl Fft128View<'_> {
pub fn polynomial_size(self) -> PolynomialSize {
PolynomialSize(2 * self.plan.fft_size())
}
@@ -437,7 +437,7 @@ impl<'a> Fft128View<'a> {
fourier_re1: &[f64],
fourier_im0: &[f64],
fourier_im1: &[f64],
stack: PodStack<'_>,
stack: &mut PodStack,
) {
self.backward_with_conv(
standard,
@@ -463,7 +463,7 @@ impl<'a> Fft128View<'a> {
fourier_re1: &[f64],
fourier_im0: &[f64],
fourier_im1: &[f64],
stack: PodStack<'_>,
stack: &mut PodStack,
) {
self.backward_with_conv(
standard,
@@ -487,7 +487,7 @@ impl<'a> Fft128View<'a> {
fourier_im0: &[f64],
fourier_im1: &[f64],
conv_fn: F,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
let n = standard.len();
debug_assert_eq!(n, 2 * fourier_re0.len());

View File

@@ -1,7 +1,7 @@
use super::*;
use crate::core_crypto::commons::test_tools::{modular_distance, new_random_generator};
use aligned_vec::avec;
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
fn test_roundtrip<Scalar: UnsignedTorus>() {
let mut generator = new_random_generator();
@@ -24,7 +24,7 @@ fn test_roundtrip<Scalar: UnsignedTorus>() {
}
let mut mem = GlobalPodBuffer::new(fft.backward_scratch().unwrap());
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
fft.forward_as_torus(
&mut fourier_re0,
@@ -39,7 +39,7 @@ fn test_roundtrip<Scalar: UnsignedTorus>() {
&fourier_re1,
&fourier_im0,
&fourier_im1,
stack.rb_mut(),
stack,
);
for (expected, actual) in izip!(poly.as_ref().iter(), roundtrip.as_ref().iter()) {
@@ -111,7 +111,7 @@ fn test_product<Scalar: UnsignedTorus>() {
}
let mut mem = GlobalPodBuffer::new(fft.backward_scratch().unwrap());
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
fft.forward_as_torus(
&mut fourier0_re0,
@@ -153,7 +153,7 @@ fn test_product<Scalar: UnsignedTorus>() {
&fourier0_re1,
&fourier0_im0,
&fourier0_im1,
stack.rb_mut(),
stack,
);
convolution_naive(
convolution_from_naive.as_mut(),

View File

@@ -11,7 +11,7 @@ use crate::core_crypto::entities::*;
use crate::core_crypto::fft_impl::common::pbs_modulus_switch;
use crate::core_crypto::prelude::{Container, ContainerMut};
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{PodStack, ReborrowMut};
use dyn_stack::PodStack;
pub fn polynomial_wrapping_monic_monomial_mul_assign_split(
output_lo: Polynomial<&mut [u64]>,
@@ -64,7 +64,7 @@ where
lut_hi: &mut GlweCiphertext<ContLutHi>,
lwe: &LweCiphertext<ContLwe>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContLutLo: ContainerMut<Element = u64>,
ContLutHi: ContainerMut<Element = u64>,
@@ -76,7 +76,7 @@ where
mut lut_hi: GlweCiphertext<&mut [u64]>,
lwe: LweCiphertext<&[u128]>,
fft: Fft128View<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) {
let lwe = lwe.as_ref();
let (lwe_body, lwe_mask) = lwe.split_last().unwrap();
@@ -103,7 +103,7 @@ where
izip!(lwe_mask.iter(), this.into_ggsw_iter())
{
if *lwe_mask_element != 0 {
let stack = stack.rb_mut();
let stack = &mut *stack;
// We copy ct_0 to ct_1
let (ct1_lo, stack) =
stack.collect_aligned(CACHELINE_ALIGN, ct0_lo.as_ref().iter().copied());
@@ -160,7 +160,7 @@ where
lwe_in: &LweCiphertext<ContLweIn>,
accumulator: &GlweCiphertext<ContAcc>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContLweOut: ContainerMut<Element = u128>,
ContLweIn: Container<Element = u128>,
@@ -172,14 +172,14 @@ where
lwe_in: LweCiphertext<&[u128]>,
accumulator: GlweCiphertext<&[u128]>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
let align = CACHELINE_ALIGN;
let ciphertext_modulus = accumulator.ciphertext_modulus();
let (local_accumulator_lo, stack) =
stack.collect_aligned(align, accumulator.as_ref().iter().map(|i| *i as u64));
let (local_accumulator_hi, mut stack) = stack.collect_aligned(
let (local_accumulator_hi, stack) = stack.collect_aligned(
align,
accumulator.as_ref().iter().map(|i| (*i >> 64) as u64),
);
@@ -205,7 +205,7 @@ where
&mut local_accumulator_hi,
&lwe_in,
fft,
stack.rb_mut(),
stack,
);
let (local_accumulator, _) = stack.collect_aligned(
align,

View File

@@ -9,7 +9,7 @@ use crate::core_crypto::entities::*;
use crate::core_crypto::fft_impl::fft128::crypto::ggsw::update_with_fmadd;
use crate::core_crypto::prelude::{Container, ContainerMut, SignedDecomposer};
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{PodStack, ReborrowMut};
use dyn_stack::PodStack;
#[cfg_attr(feature = "__profiling", inline(never))]
pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlweLo, ContGlweHi>(
@@ -19,7 +19,7 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
glwe_lo: &GlweCiphertext<ContGlweLo>,
glwe_hi: &GlweCiphertext<ContGlweHi>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContOutLo: ContainerMut<Element = u64>,
ContOutHi: ContainerMut<Element = u64>,
@@ -34,7 +34,7 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
glwe_lo: GlweCiphertext<&[u64]>,
glwe_hi: GlweCiphertext<&[u64]>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
// we check that the polynomial sizes match
debug_assert_eq!(ggsw.polynomial_size(), glwe_lo.polynomial_size());
@@ -69,7 +69,7 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
stack.make_aligned_raw::<f64>(fourier_poly_size * ggsw.glwe_size().0, align);
let (output_fft_buffer_im0, stack) =
stack.make_aligned_raw::<f64>(fourier_poly_size * ggsw.glwe_size().0, align);
let (output_fft_buffer_im1, mut substack0) =
let (output_fft_buffer_im1, substack0) =
stack.make_aligned_raw::<f64>(fourier_poly_size * ggsw.glwe_size().0, align);
// output_fft_buffer is initially uninitialized, considered to be implicitly zero, to avoid
@@ -81,10 +81,9 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
// ------------------------------------------------------ EXTERNAL PRODUCT IN FOURIER
// DOMAIN In this section, we perform the external product in the fourier
// domain, and accumulate the result in the output_fft_buffer variable.
let (decomposition_states_lo, stack) = substack0
.rb_mut()
.make_aligned_raw::<u64>(poly_size * glwe_size, align);
let (decomposition_states_hi, mut substack1) =
let (decomposition_states_lo, stack) =
substack0.make_aligned_raw::<u64>(poly_size * glwe_size, align);
let (decomposition_states_hi, substack1) =
stack.make_aligned_raw::<u64>(poly_size * glwe_size, align);
for (out_lo, out_hi, in_lo, in_hi) in izip!(
@@ -113,10 +112,9 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
assert_ne!(current_level, 0);
let glwe_level = DecompositionLevel(current_level);
current_level -= 1;
let (glwe_decomp_term_lo, stack) = substack1
.rb_mut()
.make_aligned_raw::<u64>(poly_size * glwe_size, align);
let (glwe_decomp_term_hi, mut substack2) =
let (glwe_decomp_term_lo, stack) =
substack1.make_aligned_raw::<u64>(poly_size * glwe_size, align);
let (glwe_decomp_term_hi, substack2) =
stack.make_aligned_raw::<u64>(poly_size * glwe_size, align);
let base_log = decomposer.base_log;
@@ -161,7 +159,7 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
glwe_decomp_term_hi.as_polynomial_list().iter(),
) {
let len = fourier_poly_size;
let stack = substack2.rb_mut();
let stack = &mut *substack2;
let (fourier_re0, stack) = stack.make_aligned_raw::<f64>(len, align);
let (fourier_re1, stack) = stack.make_aligned_raw::<f64>(len, align);
let (fourier_im0, stack) = stack.make_aligned_raw::<f64>(len, align);
@@ -219,7 +217,7 @@ pub fn add_external_product_assign_split<ContOutLo, ContOutHi, ContGgsw, ContGlw
fourier_re1,
fourier_im0,
fourier_im1,
substack0.rb_mut(),
substack0,
);
}
}
@@ -612,7 +610,7 @@ pub fn cmux_split<ContCt0Lo, ContCt0Hi, ContCt1Lo, ContCt1Hi, ContGgsw>(
ct1_hi: &mut GlweCiphertext<ContCt1Hi>,
ggsw: &Fourier128GgswCiphertext<ContGgsw>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContCt0Lo: ContainerMut<Element = u64>,
ContCt0Hi: ContainerMut<Element = u64>,
@@ -627,7 +625,7 @@ pub fn cmux_split<ContCt0Lo, ContCt0Hi, ContCt1Lo, ContCt1Hi, ContGgsw>(
mut ct1_hi: GlweCiphertext<&mut [u64]>,
ggsw: Fourier128GgswCiphertext<&[f64]>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
for (c1_lo, c1_hi, c0_lo, c0_hi) in izip!(
ct1_lo.as_mut(),

View File

@@ -6,7 +6,7 @@ use crate::core_crypto::fft_impl::common::tests::{
use crate::core_crypto::prelude::test::{TestResources, FFT128_U128_PARAMS};
use crate::core_crypto::prelude::*;
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{GlobalPodBuffer, PodStack, ReborrowMut};
use dyn_stack::{GlobalPodBuffer, PodStack};
#[test]
fn test_split_external_product() {
@@ -177,7 +177,7 @@ fn test_split_pbs() {
)
.unwrap(),
);
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
for _ in 0..20 {
for x in lwe_in.as_mut() {
@@ -203,7 +203,7 @@ fn test_split_pbs() {
lwe_in: LweCiphertext<&[Scalar]>,
accumulator: GlweCiphertext<&[Scalar]>,
fft: Fft128View<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
let (local_accumulator_data, stack) =
stack.collect_aligned(CACHELINE_ALIGN, accumulator.as_ref().iter().copied());
@@ -226,7 +226,7 @@ fn test_split_pbs() {
lwe_in.as_view(),
accumulator.as_view(),
fft,
stack.rb_mut(),
stack,
);
let mut lwe_out_split = LweCiphertext::new(
@@ -236,13 +236,7 @@ fn test_split_pbs() {
.to_lwe_size(),
ciphertext_modulus,
);
fourier_bsk.bootstrap_u128(
&mut lwe_out_split,
&lwe_in,
&accumulator,
fft,
stack.rb_mut(),
);
fourier_bsk.bootstrap_u128(&mut lwe_out_split, &lwe_in, &accumulator, fft, stack);
assert_eq!(lwe_out_split, lwe_out_non_split);
}

View File

@@ -1218,7 +1218,7 @@ pub fn convert_add_backward_torus(
);
}
impl<'a> Fft128View<'a> {
impl Fft128View<'_> {
pub fn forward_as_integer_split(
self,
fourier_re0: &mut [f64],
@@ -1253,7 +1253,7 @@ impl<'a> Fft128View<'a> {
fourier_re1: &[f64],
fourier_im0: &[f64],
fourier_im1: &[f64],
stack: PodStack<'_>,
stack: &mut PodStack,
) {
self.backward_with_conv_split(
standard_lo,
@@ -1308,7 +1308,7 @@ impl<'a> Fft128View<'a> {
fourier_im0: &[f64],
fourier_im1: &[f64],
conv_fn: impl Fn(&mut [u64], &mut [u64], &mut [u64], &mut [u64], &[f64], &[f64], &[f64], &[f64]),
stack: PodStack<'_>,
stack: &mut PodStack,
) {
let n = standard_lo.len();
debug_assert_eq!(n, 2 * fourier_re0.len());

View File

@@ -20,7 +20,7 @@ use crate::core_crypto::fft_impl::common::{pbs_modulus_switch, FourierBootstrapK
use crate::core_crypto::fft_impl::fft64::math::fft::par_convert_polynomials_list_to_fourier;
use crate::core_crypto::prelude::{CiphertextCount, CiphertextModulus, ContainerMut};
use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_fft::c64;
use tfhe_versionable::Versionize;
@@ -184,19 +184,19 @@ pub fn fill_with_forward_fourier_scratch(fft: FftView<'_>) -> Result<StackReq, S
fft.forward_scratch()
}
impl<'a> FourierLweBootstrapKeyMutView<'a> {
impl FourierLweBootstrapKeyMutView<'_> {
/// Fill a bootstrapping key with the Fourier transform of a bootstrapping key in the standard
/// domain.
pub fn fill_with_forward_fourier<Scalar: UnsignedTorus>(
mut self,
coef_bsk: LweBootstrapKey<&'_ [Scalar]>,
fft: FftView<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) {
for (fourier_ggsw, standard_ggsw) in
izip!(self.as_mut_view().into_ggsw_iter(), coef_bsk.iter())
{
fourier_ggsw.fill_with_forward_fourier(standard_ggsw, fft, stack.rb_mut());
fourier_ggsw.fill_with_forward_fourier(standard_ggsw, fft, stack);
}
}
/// Fill a bootstrapping key with the Fourier transform of a bootstrapping key in the standard
@@ -281,14 +281,14 @@ pub fn batch_bootstrap_scratch<Scalar>(
)?)
}
impl<'a> FourierLweBootstrapKeyView<'a> {
impl FourierLweBootstrapKeyView<'_> {
// CastInto required for PBS modulus switch which returns a usize
pub fn blind_rotate_assign<InputScalar, OutputScalar>(
self,
mut lut: GlweCiphertextMutView<'_, OutputScalar>,
lwe: LweCiphertextView<'_, InputScalar>,
fft: FftView<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) where
InputScalar: UnsignedTorus + CastInto<usize>,
OutputScalar: UnsignedTorus,
@@ -303,9 +303,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
lut.as_mut_polynomial_list()
.iter_mut()
.for_each(|mut poly| {
let (tmp_poly, _) = stack
.rb_mut()
.make_aligned_raw(poly.as_ref().len(), CACHELINE_ALIGN);
let (tmp_poly, _) = stack.make_aligned_raw(poly.as_ref().len(), CACHELINE_ALIGN);
let mut tmp_poly = Polynomial::from_container(&mut *tmp_poly);
tmp_poly.as_mut().copy_from_slice(poly.as_ref());
@@ -314,7 +312,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
// We initialize the ct_0 used for the successive cmuxes
let mut ct0 = lut;
let (ct1, mut stack) = stack.make_aligned_raw(ct0.as_ref().len(), CACHELINE_ALIGN);
let (ct1, stack) = stack.make_aligned_raw(ct0.as_ref().len(), CACHELINE_ALIGN);
let mut ct1 =
GlweCiphertextMutView::from_container(&mut *ct1, lut_poly_size, ciphertext_modulus);
@@ -349,7 +347,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
bootstrap_key_ggsw,
ct1.as_view(),
fft,
stack.rb_mut(),
stack,
);
}
}
@@ -375,7 +373,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
mut lut_list: GlweCiphertextListMutView<'_, OutputScalar>,
lwe_list: LweCiphertextListView<'_, InputScalar>,
fft: FftView<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) where
InputScalar: UnsignedTorus + CastInto<usize>,
OutputScalar: UnsignedTorus,
@@ -393,9 +391,8 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
lut.as_mut_polynomial_list()
.iter_mut()
.for_each(|mut poly| {
let (tmp_poly, _) = stack
.rb_mut()
.make_aligned_raw(poly.as_ref().len(), CACHELINE_ALIGN);
let (tmp_poly, _) =
stack.make_aligned_raw(poly.as_ref().len(), CACHELINE_ALIGN);
let mut tmp_poly = Polynomial::from_container(&mut *tmp_poly);
tmp_poly.as_mut().copy_from_slice(poly.as_ref());
@@ -405,8 +402,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
// We initialize the ct_0 used for the successive cmuxes
let mut ct0_list = lut_list;
let (ct1_list, mut stack) =
stack.make_aligned_raw(ct0_list.as_ref().len(), CACHELINE_ALIGN);
let (ct1_list, stack) = stack.make_aligned_raw(ct0_list.as_ref().len(), CACHELINE_ALIGN);
let mut ct1_list = GlweCiphertextListMutView::from_container(
&mut *ct1_list,
ct0_list.glwe_size(),
@@ -450,7 +446,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
bootstrap_key_ggsw,
ct1.as_view(),
fft,
stack.rb_mut(),
stack,
);
}
}
@@ -478,7 +474,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
lwe_in: LweCiphertextView<'_, InputScalar>,
accumulator: GlweCiphertextView<'_, OutputScalar>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
InputScalar: UnsignedTorus + CastInto<usize>,
@@ -518,7 +514,7 @@ impl<'a> FourierLweBootstrapKeyView<'a> {
lwe_in: LweCiphertextListView<'_, InputScalar>,
accumulator: &GlweCiphertextListView<'_, OutputScalar>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
// CastInto required for PBS modulus switch which returns a usize
InputScalar: UnsignedTorus + CastInto<usize>,
@@ -586,7 +582,7 @@ where
&mut self,
coef_bsk: &LweBootstrapKey<ContBsk>,
fft: &Self::Fft,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContBsk: Container<Element = Scalar>,
{
@@ -608,7 +604,7 @@ where
lwe_in: &LweCiphertext<ContLweIn>,
accumulator: &GlweCiphertext<ContAcc>,
fft: &Self::Fft,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
ContLweOut: ContainerMut<Element = Scalar>,
ContLweIn: Container<Element = Scalar>,

View File

@@ -16,7 +16,7 @@ use crate::core_crypto::entities::ggsw_ciphertext::{
};
use crate::core_crypto::entities::glwe_ciphertext::{GlweCiphertextMutView, GlweCiphertextView};
use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_fft::c64;
use tfhe_versionable::Versionize;
@@ -250,14 +250,14 @@ pub fn fill_with_forward_fourier_scratch(fft: FftView<'_>) -> Result<StackReq, S
fft.forward_scratch()
}
impl<'a> FourierGgswCiphertextMutView<'a> {
impl FourierGgswCiphertextMutView<'_> {
/// Fill a GGSW ciphertext with the Fourier transform of a GGSW ciphertext in the standard
/// domain.
pub fn fill_with_forward_fourier<Scalar: UnsignedTorus>(
self,
coef_ggsw: GgswCiphertextView<'_, Scalar>,
fft: FftView<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert_eq!(coef_ggsw.polynomial_size(), self.polynomial_size());
let fourier_poly_size = coef_ggsw.polynomial_size().to_fourier_polynomial_size().0;
@@ -269,7 +269,7 @@ impl<'a> FourierGgswCiphertextMutView<'a> {
fft.forward_as_torus(
FourierPolynomialMutView { data: fourier_poly },
coef_poly,
stack.rb_mut(),
stack,
);
}
}
@@ -483,7 +483,7 @@ pub fn add_external_product_assign<Scalar>(
ggsw: FourierGgswCiphertextView<'_>,
glwe: GlweCiphertextView<Scalar>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) where
Scalar: UnsignedTorus,
{
@@ -503,7 +503,7 @@ pub fn add_external_product_assign<Scalar>(
ggsw.decomposition_level_count(),
);
let (output_fft_buffer, mut substack0) =
let (output_fft_buffer, substack0) =
stack.make_aligned_raw::<c64>(fourier_poly_size * ggsw.glwe_size().0, align);
// output_fft_buffer is initially uninitialized, considered to be implicitly zero, to avoid
// the cost of filling it up with zeros. `is_output_uninit` is set to `false` once
@@ -515,20 +515,20 @@ pub fn add_external_product_assign<Scalar>(
// ------------------------------------------------------ EXTERNAL PRODUCT IN FOURIER DOMAIN
// In this section, we perform the external product in the fourier domain, and accumulate
// the result in the output_fft_buffer variable.
let (mut decomposition, mut substack1) = TensorSignedDecompositionLendingIter::new(
let (mut decomposition, substack1) = TensorSignedDecompositionLendingIter::new(
glwe.as_ref()
.iter()
.map(|s| decomposer.init_decomposer_state(*s)),
DecompositionBaseLog(decomposer.base_log),
DecompositionLevelCount(decomposer.level_count),
substack0.rb_mut(),
substack0,
);
// We loop through the levels (we reverse to match the order of the decomposition iterator.)
ggsw.into_levels().for_each(|ggsw_decomp_matrix| {
// We retrieve the decomposition of this level.
let (glwe_level, glwe_decomp_term, mut substack2) =
collect_next_term(&mut decomposition, &mut substack1, align);
let (glwe_level, glwe_decomp_term, substack2) =
collect_next_term(&mut decomposition, substack1, align);
let glwe_decomp_term = GlweCiphertextView::from_container(
&*glwe_decomp_term,
ggsw.polynomial_size(),
@@ -553,9 +553,8 @@ pub fn add_external_product_assign<Scalar>(
glwe_decomp_term.as_polynomial_list().iter()
)
.for_each(|(ggsw_row, glwe_poly)| {
let (fourier, substack3) = substack2
.rb_mut()
.make_aligned_raw::<c64>(fourier_poly_size, align);
let (fourier, substack3) =
substack2.make_aligned_raw::<c64>(fourier_poly_size, align);
// We perform the forward fft transform for the glwe polynomial
let fourier = fft
.forward_as_integer(
@@ -596,7 +595,7 @@ pub fn add_external_product_assign<Scalar>(
.for_each(|(out, fourier)| {
// The fourier buffer is not re-used afterwards so we can use the in-place version of
// the add_backward_as_torus function
fft.add_backward_in_place_as_torus(out, fourier, substack0.rb_mut());
fft.add_backward_in_place_as_torus(out, fourier, substack0);
});
}
}
@@ -606,9 +605,9 @@ pub(crate) fn collect_next_term<'a, Scalar: UnsignedTorus>(
decomposition: &mut TensorSignedDecompositionLendingIter<'_, Scalar>,
substack1: &'a mut PodStack,
align: usize,
) -> (DecompositionLevel, &'a mut [Scalar], PodStack<'a>) {
) -> (DecompositionLevel, &'a mut [Scalar], &'a mut PodStack) {
let (glwe_level, _, glwe_decomp_term) = decomposition.next_term().unwrap();
let (glwe_decomp_term, substack2) = substack1.rb_mut().collect_aligned(align, glwe_decomp_term);
let (glwe_decomp_term, substack2) = substack1.collect_aligned(align, glwe_decomp_term);
(glwe_level, glwe_decomp_term, substack2)
}
@@ -647,18 +646,18 @@ pub(crate) fn update_with_fmadd(
is_output_uninit: bool,
fourier_poly_size: usize,
) {
let rhs = S::c64s_as_simd(fourier).0;
let rhs = S::as_simd_c64s(fourier).0;
if is_output_uninit {
for (output_fourier, ggsw_poly) in izip!(
output_fft_buffer.into_chunks(fourier_poly_size),
lhs_polynomial_list.into_chunks(fourier_poly_size)
) {
let out = S::c64s_as_mut_simd(output_fourier).0;
let lhs = S::c64s_as_simd(ggsw_poly).0;
let out = S::as_mut_simd_c64s(output_fourier).0;
let lhs = S::as_simd_c64s(ggsw_poly).0;
for (out, lhs, rhs) in izip!(out, lhs, rhs) {
*out = simd.c64s_mul(*lhs, *rhs);
*out = simd.mul_c64s(*lhs, *rhs);
}
}
} else {
@@ -666,11 +665,11 @@ pub(crate) fn update_with_fmadd(
output_fft_buffer.into_chunks(fourier_poly_size),
lhs_polynomial_list.into_chunks(fourier_poly_size)
) {
let out = S::c64s_as_mut_simd(output_fourier).0;
let lhs = S::c64s_as_simd(ggsw_poly).0;
let out = S::as_mut_simd_c64s(output_fourier).0;
let lhs = S::as_simd_c64s(ggsw_poly).0;
for (out, lhs, rhs) in izip!(out, lhs, rhs) {
*out = simd.c64s_mul_add_e(*lhs, *rhs, *out);
*out = simd.mul_add_c64s(*lhs, *rhs, *out);
}
}
}
@@ -718,25 +717,25 @@ pub(crate) fn update_with_fmadd_factor(
#[inline(always)]
fn with_simd<S: pulp::Simd>(self, simd: S) -> Self::Output {
let factor = simd.c64s_splat(self.factor);
let factor = simd.splat_c64s(self.factor);
for (output_fourier, ggsw_poly) in izip!(
self.output_fft_buffer.into_chunks(self.fourier_poly_size),
self.lhs_polynomial_list.into_chunks(self.fourier_poly_size)
) {
let out = S::c64s_as_mut_simd(output_fourier).0;
let lhs = S::c64s_as_simd(ggsw_poly).0;
let rhs = S::c64s_as_simd(self.fourier).0;
let out = S::as_mut_simd_c64s(output_fourier).0;
let lhs = S::as_simd_c64s(ggsw_poly).0;
let rhs = S::as_simd_c64s(self.fourier).0;
if self.is_output_uninit {
for (out, &lhs, &rhs) in izip!(out, lhs, rhs) {
// NOTE: factor * (lhs * rhs) is more efficient than (lhs * rhs) * factor
*out = simd.c64s_mul(factor, simd.c64s_mul(lhs, rhs));
*out = simd.mul_c64s(factor, simd.mul_c64s(lhs, rhs));
}
} else {
for (out, &lhs, &rhs) in izip!(out, lhs, rhs) {
// NOTE: see above
*out = simd.c64s_mul_add_e(factor, simd.c64s_mul(lhs, rhs), *out);
*out = simd.mul_add_c64s(factor, simd.mul_c64s(lhs, rhs), *out);
}
}
}
@@ -768,7 +767,7 @@ pub fn cmux<Scalar: UnsignedTorus>(
mut ct1: GlweCiphertextMutView<'_, Scalar>,
ggsw: FourierGgswCiphertextView<'_>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
izip!(ct1.as_mut(), ct0.as_ref()).for_each(|(c1, c0)| {
*c1 = c1.wrapping_sub(*c0);

View File

@@ -15,7 +15,7 @@ use crate::core_crypto::commons::traits::*;
use crate::core_crypto::commons::utils::izip;
use crate::core_crypto::entities::*;
use aligned_vec::CACHELINE_ALIGN;
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use tfhe_fft::c64;
pub fn extract_bits_scratch<Scalar>(
@@ -68,7 +68,7 @@ pub fn extract_bits<Scalar: UnsignedTorus + CastInto<usize>>(
delta_log: DeltaLog,
number_of_bits_to_extract: ExtractedBitsCount,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert!(lwe_list_out.ciphertext_modulus() == lwe_in.ciphertext_modulus());
debug_assert!(lwe_in.ciphertext_modulus() == ksk.ciphertext_modulus());
@@ -143,7 +143,7 @@ pub fn extract_bits<Scalar: UnsignedTorus + CastInto<usize>>(
let lwe_size = glwe_dimension
.to_equivalent_lwe_dimension(polynomial_size)
.to_lwe_size();
let (lwe_out_pbs_buffer_data, mut stack) =
let (lwe_out_pbs_buffer_data, stack) =
stack.make_aligned_with(lwe_size.0, align, |_| Scalar::ZERO);
let mut lwe_out_pbs_buffer = LweCiphertext::from_container(
&mut *lwe_out_pbs_buffer_data,
@@ -155,7 +155,7 @@ pub fn extract_bits<Scalar: UnsignedTorus + CastInto<usize>>(
// Block to keep the lwe_bit_left_shift_buffer_data alive only as long as needed
{
// Shift on padding bit
let (lwe_bit_left_shift_buffer_data, _) = stack.rb_mut().collect_aligned(
let (lwe_bit_left_shift_buffer_data, _) = stack.collect_aligned(
align,
lwe_in_buffer
.as_ref()
@@ -206,7 +206,7 @@ pub fn extract_bits<Scalar: UnsignedTorus + CastInto<usize>>(
lwe_out_ks_buffer.as_view(),
pbs_accumulator.as_view(),
fft,
stack.rb_mut(),
stack,
);
// Add alpha where alpha = delta*2^{bit_idx-1} to end up with an encryption of 0 if the
@@ -244,7 +244,7 @@ pub fn circuit_bootstrap_boolean<Scalar: UnsignedTorus + CastInto<usize>>(
delta_log: DeltaLog,
pfpksk_list: LwePrivateFunctionalPackingKeyswitchKeyList<&[Scalar]>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert!(lwe_in.ciphertext_modulus() == ggsw_out.ciphertext_modulus());
debug_assert!(ggsw_out.ciphertext_modulus() == pfpksk_list.ciphertext_modulus());
@@ -306,7 +306,7 @@ pub fn circuit_bootstrap_boolean<Scalar: UnsignedTorus + CastInto<usize>>(
);
// Output for every bootstrapping
let (lwe_out_bs_buffer_data, mut stack) = stack.make_aligned_with(
let (lwe_out_bs_buffer_data, stack) = stack.make_aligned_with(
fourier_bsk_output_lwe_dimension.to_lwe_size().0,
CACHELINE_ALIGN,
|_| Scalar::ZERO,
@@ -324,7 +324,7 @@ pub fn circuit_bootstrap_boolean<Scalar: UnsignedTorus + CastInto<usize>>(
base_log_cbs,
delta_log,
fft,
stack.rb_mut(),
stack,
);
for (pfpksk, mut glwe_out) in pfpksk_list
@@ -371,7 +371,7 @@ pub fn homomorphic_shift_boolean<Scalar: UnsignedTorus + CastInto<usize>>(
base_log_cbs: DecompositionBaseLog,
delta_log: DeltaLog,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert!(lwe_out.ciphertext_modulus() == lwe_in.ciphertext_modulus());
debug_assert!(
@@ -467,7 +467,7 @@ pub fn cmux_tree_memory_optimized<Scalar: UnsignedTorus + CastInto<usize>>(
lut_per_layer: PolynomialList<&[Scalar]>,
ggsw_list: FourierGgswCiphertextListView<'_>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert!(lut_per_layer.polynomial_count().0 == 1 << ggsw_list.count());
@@ -510,7 +510,7 @@ pub fn cmux_tree_memory_optimized<Scalar: UnsignedTorus + CastInto<usize>>(
ciphertext_modulus,
);
let (t_fill, mut stack) = stack.make_with(nb_layer, |_| 0_usize);
let (t_fill, stack) = stack.make_with(nb_layer, |_| 0_usize);
let mut lut_polynomial_iter = lut_per_layer.iter();
loop {
@@ -537,7 +537,7 @@ pub fn cmux_tree_memory_optimized<Scalar: UnsignedTorus + CastInto<usize>>(
for (j, ggsw) in ggsw_list.into_ggsw_iter().rev().enumerate() {
if t_fill[j] == 2 {
let (diff_data, stack) = stack.rb_mut().collect_aligned(
let (diff_data, stack) = stack.collect_aligned(
CACHELINE_ALIGN,
izip!(t1_j.as_ref(), t0_j.as_ref()).map(|(&a, &b)| a.wrapping_sub(b)),
);
@@ -648,7 +648,7 @@ pub fn circuit_bootstrap_boolean_vertical_packing<Scalar: UnsignedTorus + CastIn
level_cbs: DecompositionLevelCount,
base_log_cbs: DecompositionBaseLog,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert!(stack.can_hold(
circuit_bootstrap_boolean_vertical_packing_scratch::<Scalar>(
@@ -686,7 +686,7 @@ pub fn circuit_bootstrap_boolean_vertical_packing<Scalar: UnsignedTorus + CastIn
CACHELINE_ALIGN,
|_| c64::default(),
);
let (ggsw_res_data, mut stack) = stack.make_aligned_with(
let (ggsw_res_data, stack) = stack.make_aligned_with(
pfpksk_list.output_polynomial_size().0 * glwe_size.0 * glwe_size.0 * level_cbs.0,
CACHELINE_ALIGN,
|_| Scalar::ZERO,
@@ -717,10 +717,10 @@ pub fn circuit_bootstrap_boolean_vertical_packing<Scalar: UnsignedTorus + CastIn
DeltaLog(Scalar::BITS - 1),
pfpksk_list.as_view(),
fft,
stack.rb_mut(),
stack,
);
ggsw.fill_with_forward_fourier(ggsw_res.as_view(), fft, stack.rb_mut());
ggsw.fill_with_forward_fourier(ggsw_res.as_view(), fft, stack);
}
// We deduce the number of luts in the vec_lut from the number of cipherxtexts in lwe_list_out
@@ -732,7 +732,7 @@ pub fn circuit_bootstrap_boolean_vertical_packing<Scalar: UnsignedTorus + CastIn
big_lut_as_polynomial_list.chunks_exact(small_lut_size),
lwe_list_out.iter_mut(),
) {
vertical_packing(lut, lwe_out, ggsw_list.as_view(), fft, stack.rb_mut());
vertical_packing(lut, lwe_out, ggsw_list.as_view(), fft, stack);
}
}
@@ -778,7 +778,7 @@ pub fn vertical_packing<Scalar: UnsignedTorus + CastInto<usize>>(
mut lwe_out: LweCiphertext<&mut [Scalar]>,
ggsw_list: FourierGgswCiphertextListView<'_>,
fft: FftView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
debug_assert!(
lwe_out.ciphertext_modulus().is_native_modulus(),
@@ -815,26 +815,15 @@ pub fn vertical_packing<Scalar: UnsignedTorus + CastInto<usize>>(
// the last blind rotation.
let (cmux_ggsw, br_ggsw) = ggsw_list.split_at(log_number_of_luts_for_cmux_tree);
let (cmux_tree_lut_res_data, mut stack) =
let (cmux_tree_lut_res_data, stack) =
stack.make_aligned_with(polynomial_size.0 * glwe_size.0, CACHELINE_ALIGN, |_| {
Scalar::ZERO
});
let mut cmux_tree_lut_res =
GlweCiphertext::from_container(cmux_tree_lut_res_data, polynomial_size, ciphertext_modulus);
cmux_tree_memory_optimized(
cmux_tree_lut_res.as_mut_view(),
lut,
cmux_ggsw,
fft,
stack.rb_mut(),
);
blind_rotate_assign(
cmux_tree_lut_res.as_mut_view(),
br_ggsw,
fft,
stack.rb_mut(),
);
cmux_tree_memory_optimized(cmux_tree_lut_res.as_mut_view(), lut, cmux_ggsw, fft, stack);
blind_rotate_assign(cmux_tree_lut_res.as_mut_view(), br_ggsw, fft, stack);
// sample extract of the RLWE of the Vertical packing
extract_lwe_sample_from_glwe_ciphertext(&cmux_tree_lut_res, &mut lwe_out, MonomialDegree(0));
@@ -855,15 +844,14 @@ pub fn blind_rotate_assign<Scalar: UnsignedTorus + CastInto<usize>>(
mut lut: GlweCiphertext<&mut [Scalar]>,
ggsw_list: FourierGgswCiphertextListView<'_>,
fft: FftView<'_>,
mut stack: PodStack<'_>,
stack: &mut PodStack,
) {
let mut monomial_degree = MonomialDegree(1);
for ggsw in ggsw_list.into_ggsw_iter().rev() {
let ct_0 = lut.as_mut_view();
let (ct1_data, stack) = stack
.rb_mut()
.collect_aligned(CACHELINE_ALIGN, ct_0.as_ref().iter().copied());
let (ct1_data, stack) =
stack.collect_aligned(CACHELINE_ALIGN, ct_0.as_ref().iter().copied());
let mut ct_1 = GlweCiphertext::from_container(
&mut *ct1_data,
ct_0.polynomial_size(),

View File

@@ -172,7 +172,7 @@ pub fn test_extract_bits() {
};
let req = req().unwrap();
let mut mem = GlobalPodBuffer::new(req);
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
fourier_bsk
.as_mut_view()
@@ -225,7 +225,7 @@ pub fn test_extract_bits() {
delta_log,
number_values_to_extract,
fft,
stack.rb_mut(),
stack,
);
// Decryption of extracted bit

View File

@@ -29,8 +29,8 @@ impl<'buffers, Scalar: UnsignedInteger> TensorSignedDecompositionLendingIter<'bu
input: impl Iterator<Item = Scalar>,
base_log: DecompositionBaseLog,
level: DecompositionLevelCount,
stack: PodStack<'buffers>,
) -> (Self, PodStack<'buffers>) {
stack: &'buffers mut PodStack,
) -> (Self, &'buffers mut PodStack) {
let (states, stack) = stack.collect_aligned(aligned_vec::CACHELINE_ALIGN, input);
(
TensorSignedDecompositionLendingIter {
@@ -46,6 +46,11 @@ impl<'buffers, Scalar: UnsignedInteger> TensorSignedDecompositionLendingIter<'bu
// inlining this improves perf of external product by about 25%, even in LTO builds
#[inline]
#[allow(
clippy::type_complexity,
reason = "The type complexity would require a pub type = ...; \
but impl Trait is not stable in pub type so we tell clippy to leave us alone"
)]
pub fn next_term<'short>(
&'short mut self,
) -> Option<(

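The #[allow] attribute in the hunk above gains a reason string. A minimal standalone sketch of that syntax (lint reasons are stable since Rust 1.81; the function below is only a demonstration, not code from this crate):

#[allow(
    clippy::type_complexity,
    reason = "the nested type is kept on purpose for this demonstration"
)]
fn nested() -> Option<Result<Vec<(u32, String)>, String>> {
    None
}

fn main() {
    assert!(nested().is_none());
}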
View File

@@ -9,7 +9,7 @@ use crate::core_crypto::commons::traits::{Container, ContainerMut, IntoContainer
use crate::core_crypto::commons::utils::izip;
use crate::core_crypto::entities::*;
use aligned_vec::{avec, ABox};
use dyn_stack::{PodStack, ReborrowMut, SizeOverflow, StackReq};
use dyn_stack::{PodStack, SizeOverflow, StackReq};
use rayon::prelude::*;
use std::any::TypeId;
use std::collections::hash_map::Entry;
@@ -329,7 +329,7 @@ fn convert_add_backward_torus<Scalar: UnsignedTorus>(
convert_add_backward_torus_scalar::<Scalar>(out_re, out_im, inp, twisties);
}
impl<'a> FftView<'a> {
impl FftView<'_> {
/// Return the polynomial size that this FFT was made for.
pub fn polynomial_size(self) -> PolynomialSize {
PolynomialSize(2 * self.plan.fft_size())
@@ -383,7 +383,7 @@ impl<'a> FftView<'a> {
self,
fourier: FourierPolynomialMutView<'out>,
standard: PolynomialView<'_, Scalar>,
stack: PodStack<'_>,
stack: &mut PodStack,
) -> FourierPolynomialMutView<'out> {
self.forward_with_conv(fourier, standard, convert_forward_torus, stack)
}
@@ -403,7 +403,7 @@ impl<'a> FftView<'a> {
self,
fourier: FourierPolynomialMutView<'out>,
standard: PolynomialView<'_, Scalar>,
stack: PodStack<'_>,
stack: &mut PodStack,
) -> FourierPolynomialMutView<'out> {
self.forward_with_conv(fourier, standard, convert_forward_integer, stack)
}
@@ -462,7 +462,7 @@ impl<'a> FftView<'a> {
self,
standard: PolynomialMutView<'_, Scalar>,
fourier: FourierPolynomialView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
self.backward_with_conv(standard, fourier, convert_backward_torus, stack);
}
@@ -481,7 +481,7 @@ impl<'a> FftView<'a> {
self,
standard: PolynomialMutView<'_, Scalar>,
fourier: FourierPolynomialView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
self.backward_with_conv(standard, fourier, convert_add_backward_torus, stack);
}
@@ -492,7 +492,7 @@ impl<'a> FftView<'a> {
self,
standard: PolynomialMutView<'_, Scalar>,
fourier: FourierPolynomialMutView<'_>,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
self.backward_with_conv_in_place(standard, fourier, convert_add_backward_torus, stack);
}
@@ -506,7 +506,7 @@ impl<'a> FftView<'a> {
fourier: FourierPolynomialMutView<'out>,
standard: PolynomialView<'_, Scalar>,
conv_fn: F,
stack: PodStack<'_>,
stack: &mut PodStack,
) -> FourierPolynomialMutView<'out> {
let fourier = fourier.data;
let standard = standard.as_ref();
@@ -526,7 +526,7 @@ impl<'a> FftView<'a> {
mut standard: PolynomialMutView<'_, Scalar>,
fourier: FourierPolynomialView<'_>,
conv_fn: F,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
let fourier = fourier.data;
let standard = standard.as_mut();
@@ -548,7 +548,7 @@ impl<'a> FftView<'a> {
mut standard: PolynomialMutView<'_, Scalar>,
fourier: FourierPolynomialMutView<'_>,
conv_fn: F,
stack: PodStack<'_>,
stack: &mut PodStack,
) {
let fourier = fourier.data;
let standard = standard.as_mut();
@@ -629,7 +629,7 @@ impl<C: Container<Element = c64>> serde::Serialize for FourierPolynomialList<C>
buf: &'a [c64],
}
impl<'a> serde::Serialize for SingleFourierPolynomial<'a> {
impl serde::Serialize for SingleFourierPolynomial<'_> {
fn serialize<S: serde::Serializer>(
&self,
serializer: S,
@@ -701,7 +701,7 @@ impl<'de, C: IntoContainerOwned<Element = c64>> serde::Deserialize<'de>
buf: &'a mut [c64],
}
impl<'de, 'a> serde::de::DeserializeSeed<'de> for FillFourier<'a> {
impl<'de> serde::de::DeserializeSeed<'de> for FillFourier<'_> {
type Value = ();
fn deserialize<D: serde::Deserializer<'de>>(
@@ -771,9 +771,9 @@ pub fn par_convert_polynomials_list_to_fourier<Scalar: UnsignedTorus>(
.unwrap()
.try_unaligned_bytes_required()
.unwrap();
let mut stack = vec![0; stack_len];
let mut mem = vec![0; stack_len];
let mut stack = PodStack::new(&mut stack);
let stack = PodStack::new(&mut mem);
for (fourier_poly, standard_poly) in izip!(
fourier_poly_chunk.chunks_exact_mut(f_polynomial_size),
@@ -782,7 +782,7 @@ pub fn par_convert_polynomials_list_to_fourier<Scalar: UnsignedTorus>(
fft.forward_as_torus(
FourierPolynomialMutView { data: fourier_poly },
PolynomialView::from_container(standard_poly),
stack.rb_mut(),
stack,
);
}
});

View File

@@ -28,11 +28,11 @@ fn test_roundtrip<Scalar: UnsignedTorus>() {
.unwrap()
.and(fft.backward_scratch().unwrap()),
);
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
// Simple roundtrip
fft.forward_as_torus(fourier.as_mut_view(), poly.as_view(), stack.rb_mut());
fft.backward_as_torus(roundtrip.as_mut_view(), fourier.as_view(), stack.rb_mut());
fft.forward_as_torus(fourier.as_mut_view(), poly.as_view(), stack);
fft.backward_as_torus(roundtrip.as_mut_view(), fourier.as_view(), stack);
for (expected, actual) in izip!(poly.as_ref().iter(), roundtrip.as_ref().iter()) {
if Scalar::BITS == 32 {
@@ -45,8 +45,8 @@ fn test_roundtrip<Scalar: UnsignedTorus>() {
// Simple add roundtrip
// Need to zero out the buffer to have a correct result as we will be adding the result
roundtrip.as_mut().fill(Scalar::ZERO);
fft.forward_as_torus(fourier.as_mut_view(), poly.as_view(), stack.rb_mut());
fft.add_backward_as_torus(roundtrip.as_mut_view(), fourier.as_view(), stack.rb_mut());
fft.forward_as_torus(fourier.as_mut_view(), poly.as_view(), stack);
fft.add_backward_as_torus(roundtrip.as_mut_view(), fourier.as_view(), stack);
for (expected, actual) in izip!(poly.as_ref().iter(), roundtrip.as_ref().iter()) {
if Scalar::BITS == 32 {
@@ -59,12 +59,8 @@ fn test_roundtrip<Scalar: UnsignedTorus>() {
// Forward, then add backward in place
// Need to zero out the buffer to have a correct result as we will be adding the result
roundtrip.as_mut().fill(Scalar::ZERO);
fft.forward_as_torus(fourier.as_mut_view(), poly.as_view(), stack.rb_mut());
fft.add_backward_in_place_as_torus(
roundtrip.as_mut_view(),
fourier.as_mut_view(),
stack.rb_mut(),
);
fft.forward_as_torus(fourier.as_mut_view(), poly.as_view(), stack);
fft.add_backward_in_place_as_torus(roundtrip.as_mut_view(), fourier.as_mut_view(), stack);
for (expected, actual) in izip!(poly.as_ref().iter(), roundtrip.as_ref().iter()) {
if Scalar::BITS == 32 {
@@ -134,10 +130,10 @@ fn test_product<Scalar: UnsignedTorus>() {
.unwrap()
.and(fft.backward_scratch().unwrap()),
);
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
fft.forward_as_torus(fourier0.as_mut_view(), poly0.as_view(), stack.rb_mut());
fft.forward_as_integer(fourier1.as_mut_view(), poly1.as_view(), stack.rb_mut());
fft.forward_as_torus(fourier0.as_mut_view(), poly0.as_view(), stack);
fft.forward_as_integer(fourier1.as_mut_view(), poly1.as_view(), stack);
for (f0, f1) in izip!(&mut *fourier0.data, &*fourier1.data) {
*f0 *= *f1;
@@ -153,7 +149,7 @@ fn test_product<Scalar: UnsignedTorus>() {
fft.backward_as_torus(
convolution_from_fft.as_mut_view(),
fourier0.as_view(),
stack.rb_mut(),
stack,
);
for (expected, actual) in izip!(
@@ -175,7 +171,7 @@ fn test_product<Scalar: UnsignedTorus>() {
fft.add_backward_as_torus(
convolution_from_fft.as_mut_view(),
fourier0.as_view(),
stack.rb_mut(),
stack,
);
for (expected, actual) in izip!(
@@ -199,7 +195,7 @@ fn test_product<Scalar: UnsignedTorus>() {
fft.add_backward_in_place_as_torus(
convolution_from_fft.as_mut_view(),
fourier0.as_mut_view(),
stack.rb_mut(),
stack,
);
for (expected, actual) in izip!(

View File

@@ -23,7 +23,7 @@ pub struct CudaSliceMut<'a, T: Numeric> {
_phantom_2: PhantomData<&'a mut ()>,
}
impl<'a, T> CudaSlice<'a, T>
impl<T> CudaSlice<'_, T>
where
T: Numeric,
{
@@ -53,7 +53,7 @@ where
}
}
impl<'a, T> CudaSliceMut<'a, T>
impl<T> CudaSliceMut<'_, T>
where
T: Numeric,
{

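Many of the remaining hunks are the same mechanical cleanup: an impl block that names a lifetime parameter it never uses is rewritten with the anonymous lifetime '_. A small self-contained sketch of the pattern (SliceView here is a stand-in type, not something from tfhe-rs):

struct SliceView<'a, T> {
    data: &'a [T],
}

// Before: `impl<'a, T> SliceView<'a, T> { ... }` with an otherwise unused 'a.
// After: the lifetime is elided, matching the CudaSlice, FftView and array
// backend impls in this diff.
impl<T> SliceView<'_, T> {
    fn first(&self) -> Option<&T> {
        self.data.first()
    }
}

fn main() {
    let v = [1, 2, 3];
    let view = SliceView { data: &v };
    assert_eq!(view.first(), Some(&1));
}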
View File

@@ -36,7 +36,7 @@ impl ArrayBackend for CpuFheBoolArrayBackend {
type Owned = Vec<BooleanBlock>;
}
impl<'a> BackendDataContainer for &'a [BooleanBlock] {
impl BackendDataContainer for &[BooleanBlock] {
type Backend = CpuFheBoolArrayBackend;
fn len(&self) -> usize {
@@ -55,7 +55,7 @@ impl<'a> BackendDataContainer for &'a [BooleanBlock] {
}
}
impl<'a> BackendDataContainer for &'a mut [BooleanBlock] {
impl BackendDataContainer for &mut [BooleanBlock] {
type Backend = CpuFheBoolArrayBackend;
fn len(&self) -> usize {
@@ -74,7 +74,7 @@ impl<'a> BackendDataContainer for &'a mut [BooleanBlock] {
}
}
impl<'a> BackendDataContainerMut for &'a mut [BooleanBlock] {
impl BackendDataContainerMut for &mut [BooleanBlock] {
fn as_sub_slice_mut(
&mut self,
range: impl RangeBounds<usize>,
@@ -220,7 +220,7 @@ impl FheTryEncrypt<&[bool], ClientKey> for CpuFheBoolArray {
}
}
impl<'a> FheDecrypt<Vec<bool>> for CpuFheBoolSlice<'a> {
impl FheDecrypt<Vec<bool>> for CpuFheBoolSlice<'_> {
fn decrypt(&self, key: &ClientKey) -> Vec<bool> {
self.elems
.iter()
@@ -229,7 +229,7 @@ impl<'a> FheDecrypt<Vec<bool>> for CpuFheBoolSlice<'a> {
}
}
impl<'a> FheDecrypt<Vec<bool>> for CpuFheBoolSliceMut<'a> {
impl FheDecrypt<Vec<bool>> for CpuFheBoolSliceMut<'_> {
fn decrypt(&self, key: &ClientKey) -> Vec<bool> {
self.as_slice().decrypt(key)
}

View File

@@ -367,7 +367,7 @@ where
}
}
impl<'a, T> BackendDataContainer for &'a [T]
impl<T> BackendDataContainer for &[T]
where
T: IntegerRadixCiphertext,
{
@@ -389,7 +389,7 @@ where
}
}
impl<'a, T> BackendDataContainer for &'a mut [T]
impl<T> BackendDataContainer for &mut [T]
where
T: IntegerRadixCiphertext,
{
@@ -411,7 +411,7 @@ where
}
}
impl<'a, T> BackendDataContainerMut for &'a mut [T]
impl<T> BackendDataContainerMut for &mut [T]
where
T: IntegerRadixCiphertext,
{
@@ -481,7 +481,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheUintSliceMut<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheUintSliceMut<'_, Id>
where
Id: FheUintId,
Clear: RecomposableFrom<u64> + UnsignedNumeric,
@@ -491,7 +491,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheUintSlice<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheUintSlice<'_, Id>
where
Id: FheUintId,
Clear: RecomposableFrom<u64> + UnsignedNumeric,
@@ -534,7 +534,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheIntSliceMut<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheIntSliceMut<'_, Id>
where
Id: FheIntId,
Clear: RecomposableSignedInteger,
@@ -544,7 +544,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheIntSlice<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for CpuFheIntSlice<'_, Id>
where
Id: FheIntId,
Clear: RecomposableSignedInteger,

View File

@@ -57,7 +57,7 @@ where
type Owned = ClearContainer<Vec<T>>;
}
impl<'a, T> BackendDataContainer for ClearContainer<&'a [T]>
impl<T> BackendDataContainer for ClearContainer<&'_ [T]>
where
T: Copy,
{
@@ -79,7 +79,7 @@ where
}
}
impl<'a, T> BackendDataContainer for ClearContainer<&'a mut [T]>
impl<T> BackendDataContainer for ClearContainer<&mut [T]>
where
T: Copy,
{
@@ -101,7 +101,7 @@ where
}
}
impl<'a, T> BackendDataContainerMut for ClearContainer<&'a mut [T]>
impl<T> BackendDataContainerMut for ClearContainer<&mut [T]>
where
T: Copy,
{

View File

@@ -229,7 +229,7 @@ pub enum InnerBoolSlice<'a> {
Cpu(&'a [BooleanBlock]),
}
impl<'a> InnerBoolSlice<'a> {
impl InnerBoolSlice<'_> {
fn on_cpu(&self) -> Cow<'_, [BooleanBlock]> {
match self {
InnerBoolSlice::Cpu(cpu_slice) => Cow::Borrowed(cpu_slice),
@@ -237,7 +237,7 @@ impl<'a> InnerBoolSlice<'a> {
}
}
impl<'a> BackendDataContainer for InnerBoolSlice<'a> {
impl BackendDataContainer for InnerBoolSlice<'_> {
type Backend = DynFheBoolArrayBackend;
fn len(&self) -> usize {
@@ -268,7 +268,7 @@ pub enum InnerBoolSliceMut<'a> {
Cpu(&'a mut [BooleanBlock]),
}
impl<'a> BackendDataContainer for InnerBoolSliceMut<'a> {
impl BackendDataContainer for InnerBoolSliceMut<'_> {
type Backend = DynFheBoolArrayBackend;
fn len(&self) -> usize {
@@ -295,7 +295,7 @@ impl<'a> BackendDataContainer for InnerBoolSliceMut<'a> {
}
}
impl<'a> BackendDataContainerMut for InnerBoolSliceMut<'a> {
impl BackendDataContainerMut for InnerBoolSliceMut<'_> {
fn as_sub_slice_mut(
&mut self,
range: impl RangeBounds<usize>,

View File

@@ -74,7 +74,7 @@ pub enum InnerIntSlice<'a> {
Cpu(&'a [SignedRadixCiphertext]),
}
impl<'a> InnerIntSlice<'a> {
impl InnerIntSlice<'_> {
pub(crate) fn on_cpu(&self) -> Cow<'_, [SignedRadixCiphertext]> {
match self {
Self::Cpu(cpu_slice) => Cow::Borrowed(cpu_slice),
@@ -82,7 +82,7 @@ impl<'a> InnerIntSlice<'a> {
}
}
impl<'a> BackendDataContainer for InnerIntSlice<'a> {
impl BackendDataContainer for InnerIntSlice<'_> {
type Backend = DynIntBackend;
fn len(&self) -> usize {
@@ -111,7 +111,7 @@ pub enum InnerIntSliceMut<'a> {
Cpu(&'a mut [SignedRadixCiphertext]),
}
impl<'a> BackendDataContainer for InnerIntSliceMut<'a> {
impl BackendDataContainer for InnerIntSliceMut<'_> {
type Backend = DynIntBackend;
fn len(&self) -> usize {
@@ -136,7 +136,7 @@ impl<'a> BackendDataContainer for InnerIntSliceMut<'a> {
}
}
impl<'a> BackendDataContainerMut for InnerIntSliceMut<'a> {
impl BackendDataContainerMut for InnerIntSliceMut<'_> {
fn as_sub_slice_mut(&mut self, range: impl RangeBounds<usize>) -> InnerIntSliceMut<'_> {
match self {
Self::Cpu(cpu_slice) => {

View File

@@ -36,7 +36,7 @@ pub enum InnerUintSlice<'a> {
Cpu(&'a [RadixCiphertext]),
}
impl<'a> InnerUintSlice<'a> {
impl InnerUintSlice<'_> {
pub(crate) fn on_cpu(&self) -> Cow<'_, [RadixCiphertext]> {
match self {
InnerUintSlice::Cpu(cpu_slice) => Cow::Borrowed(cpu_slice),
@@ -98,7 +98,7 @@ impl BackendDataContainerMut for InnerUintArray {
}
}
impl<'a> BackendDataContainer for InnerUintSlice<'a> {
impl BackendDataContainer for InnerUintSlice<'_> {
type Backend = DynUintBackend;
fn len(&self) -> usize {
@@ -123,7 +123,7 @@ impl<'a> BackendDataContainer for InnerUintSlice<'a> {
}
}
impl<'a> BackendDataContainer for InnerUintSliceMut<'a> {
impl BackendDataContainer for InnerUintSliceMut<'_> {
type Backend = DynUintBackend;
fn len(&self) -> usize {
@@ -148,7 +148,7 @@ impl<'a> BackendDataContainer for InnerUintSliceMut<'a> {
}
}
impl<'a> BackendDataContainerMut for InnerUintSliceMut<'a> {
impl BackendDataContainerMut for InnerUintSliceMut<'_> {
fn as_sub_slice_mut(&mut self, range: impl RangeBounds<usize>) -> InnerUintSliceMut<'_> {
match self {
Self::Cpu(cpu_slice) => {
@@ -403,7 +403,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for FheUintSliceMut<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for FheUintSliceMut<'_, Id>
where
Id: FheUintId,
Clear: RecomposableFrom<u64> + UnsignedNumeric,
@@ -413,7 +413,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for FheUintSlice<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for FheUintSlice<'_, Id>
where
Id: FheUintId,
Clear: RecomposableFrom<u64> + UnsignedNumeric,

View File

@@ -68,7 +68,7 @@ impl From<Vec<CudaBooleanBlock>> for GpuBooleanOwned {
}
}
impl<'a> BackendDataContainer for GpuBooleanSlice<'a> {
impl BackendDataContainer for GpuBooleanSlice<'_> {
type Backend = GpuFheBoolArrayBackend;
fn len(&self) -> usize {
@@ -89,7 +89,7 @@ impl<'a> BackendDataContainer for GpuBooleanSlice<'a> {
}
}
impl<'a> BackendDataContainer for GpuBooleanSliceMut<'a> {
impl BackendDataContainer for GpuBooleanSliceMut<'_> {
type Backend = GpuFheBoolArrayBackend;
fn len(&self) -> usize {
@@ -110,7 +110,7 @@ impl<'a> BackendDataContainer for GpuBooleanSliceMut<'a> {
}
}
impl<'a> BackendDataContainerMut for GpuBooleanSliceMut<'a> {
impl BackendDataContainerMut for GpuBooleanSliceMut<'_> {
fn as_sub_slice_mut(
&mut self,
range: impl RangeBounds<usize>,
@@ -271,7 +271,7 @@ impl FheTryEncrypt<&[bool], ClientKey> for GpuFheBoolArray {
}
}
impl<'a> FheDecrypt<Vec<bool>> for GpuFheBoolSlice<'a> {
impl FheDecrypt<Vec<bool>> for GpuFheBoolSlice<'_> {
fn decrypt(&self, key: &ClientKey) -> Vec<bool> {
with_thread_local_cuda_streams(|streams| {
self.elems
@@ -287,7 +287,7 @@ impl<'a> FheDecrypt<Vec<bool>> for GpuFheBoolSlice<'a> {
}
}
impl<'a> FheDecrypt<Vec<bool>> for GpuFheBoolSliceMut<'a> {
impl FheDecrypt<Vec<bool>> for GpuFheBoolSliceMut<'_> {
fn decrypt(&self, key: &ClientKey) -> Vec<bool> {
self.as_slice().decrypt(key)
}

View File

@@ -420,7 +420,7 @@ where
}
}
impl<'a, T> BackendDataContainer for GpuSlice<'a, T>
impl<T> BackendDataContainer for GpuSlice<'_, T>
where
T: CudaIntegerRadixCiphertext,
{
@@ -444,7 +444,7 @@ where
}
}
impl<'a, T> BackendDataContainer for GpuSliceMut<'a, T>
impl<T> BackendDataContainer for GpuSliceMut<'_, T>
where
T: CudaIntegerRadixCiphertext,
{
@@ -468,7 +468,7 @@ where
}
}
impl<'a, T> BackendDataContainerMut for GpuSliceMut<'a, T>
impl<T> BackendDataContainerMut for GpuSliceMut<'_, T>
where
T: CudaIntegerRadixCiphertext,
{
@@ -553,7 +553,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheUintSliceMut<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheUintSliceMut<'_, Id>
where
Id: FheUintId,
Clear: RecomposableFrom<u64> + UnsignedNumeric,
@@ -563,7 +563,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheUintSlice<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheUintSlice<'_, Id>
where
Id: FheUintId,
Clear: RecomposableFrom<u64> + UnsignedNumeric,
@@ -617,7 +617,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheIntSliceMut<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheIntSliceMut<'_, Id>
where
Id: FheIntId,
Clear: RecomposableSignedInteger,
@@ -627,7 +627,7 @@ where
}
}
impl<'a, Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheIntSlice<'a, Id>
impl<Clear, Id> FheDecrypt<Vec<Clear>> for GpuFheIntSlice<'_, Id>
where
Id: FheIntId,
Clear: RecomposableSignedInteger,

View File

@@ -111,9 +111,8 @@ where
}
}
impl<'a, 's, C1, Id> Not for &'a FheArrayBase<C1, Id>
impl<C1, Id> Not for &FheArrayBase<C1, Id>
where
'a: 's,
Id: Default,
C1: BackendDataContainer,
C1::Backend: BitwiseArrayBackend,

View File

@@ -194,13 +194,13 @@ impl<'a, T> Iterator for StridedIter<'a, T> {
}
}
impl<'a, T> ExactSizeIterator for StridedIter<'a, T> {
impl<T> ExactSizeIterator for StridedIter<'_, T> {
fn len(&self) -> usize {
self.index_producer.len()
}
}
impl<'a, T> DoubleEndedIterator for StridedIter<'a, T> {
impl<T> DoubleEndedIterator for StridedIter<'_, T> {
fn next_back(&mut self) -> Option<Self::Item> {
let current_flat_index = self.index_producer.next_back()?;
self.data.get(current_flat_index)
@@ -240,13 +240,13 @@ impl<'a, T> Iterator for CountedStridedIter<'a, T> {
}
}
impl<'a, T> ExactSizeIterator for CountedStridedIter<'a, T> {
impl<T> ExactSizeIterator for CountedStridedIter<'_, T> {
fn len(&self) -> usize {
self.max_count - self.current_count
}
}
impl<'a, T> DoubleEndedIterator for CountedStridedIter<'a, T> {
impl<T> DoubleEndedIterator for CountedStridedIter<'_, T> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.current_count == 0 {
None
@@ -290,7 +290,7 @@ where
}
}
impl<'a, T> rayon::iter::IndexedParallelIterator for ParStridedIter<'a, T>
impl<T> rayon::iter::IndexedParallelIterator for ParStridedIter<'_, T>
where
T: Send + Sync,
{
@@ -407,7 +407,7 @@ impl<'a, T> Iterator for OffsettedStridedIterMut<'a, T> {
}
}
impl<'a, T> DoubleEndedIterator for OffsettedStridedIterMut<'a, T> {
impl<T> DoubleEndedIterator for OffsettedStridedIterMut<'_, T> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.current_count == 0 {
None
@@ -427,7 +427,7 @@ impl<'a, T> DoubleEndedIterator for OffsettedStridedIterMut<'a, T> {
}
}
impl<'a, T> ExactSizeIterator for OffsettedStridedIterMut<'a, T> {
impl<T> ExactSizeIterator for OffsettedStridedIterMut<'_, T> {
fn len(&self) -> usize {
ExactSizeIterator::len(&self.index_producer)
}
@@ -466,7 +466,7 @@ where
}
}
impl<'a, T> rayon::iter::IndexedParallelIterator for ParStridedIterMut<'a, T>
impl<T> rayon::iter::IndexedParallelIterator for ParStridedIterMut<'_, T>
where
T: Send,
{

View File

@@ -7,7 +7,7 @@ pub(crate) enum MaybeCloned<'a, T> {
Cloned(T),
}
impl<'a, T> MaybeCloned<'a, T> {
impl<T> MaybeCloned<'_, T> {
pub(crate) fn into_owned(self) -> T
where
T: ToOwned<Owned = T>,

View File

@@ -549,7 +549,7 @@ fn test_trivial_fhe_int256_small() {
fn test_compact_public_key_big() {
let config = ConfigBuilder::default()
.use_custom_parameters(
crate::shortint::parameters::classic::compact_pk::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
crate::shortint::parameters::classic::compact_pk::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
)
.build();
let (client_key, _) = generate_keys(config);
@@ -569,7 +569,7 @@ fn test_compact_public_key_big() {
fn test_compact_public_key_small() {
let config = ConfigBuilder::default()
.use_custom_parameters(
crate::shortint::parameters::classic::compact_pk::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS,
crate::shortint::parameters::classic::compact_pk::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64,
)
.build();
let (client_key, _) = generate_keys(config);

View File

@@ -206,7 +206,7 @@ fn test_decompressed_public_key_encrypt() {
#[test]
fn test_compact_public_key_big() {
let config = ConfigBuilder::default()
.use_custom_parameters(PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS)
.use_custom_parameters(PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64)
.build();
let (client_key, _) = generate_keys(config);
@@ -224,7 +224,7 @@ fn test_compact_public_key_big() {
#[test]
fn test_compact_public_key_small() {
let config = ConfigBuilder::default()
.use_custom_parameters(PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS)
.use_custom_parameters(PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_PBS_KS_GAUSSIAN_2M64)
.build();
let (client_key, _) = generate_keys(config);

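The two test files above only swap in the renamed constant. For reference, a usage sketch with the new name, mirroring the builder calls shown in these tests (the `tfhe::` import paths are assumed for code outside the crate):

use tfhe::shortint::parameters::classic::compact_pk::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64;
use tfhe::{generate_keys, ConfigBuilder};

fn main() {
    // Same parameter set as before the rename; the _GAUSSIAN_2M64 suffix now
    // spells out the noise distribution in the constant's name.
    let config = ConfigBuilder::default()
        .use_custom_parameters(PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64)
        .build();
    let (_client_key, _server_key) = generate_keys(config);
}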
View File

@@ -275,7 +275,8 @@ impl ParameterSetConformant for CompressedCompactPublicKey {
mod test {
use crate::conformance::ParameterSetConformant;
use crate::shortint::parameters::{
CompactPublicKeyEncryptionParameters, PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS,
CompactPublicKeyEncryptionParameters,
PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64,
};
use crate::{
generate_keys, ClientKey, CompactPublicKey, CompressedCompactPublicKey, ConfigBuilder,
@@ -283,7 +284,7 @@ mod test {
#[test]
fn conformance_compact_public_key() {
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS;
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64;
let config = ConfigBuilder::default()
.use_custom_parameters(params)
@@ -319,7 +320,7 @@ mod test {
#[test]
fn conformance_compressed_compact_public_key() {
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS;
let params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64;
let config = ConfigBuilder::default()
.use_custom_parameters(params)

View File

@@ -129,35 +129,37 @@ pub enum Device {
}
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[repr(i32)]
#[cfg_attr(test, derive(strum::EnumIter))]
pub enum FheTypes {
Bool,
Uint2,
Uint4,
Uint6,
Uint8,
Uint10,
Uint12,
Uint14,
Uint16,
Uint32,
Uint64,
Uint128,
Uint160,
Uint256,
Uint512,
Uint1024,
Uint2048,
Int2,
Int4,
Int6,
Int8,
Int10,
Int12,
Int14,
Int16,
Int32,
Int64,
Int128,
Int160,
Int256,
Bool = 0,
Uint4 = 1,
Uint8 = 2,
Uint16 = 3,
Uint32 = 4,
Uint64 = 5,
Uint128 = 6,
Uint160 = 7,
Uint256 = 8,
Uint512 = 9,
Uint1024 = 10,
Uint2048 = 11,
Uint2 = 12,
Uint6 = 13,
Uint10 = 14,
Uint12 = 15,
Uint14 = 16,
Int2 = 17,
Int4 = 18,
Int6 = 19,
Int8 = 20,
Int10 = 21,
Int12 = 22,
Int14 = 23,
Int16 = 24,
Int32 = 25,
Int64 = 26,
Int128 = 27,
Int160 = 28,
Int256 = 29,
}

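The FheTypes hunk above pins an explicit i32 discriminant on every variant. A tiny sketch of why that matters for a #[repr(i32)] enum: the integer value no longer depends on declaration order, so later variants can be appended without shifting values already published (DemoFheTypes below is a hypothetical stand-in, not the real enum):

#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(i32)]
enum DemoFheTypes {
    Bool = 0,
    Uint4 = 1,
    Uint8 = 2,
    // A variant added later keeps the existing values stable.
    Uint2 = 12,
}

fn main() {
    // The discriminant is now part of the stable surface, e.g. for FFI.
    assert_eq!(DemoFheTypes::Uint8 as i32, 2);
    assert_eq!(DemoFheTypes::Uint2 as i32, 12);
}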
View File

@@ -203,7 +203,7 @@ impl serde::Serialize for SmallVec {
struct SmallVecVisitor;
impl<'de> serde::de::Visitor<'de> for SmallVecVisitor {
impl serde::de::Visitor<'_> for SmallVecVisitor {
type Value = SmallVec;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {

View File

@@ -605,9 +605,9 @@ impl CompactCiphertextList {
/// RadixCiphertext, SignedRadixCiphertext,
/// };
/// use tfhe::integer::{ClientKey, CompactPublicKey};
/// use tfhe::shortint::parameters::classic::compact_pk::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS;
/// use tfhe::shortint::parameters::classic::compact_pk::PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64;
///
/// let fhe_params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS;
/// let fhe_params = PARAM_MESSAGE_2_CARRY_2_COMPACT_PK_KS_PBS_GAUSSIAN_2M64;
///
/// let num_blocks = 4usize;
///

Some files were not shown because too many files have changed in this diff.