From 36eceaf05ec53028b62316873870c3d5fe8c24f1 Mon Sep 17 00:00:00 2001
From: Andrei Stoian <andrei.stoian@zama.ai>
Date: Tue, 15 Jul 2025 11:25:29 +0200
Subject: [PATCH] feat(gpu): utility debug workflows in ci

---
 .github/actions/gpu_setup/action.yml          |   8 +
 .../workflows/gpu_code_validation_tests.yml   | 148 ++++++++++++++++++
 Makefile                                      |  18 ++-
 .../tfhe-cuda-backend/cuda/CMakeLists.txt     |   4 +-
 .../cuda/src/integer/cmux.cu                  |   3 +-
 scripts/check_memory_errors.sh                |  27 ++++
 6 files changed, 205 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/gpu_code_validation_tests.yml
 create mode 100755 scripts/check_memory_errors.sh

diff --git a/.github/actions/gpu_setup/action.yml b/.github/actions/gpu_setup/action.yml
index 3c50b7272..6e788a447 100644
--- a/.github/actions/gpu_setup/action.yml
+++ b/.github/actions/gpu_setup/action.yml
@@ -23,6 +23,7 @@ runs:
         echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
         sha256sum -c checksum
         sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
+        sudo apt remove -y unattended-upgrades
         sudo apt update
         sudo apt install -y cmake-format libclang-dev
       env:
@@ -50,11 +51,13 @@ runs:
     - name: Export CUDA variables
       shell: bash
       run: |
+        find /usr/local -executable -name "nvcc"
         CUDA_PATH=/usr/local/cuda-"${CUDA_VERSION}"
         {
           echo "CUDA_PATH=$CUDA_PATH";
           echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH";
           echo "CUDA_MODULE_LOADER=EAGER";
+          echo "PATH=$PATH:$CUDA_PATH/bin"; 
         } >> "${GITHUB_ENV}"
         {
           echo "PATH=$PATH:$CUDA_PATH/bin"; 
@@ -74,6 +77,11 @@ runs:
       env:
         GCC_VERSION: ${{ inputs.gcc-version }}
 
+    - name: Check setup
+      shell: bash
+      run: |
+        which nvcc
+
     - name: Check device is detected
       shell: bash
       run: nvidia-smi
diff --git a/.github/workflows/gpu_code_validation_tests.yml b/.github/workflows/gpu_code_validation_tests.yml
new file mode 100644
index 000000000..a70d145e4
--- /dev/null
+++ b/.github/workflows/gpu_code_validation_tests.yml
@@ -0,0 +1,148 @@
+# Run CUDA code validation (compute-sanitizer memory checks) for tfhe-cuda-backend on a GPU instance
+name: Cuda - Code Validation
+
+env:
+  CARGO_TERM_COLOR: always
+  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+  RUSTFLAGS: "-C target-cpu=native"
+  RUST_BACKTRACE: "full"
+  RUST_MIN_STACK: "8388608"
+  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
+  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
+  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
+  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+  SLACKIFY_MARKDOWN: true
+  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
+  PULL_REQUEST_MD_LINK: ""  # overwritten by the slack-notify job when triggered from a pull request
+  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
+  # Secrets are only available to zama-ai organization members; evaluates to 'false' otherwise
+  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
+  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"  # runner used when SECRETS_AVAILABLE is 'false'
+
+on:
+  # Allows running this workflow manually from the Actions tab.
+  workflow_dispatch:
+  pull_request:
+    types: [ labeled ]  # the setup-instance job further requires the label to be 'approved'
+
+permissions:
+  contents: read
+
+jobs:
+  setup-instance:
+    name: Setup instance (cuda-tests)
+    runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' ||
+      (github.event.action == 'labeled' && github.event.label.name == 'approved')
+    outputs:  # remote instance label, or the fallback runner name when no remote instance was started
+      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
+    steps:
+      - name: Start remote instance
+        id: start-remote-instance
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: start
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          backend: hyperstack
+          profile: gpu-test
+
+      # Without secrets (e.g. pull requests from forked repositories), fall back to EXTERNAL_CONTRIBUTION_RUNNER
+      - name: Start GitHub instance
+        id: start-github-instance
+        if: env.SECRETS_AVAILABLE == 'false'
+        run: |
+          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
+
+  cuda-tests-linux:
+    name: CUDA Code Validation tests
+    needs: [ setup-instance ]
+    if: github.event_name != 'pull_request' ||
+      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
+    concurrency:
+      group: ${{ github.workflow_ref }}
+      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}  # runs on main are never cancelled
+    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
+    strategy:
+      fail-fast: false
+      # explicit include-based build matrix, of known valid options
+      matrix:
+        include:
+          - os: ubuntu-22.04
+            cuda: "12.8"
+            gcc: 11 
+    steps:
+      - name: Checkout tfhe-rs
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          persist-credentials: 'false'
+          token: ${{ env.CHECKOUT_TOKEN }}
+
+      - name: Setup Hyperstack dependencies
+        uses: ./.github/actions/gpu_setup
+        with:
+          cuda-version: ${{ matrix.cuda }}
+          gcc-version: ${{ matrix.gcc }}
+          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
+
+      - name: Find tools  # debug aid: prints the path(s) of any compute-sanitizer executable under /usr
+        run: |
+          find /usr -executable -name "compute-sanitizer"
+
+      - name: Install latest stable
+        uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
+        with:
+          toolchain: stable
+
+      - name: Run memory sanitizer  # Makefile target that invokes scripts/check_memory_errors.sh
+        run: |
+          make test_high_level_api_gpu_debug
+
+  slack-notify:
+    name: Slack Notification
+    needs: [ setup-instance, cuda-tests-linux ]
+    runs-on: ubuntu-latest
+    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}  # notify on failure only, never for skipped tests
+    continue-on-error: true
+    steps:
+      - name: Set pull-request URL  # exports PULL_REQUEST_MD_LINK for the message sent below
+        if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
+        run: |
+          echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), "  >> "${GITHUB_ENV}"
+        env:
+          PR_BASE_URL: ${{ vars.PR_BASE_URL }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+
+      - name: Send message
+        if: env.SECRETS_AVAILABLE == 'true'
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
+          SLACK_MESSAGE: "GPU code validation tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
+
+  teardown-instance:
+    name: Teardown instance (cuda-tests)
+    if: ${{ always() && needs.setup-instance.result == 'success' }}  # runs whenever setup succeeded, even if the tests failed
+    needs: [ setup-instance, cuda-tests-linux ]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stop remote instance
+        id: stop-instance
+        if: env.SECRETS_AVAILABLE == 'true'  # a remote instance is only started when secrets are available
+        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
+        with:
+          mode: stop
+          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
+          slab-url: ${{ secrets.SLAB_BASE_URL }}
+          job-secret: ${{ secrets.JOB_SECRET }}
+          label: ${{ needs.setup-instance.outputs.runner-name }}
+
+      - name: Slack Notification  # sent only when a previous step of this job failed
+        if: ${{ failure() }}
+        continue-on-error: true
+        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
+        env:
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
diff --git a/Makefile b/Makefile
index be7152ce2..3e0a50368 100644
--- a/Makefile
+++ b/Makefile
@@ -686,6 +686,22 @@ test_integer_gpu_debug: install_rs_build_toolchain
 	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile release_lto_off \
 		--features=integer,gpu-debug -p $(TFHE_SPEC) -- integer::gpu::server_key::
 
+.PHONY: test_high_level_api_gpu_debug # Run the high-level API GPU tests under compute-sanitizer to detect CUDA memory errors
+test_high_level_api_gpu_debug: install_rs_build_toolchain install_cargo_nextest
+	export RUSTFLAGS="$(RUSTFLAGS)" && \
+	export CARGO_RS_BUILD_TOOLCHAIN="$(CARGO_RS_BUILD_TOOLCHAIN)" && \
+	export TFHE_SPEC="$(TFHE_SPEC)" && \
+	export CARGO_PROFILE="$(CARGO_PROFILE)" &&	scripts/check_memory_errors.sh
+
+.PHONY: test_integer_hl_test_gpu_check_warnings # Fail if the tfhe-cuda-backend build emits compiler warnings ("inline function" ones excluded)
+test_integer_hl_test_gpu_check_warnings: install_rs_build_toolchain
+	RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build \
+		--features=integer,internal-keycache,gpu-debug,zk-pok -vv -p $(TFHE_SPEC) &> /tmp/gpu_compile_output
+	WARNINGS=$$(cat /tmp/gpu_compile_output | grep ": warning:" | grep "\[tfhe-cuda-backend" | grep -v "inline function" || true) && \
+	if [[ "$${WARNINGS}" != "" ]]; then \
+		echo "$${WARNINGS}" && exit 1; \
+	fi
+
 
 .PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
 test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
@@ -1591,7 +1607,7 @@ tfhe_lints
 
 .PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
 pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
-clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu
+clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu test_integer_hl_test_gpu_check_warnings
 
 .PHONY: pcc_hpu # pcc stands for pre commit checks for HPU compilation
 pcc_hpu: clippy_hpu clippy_hpu_backend clippy_hpu_mockup test_integer_hpu_mockup_ci_fast
diff --git a/backends/tfhe-cuda-backend/cuda/CMakeLists.txt b/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
index 0c52f99eb..c027f909f 100644
--- a/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
+++ b/backends/tfhe-cuda-backend/cuda/CMakeLists.txt
@@ -78,8 +78,10 @@ endif()
 
 add_compile_definitions(CUDA_ARCH=${CUDA_ARCH})
 
+string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE)
+
 # Check if the DEBUG flag is defined
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+if(CMAKE_BUILD_TYPE_LOWERCASE STREQUAL "debug")
   # Debug mode
   message("Compiling in Debug mode")
   add_definitions(-DDEBUG)
diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
index 733befdd0..6fb141200 100644
--- a/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
+++ b/backends/tfhe-cuda-backend/cuda/src/integer/cmux.cu
@@ -17,11 +17,12 @@ uint64_t scratch_cuda_integer_radix_cmux_kb_64(
   std::function<uint64_t(uint64_t)> predicate_lut_f =
       [](uint64_t x) -> uint64_t { return x == 1; };
 
-  return scratch_cuda_integer_radix_cmux_kb<uint64_t>(
+  uint64_t ret = scratch_cuda_integer_radix_cmux_kb<uint64_t>(
       (cudaStream_t *)(streams), gpu_indexes, gpu_count,
       (int_cmux_buffer<uint64_t> **)mem_ptr, predicate_lut_f,
       lwe_ciphertext_count, params, allocate_gpu_memory);
   POP_RANGE()
+  return ret;
 }
 
 void cuda_cmux_integer_radix_ciphertext_kb_64(
diff --git a/scripts/check_memory_errors.sh b/scripts/check_memory_errors.sh
new file mode 100755
index 000000000..3ce45379b
--- /dev/null
+++ b/scripts/check_memory_errors.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Build the tests but don't run them
+RUSTFLAGS="$RUSTFLAGS" cargo "${CARGO_RS_BUILD_TOOLCHAIN}" test --no-run --profile "${CARGO_PROFILE}" \
+  --features=integer,internal-keycache,gpu,zk-pok -p "${TFHE_SPEC}"
+
+# Find the test executable (NOTE(review): path assumes CARGO_PROFILE builds into target/release - confirm)
+EXECUTABLE=$(find target/release/deps/ -type f -executable -name "tfhe-*")
+
+# List the tests into a temporary file
+RUSTFLAGS="$RUSTFLAGS" cargo "${CARGO_RS_BUILD_TOOLCHAIN}" nextest list --cargo-profile "${CARGO_PROFILE}" \
+          --features=integer,internal-keycache,gpu,zk-pok -p "${TFHE_SPEC}" &> /tmp/test_list.txt
+
+# Strip ANSI color codes from the listing, then keep only the high-level API GPU tests
+TESTS_HL=$(sed -e $'s/\x1b\[[0-9;]*m//g' <  /tmp/test_list.txt | grep 'high_level_api::.*gpu.*')
+
+# Run compute-sanitizer on each test individually. A failing test must not stop the
+# loop, so its exit status is recorded in RESULT with || instead of being chained with &&.
+RESULT=0
+while read -r t; do
+  # Skip blank entries (e.g. when no test matched the filter above)
+  [[ -n "${t}" ]] || continue
+  echo compute-sanitizer --leak-check=full --error-exitcode=1 --target-processes=all "$(pwd)"/"${EXECUTABLE}" -- "${t}"
+  compute-sanitizer --leak-check=full --error-exitcode=1 --target-processes=all "$(pwd)"/"${EXECUTABLE}" -- "${t}" || RESULT=1
+done <<< "${TESTS_HL}"
+
+# Exit non-zero if compute-sanitizer returned a failure for at least one test
+exit $RESULT