tfhe-rs/.github/workflows/gpu_pcc.yml

# Perform tfhe-cuda-backend post-commit checks on an AWS instance
name: gpu_pcc

env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  SLACKIFY_MARKDOWN: true
  PULL_REQUEST_MD_LINK: ""
  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
  # Secrets will be available only to zama-ai organization members
  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
  EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16-22.04"
  CUDA_KEYRING_PACKAGE: cuda-keyring_1.1-1_all.deb
  CUDA_KEYRING_SHA: "d93190d50b98ad4699ff40f4f7af50f16a76dac3bb8da1eaaf366d47898ff8df"

on:
  pull_request:

permissions:
  contents: read

jobs:
  setup-instance:
    name: gpu_pcc/setup-instance
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
      - name: Start remote instance
        id: start-remote-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: aws
          profile: gpu-build

      # This instance will be spawned especially for pull-request from forked repository
      - name: Start GitHub instance
        id: start-github-instance
        if: env.SECRETS_AVAILABLE == 'false'
        run: |
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  cuda-pcc:
    name: gpu_pcc/cuda-pcc (bpr)
    needs: setup-instance
    concurrency:
      group: ${{ github.workflow_ref }}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
      matrix:
        include:
          - os: ubuntu-22.04
            cuda: "12.2"
            gcc: 9
    env:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}

    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Install CUDA
        if: env.SECRETS_AVAILABLE == 'false'
        shell: bash
        run: |
          # Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
          # shellcheck disable=SC2001
          TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/"${CUDA_KEYRING_PACKAGE}"
          echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
          sha256sum -c checksum
          sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
          sudo apt update
          sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
        env:
          CUDA_VERSION: ${{ matrix.cuda }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

      - name: Export CUDA variables
        if: ${{ !cancelled() }}
        run: |
          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
          echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" >> "${GITHUB_ENV}"
        env:
          CUDA_VERSION: ${{ matrix.cuda }}

      # Specify the correct host compilers
      - name: Export gcc and g++ variables
        if: ${{ !cancelled() }}
        run: |
          {
            echo "CC=/usr/bin/gcc-${GCC_VERSION}";
            echo "CXX=/usr/bin/g++-${GCC_VERSION}";
            echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
          } >> "${GITHUB_ENV}"
        env:
          GCC_VERSION: ${{ matrix.gcc }}

      - name: Run fmt checks
        run: |
          make check_fmt_gpu

      - name: Run clippy checks
        run: |
          make pcc_gpu

      - name: Check build with hpu enabled
        run: |
          make clippy_gpu_hpu

      - name: Set pull-request URL
        if: ${{ failure() && github.event_name == 'pull_request' }}
        run: |
          echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), "  >> "${GITHUB_ENV}"
        env:
          PR_BASE_URL: ${{ vars.PR_BASE_URL }}
          PR_NUMBER: ${{ github.event.pull_request.number }}

      - name: Slack Notification
        if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  teardown-instance:
    name: cuda_pcc/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-pcc ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop remote instance
        id: stop-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}

      - name: Slack Notification
        if: ${{ failure() }}
        continue-on-error: true
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"