name: gpu_integer_long_run_tests

env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
  IS_PR: ${{ github.event_name == 'pull_request' }}

on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
    # Nightly tests will be triggered each evening 8p.m.
    - cron: "0 20 * * *"
  pull_request:


permissions:
  contents: read

# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning

jobs:
  setup-instance:
    name: gpu_integer_long_run_tests/setup-instance
    if: github.event_name != 'schedule' ||
      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
      - name: Start instance
        id: start-instance
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: hyperstack
          profile: 4-l40

  cuda-tests:
    name: gpu_integer_long_run_tests/cuda-tests
    needs: [ setup-instance ]
    concurrency:
      group: ${{ github.workflow_ref }}_${{github.event_name}}
      cancel-in-progress: true
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
      matrix:
        include:
          - os: ubuntu-22.04
            cuda: "12.8"
            gcc: 11 
    timeout-minutes: 4320 # 72 hours
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
        uses: ./.github/actions/gpu_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable
      - name: Enable nvidia multi-process service
        run: |
          nvidia-cuda-mps-control -d
      - name: Run tests
        run: |
          if [[ "${IS_PR}" == "true" ]]; then
            make test_integer_short_run_gpu
          else
            make test_integer_long_run_gpu
          fi

  slack-notify:
    name: gpu_integer_long_run_tests/slack-notify
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests.result != 'skipped' && failure() }}
    continue-on-error: true
    steps:
      - name: Send message
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ needs.cuda-tests.result }}
          SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: gpu_integer_long_run_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}

      - name: Slack Notification
        if: ${{ failure() }}
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (gpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"