Files
tfhe-rs/.github/workflows/gpu_signed_integer_classic_tests.yml
David Testé 25a8bbfd89 chore(ci): make slack notify fail in case of error on teardown
On instance teardown we want to be informed of any failure to
avoid having zombies running due to Slack notify action error
being silenced.
2025-11-21 17:15:58 +01:00

187 lines
7.4 KiB
YAML

# Signed integer GPU tests on an RTXA6000 VM on hyperstack with classical PBS
name: gpu_signed_integer_classic_tests
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_signed_integer_classic_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62 # v47.0.0
with:
files_yaml: |
gpu:
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- tfhe/src/core_crypto/gpu/**
- tfhe/src/integer/server_key/radix_parallel/tests_unsigned/**
- tfhe/src/integer/server_key/radix_parallel/tests_signed/**
- tfhe/src/integer/server_key/radix_parallel/tests_cases_unsigned.rs
- tfhe/src/integer/gpu/**
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_classic_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
setup-instance:
name: gpu_signed_integer_classic_tests/setup-instance
needs: should-run
if: github.event_name != 'pull_request' ||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: gpu-test
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
cuda-tests-linux:
name: gpu_signed_integer_classic_tests/cuda-tests-linux
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run signed integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
slack-notify:
name: gpu_signed_integer_classic_tests/slack-notify
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
continue-on-error: true
steps:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: gpu_signed_integer_classic_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"