mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-09 14:47:56 -05:00
The term "bpr" means Branch Protection Rule. It helps one to identify any job that must pass before being able to merge to the base branch.
128 lines
4.5 KiB
YAML
128 lines
4.5 KiB
YAML
name: gpu_integer_long_run_tests
|
|
|
|
env:
|
|
CARGO_TERM_COLOR: always
|
|
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
|
RUSTFLAGS: "-C target-cpu=native"
|
|
RUST_BACKTRACE: "full"
|
|
RUST_MIN_STACK: "8388608"
|
|
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
|
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
|
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
|
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
|
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
|
IS_PR: ${{ github.event_name == 'pull_request' }}
|
|
|
|
on:
|
|
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
|
workflow_dispatch:
|
|
schedule:
|
|
# Nightly tests will be triggered each evening 8p.m.
|
|
- cron: "0 20 * * *"
|
|
pull_request:
|
|
|
|
|
|
permissions:
|
|
contents: read
|
|
|
|
jobs:
|
|
setup-instance:
|
|
name: gpu_integer_long_run_tests/setup-instance
|
|
if: github.event_name != 'schedule' ||
|
|
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
|
runs-on: ubuntu-latest
|
|
outputs:
|
|
runner-name: ${{ steps.start-instance.outputs.label }}
|
|
steps:
|
|
- name: Start instance
|
|
id: start-instance
|
|
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
|
with:
|
|
mode: start
|
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
|
job-secret: ${{ secrets.JOB_SECRET }}
|
|
backend: hyperstack
|
|
profile: multi-gpu-test
|
|
|
|
cuda-tests:
|
|
name: gpu_integer_long_run_tests/cuda-tests
|
|
needs: [ setup-instance ]
|
|
concurrency:
|
|
group: ${{ github.workflow_ref }}_${{github.event_name}}
|
|
cancel-in-progress: true
|
|
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
|
strategy:
|
|
fail-fast: false
|
|
# explicit include-based build matrix, of known valid options
|
|
matrix:
|
|
include:
|
|
- os: ubuntu-22.04
|
|
cuda: "12.8"
|
|
gcc: 11
|
|
timeout-minutes: 4320 # 72 hours
|
|
steps:
|
|
- name: Checkout tfhe-rs
|
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
|
|
with:
|
|
persist-credentials: 'false'
|
|
token: ${{ env.CHECKOUT_TOKEN }}
|
|
|
|
- name: Setup Hyperstack dependencies
|
|
uses: ./.github/actions/gpu_setup
|
|
with:
|
|
cuda-version: ${{ matrix.cuda }}
|
|
gcc-version: ${{ matrix.gcc }}
|
|
|
|
- name: Install latest stable
|
|
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
|
with:
|
|
toolchain: stable
|
|
- name: Enable nvidia multi-process service
|
|
run: |
|
|
nvidia-cuda-mps-control -d
|
|
- name: Run tests
|
|
run: |
|
|
if [[ "${IS_PR}" == "true" ]]; then
|
|
make test_integer_short_run_gpu
|
|
else
|
|
make test_integer_long_run_gpu
|
|
fi
|
|
|
|
slack-notify:
|
|
name: gpu_integer_long_run_tests/slack-notify
|
|
needs: [ setup-instance, cuda-tests ]
|
|
runs-on: ubuntu-latest
|
|
if: ${{ always() && needs.cuda-tests.result != 'skipped' && failure() }}
|
|
continue-on-error: true
|
|
steps:
|
|
- name: Send message
|
|
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
|
env:
|
|
SLACK_COLOR: ${{ needs.cuda-tests.result }}
|
|
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
|
|
|
teardown-instance:
|
|
name: gpu_integer_long_run_tests/teardown-instance
|
|
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
|
needs: [ setup-instance, cuda-tests ]
|
|
runs-on: ubuntu-latest
|
|
steps:
|
|
- name: Stop instance
|
|
id: stop-instance
|
|
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
|
with:
|
|
mode: stop
|
|
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
|
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
|
job-secret: ${{ secrets.JOB_SECRET }}
|
|
label: ${{ needs.setup-instance.outputs.runner-name }}
|
|
|
|
- name: Slack Notification
|
|
if: ${{ failure() }}
|
|
continue-on-error: true
|
|
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
|
env:
|
|
SLACK_COLOR: ${{ job.status }}
|
|
SLACK_MESSAGE: "Instance teardown (gpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|