# Long-running integer tests on GPU.
#
# Flow: start a remote GPU runner, run the integer test suite on it
# (short variant for pull requests, long variant otherwise), report failures
# to Slack, and always stop the runner once it has been started.
name: gpu_integer_long_run_tests

env:
  CARGO_TERM_COLOR: always
  # Link to this run, embedded in the Slack messages below.
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  # Falls back to the default GITHUB_TOKEN when REPO_CHECKOUT_TOKEN is empty.
  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
  # Reaches the shell as the string "true" / "false"; read by the
  # "Run tests" step to pick the short or long test target.
  IS_PR: ${{ github.event_name == 'pull_request' }}

on:
  # Manual trigger from the Actions tab.
  workflow_dispatch:
  schedule:
    # Nightly run at 20:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 20 * * *"
  pull_request:

permissions:
  contents: read

# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning

jobs:
  # Starts the remote GPU instance and exposes its runner label to later jobs.
  setup-instance:
    name: gpu_integer_long_run_tests/setup-instance
    # Scheduled runs are restricted to the upstream repository;
    # every other trigger is always allowed.
    if: github.event_name != 'schedule' || (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
    runs-on: ubuntu-latest
    outputs:
      runner-name: ${{ steps.start-instance.outputs.label }}
    steps:
      - name: Start instance
        id: start-instance
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: hyperstack
          profile: 4-l40

  # Runs the integer GPU test suite on the instance started above.
  cuda-tests:
    name: gpu_integer_long_run_tests/cuda-tests
    needs: [setup-instance]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    # Concurrency is scoped to this job (not the workflow): a newer run with
    # the same workflow ref and event type cancels the one in progress.
    concurrency:
      group: ${{ github.workflow_ref }}_${{github.event_name}}
      cancel-in-progress: true
    timeout-minutes: 4320 # 72 hours
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
      matrix:
        include:
          - os: ubuntu-22.04
            cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      # Local composite action; receives the CUDA and GCC versions
      # selected by the matrix entry.
      - name: Setup Hyperstack dependencies
        uses: ./.github/actions/gpu_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

      - name: Enable nvidia multi-process service
        run: |
          nvidia-cuda-mps-control -d

      # Pull requests get the short suite; manual and scheduled runs
      # get the long one.
      - name: Run tests
        run: |
          if [[ "${IS_PR}" == "true" ]]; then
            make test_integer_short_run_gpu
          else
            make test_integer_long_run_gpu
          fi

  # Posts the cuda-tests result to Slack when a needed job failed and
  # cuda-tests was not skipped.
  slack-notify:
    name: gpu_integer_long_run_tests/slack-notify
    needs: [setup-instance, cuda-tests]
    if: ${{ always() && needs.cuda-tests.result != 'skipped' && failure() }}
    runs-on: ubuntu-latest
    # A failed notification must not fail the workflow run.
    continue-on-error: true
    steps:
      - name: Send message
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ needs.cuda-tests.result }}
          SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"

  # Stops the instance whenever setup succeeded, whatever the test outcome.
  teardown-instance:
    name: gpu_integer_long_run_tests/teardown-instance
    needs: [setup-instance, cuda-tests]
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}

      # Only reached with a failure status when the stop step above failed.
      - name: Slack Notification
        if: ${{ failure() }}
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (gpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"