# Compile tfhe-cuda-backend and run its GPU memory checks (valgrind) on a
# remote GPU instance. When organization secrets are available the instance is
# a Hyperstack single-H100 machine started through slab-github-runner;
# otherwise the job falls back to the EXTERNAL_CONTRIBUTION_RUNNER group.
name: gpu_code_validation_tests

env:
  CARGO_TERM_COLOR: always
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUSTFLAGS: "-C target-cpu=native"
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  SLACKIFY_MARKDOWN: true
  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
  # Filled in by the slack-notify job when the run belongs to a pull request.
  PULL_REQUEST_MD_LINK: ""
  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
  # Secrets will be available only to zama-ai organization members
  SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
  EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"

on:
  # Allows you to run this workflow manually from the Actions tab as an alternative.
  workflow_dispatch:
  schedule:
    # every month
    - cron: "0 0 1 * *"

permissions:
  contents: read

# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning

jobs:
  # Selects the runner for the test job and exposes its label/group as the
  # `runner-name` output. Exactly one of the two steps runs, depending on
  # whether organization secrets are available.
  setup-instance:
    name: gpu_code_validation_tests/setup-instance
    runs-on: ubuntu-latest
    if: github.event_name != 'pull_request' || (github.event.action == 'labeled' && github.event.label.name == 'approved')
    outputs:
      runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
    steps:
      - name: Start remote instance
        id: start-remote-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: hyperstack
          profile: single-h100

      # This instance will be spawned especially for pull-request from forked repository
      - name: Start GitHub instance
        id: start-github-instance
        if: env.SECRETS_AVAILABLE == 'false'
        run: |
          echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"

  # Runs the valgrind-based GPU test target on the runner chosen by
  # setup-instance. The concurrency group is declared here (not at workflow
  # level) so it only takes effect once the instance has been provisioned.
  cuda-tests-linux:
    name: gpu_code_validation_tests/cuda-tests-linux
    needs: [ setup-instance ]
    if: github.event_name != 'pull_request' || (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
    concurrency:
      group: ${{ github.workflow_ref }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    # NOTE(review): 14400 minutes is 10 days; confirm this does not exceed the
    # maximum job execution time enforced for the runner type in use.
    timeout-minutes: 14400
    strategy:
      fail-fast: false
      # explicit include-based build matrix, of known valid options
      matrix:
        include:
          - os: ubuntu-22.04
            cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
        with:
          persist-credentials: 'false'
          token: ${{ env.CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
        uses: ./.github/actions/gpu_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}
          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

      # Installs valgrind and prints where compute-sanitizer and valgrind live.
      # apt-get is used instead of apt because apt's command-line interface is
      # not guaranteed to be stable for scripted use.
      - name: Find tools
        run: |
          sudo apt-get update && sudo apt-get install -y valgrind
          find /usr -executable -name "compute-sanitizer"
          which valgrind

      - name: Install latest stable
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: stable

      - name: Run memory sanitizer
        run: |
          make test_high_level_api_gpu_valgrind

  # Posts the test result to Slack when an upstream job failed and the test
  # job was not skipped. Notification problems never fail the workflow
  # (continue-on-error).
  slack-notify:
    name: gpu_code_validation_tests/slack-notify
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
    continue-on-error: true
    steps:
      # Prepends a markdown link to the pull request in the Slack message.
      - name: Set pull-request URL
        if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
        run: |
          echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
        env:
          PR_BASE_URL: ${{ vars.PR_BASE_URL }}
          PR_NUMBER: ${{ github.event.pull_request.number }}

      - name: Send message
        if: env.SECRETS_AVAILABLE == 'true'
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
          SLACK_MESSAGE: "GPU Memory Checks tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"

  # Stops the remote instance whenever setup-instance succeeded, regardless of
  # the test outcome (always()), and reports to Slack if the teardown fails.
  teardown-instance:
    name: gpu_code_validation_tests/teardown-instance
    if: ${{ always() && needs.setup-instance.result == 'success' }}
    needs: [ setup-instance, cuda-tests-linux ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop remote instance
        id: stop-instance
        if: env.SECRETS_AVAILABLE == 'true'
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}

      - name: Slack Notification
        if: ${{ failure() }}
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"