# Run benchmarks on CUDA instance and return parsed results to Slab CI bot.
name: benchmark_gpu_common

on:
  workflow_call:
    inputs:
      backend:
        type: string
        default: hyperstack
      profile:
        type: string
        required: true
      hardware_name:
        type: string
        required: true
      command: # Use a comma separated values to generate an array
        type: string
        required: true
      op_flavor: # Use a comma separated values to generate an array
        type: string
        default: default
      bench_type:
        type: string
        default: latency
      params_type:
        type: string
        default: multi_bit
      precisions_set:
        type: string
        default: fast
    secrets:
      REPO_CHECKOUT_TOKEN:
        required: true
      SLAB_ACTION_TOKEN:
        required: true
      SLAB_BASE_URL:
        required: true
      SLAB_URL:
        required: true
      JOB_SECRET:
        required: true
      SLACK_CHANNEL:
        required: true
      BOT_USERNAME:
        required: true
      SLACK_WEBHOOK:
        required: true

env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  RUST_BACKTRACE: "full"
  RUST_MIN_STACK: "8388608"
  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

permissions: {}

# zizmor: ignore[concurrency-limits] caller workflow is responsible for the concurrency

jobs:
  prepare-matrix:
    name: benchmark_gpu_common/prepare-matrix
    runs-on: ubuntu-latest
    outputs:
      command: ${{ steps.set_matrix_args.outputs.command }}
      op_flavor: ${{ steps.set_matrix_args.outputs.op_flavor }}
      bench_type: ${{ steps.set_matrix_args.outputs.bench_type }}
      params_type: ${{ steps.set_matrix_args.outputs.params_type }}
    steps:
      - name: Parse user inputs
        shell: python
        env:
          INPUTS_COMMAND: ${{ inputs.command }}
          INPUTS_OP_FLAVOR: ${{ inputs.op_flavor }}
          INPUTS_BENCH_TYPE: ${{ inputs.bench_type }}
          INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
        run: |
          import os

          inputs_command = os.environ["INPUTS_COMMAND"]
          inputs_op_flavor = os.environ["INPUTS_OP_FLAVOR"]
          inputs_bench_type = os.environ["INPUTS_BENCH_TYPE"]
          inputs_params_type = os.environ["INPUTS_PARAMS_TYPE"]
          env_file = os.environ["GITHUB_ENV"]

          split_command = inputs_command.replace(" ", "").split(",")
          split_op_flavor = inputs_op_flavor.replace(" ", "").split(",")

          if inputs_bench_type == "both":
            bench_type = ["latency", "throughput"]
          else:
            bench_type = [inputs_bench_type, ]

          if "+" in inputs_params_type:
            split_params_type= inputs_params_type.replace(" ", "").split("+")
          else:
            split_params_type = [inputs_params_type, ]

          with open(env_file, "a") as f:
            for env_name, values_to_join in [
              ("COMMAND", split_command),
              ("OP_FLAVOR", split_op_flavor),
              ("BENCH_TYPE", bench_type),
              ("PARAMS_TYPE", split_params_type),
            ]:
              f.write(f"""{env_name}=["{'", "'.join(values_to_join)}"]\n""")

      - name: Set martix arguments outputs
        id: set_matrix_args
        run: | # zizmor: ignore[template-injection] these env variable are safe
          {
            echo "command=${{ toJSON(env.COMMAND) }}";
            echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}";
            echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}";
            echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}";
          } >> "${GITHUB_OUTPUT}"

  setup-instance:
    name: benchmark_gpu_common/setup-instance
    needs: prepare-matrix
    runs-on: ubuntu-latest
    outputs:
      # Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
      # If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
      # Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
      # otherwise we'll try to run the next job on a non-existing on-demand instance.
      runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
      remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
    steps:
      - name: Start remote instance
        id: start-remote-instance
        continue-on-error: true
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: ${{ inputs.backend }}
          profile: ${{ inputs.profile }}

      - name: Acknowledge remote instance failure
        if: steps.start-remote-instance.outcome == 'failure' &&
          inputs.profile != 'single-h100'
        run: |
          echo "Remote instance instance has failed to start (profile provided: '${INPUTS_PROFILE}')"
          echo "Permanent instance instance cannot be used as a substitute (profile needed: 'single-h100')"
          exit 1
        env:
          INPUTS_PROFILE: ${{ inputs.profile }}

      # This will allow to fallback on permanent instances running on Hyperstack.
      - name: Use permanent remote instance
        id: use-permanent-instance
        if: env.SECRETS_AVAILABLE == 'true' &&
          steps.start-remote-instance.outcome == 'failure' &&
          inputs.profile == 'single-h100'
        run: |
          echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"

  # Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
  install-dependencies:
    name: benchmark_gpu_common/install-dependencies
    needs: [ setup-instance ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    strategy:
      matrix:
        # explicit include-based build matrix, of known valid options
        include:
          - cuda: "12.8"
            gcc: 11
    steps:
      - name: Checkout tfhe-rs repo
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Setup Hyperstack dependencies
        if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
        uses: ./.github/actions/gpu_setup
        with:
          cuda-version: ${{ matrix.cuda }}
          gcc-version: ${{ matrix.gcc }}

  cuda-benchmarks:
    name: benchmark_gpu_common/cuda-benchmarks
    needs: [ prepare-matrix, setup-instance, install-dependencies ]
    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
    timeout-minutes: 1440 # 24 hours
    strategy:
      fail-fast: false
      max-parallel: 1
      matrix:
        command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
        op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
        bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
        params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
        # explicit include-based build matrix, of known valid options
        include:
          - cuda: "12.8"
            gcc: 11
    env:
      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
    steps:
      - name: Checkout tfhe-rs repo with tags
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          fetch-depth: 0
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Get benchmark details
        run: |
          COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
          {
            echo "BENCH_DATE=$(date --iso-8601=seconds)";
            echo "COMMIT_DATE=${COMMIT_DATE}";
            echo "COMMIT_HASH=$(git describe --tags --dirty)";
          } >> "${GITHUB_ENV}"
        env:
          SHA: ${{ github.sha }}

      # Re-export environment variables as dependencies setup perform this task in the previous job.
      # Local env variables are cleaned at the end of each job.
      - name: Export CUDA variables
        shell: bash
        run: |
          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
          echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
          echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"

      - name: Export gcc and g++ variables
        shell: bash
        run: |
          {
          echo "CC=/usr/bin/gcc-${GCC_VERSION}";
          echo "CXX=/usr/bin/g++-${GCC_VERSION}";
          echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
          } >> "${GITHUB_ENV}"
        env:
          GCC_VERSION: ${{ matrix.gcc }}

      - name: Install rust
        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
        with:
          toolchain: nightly

      - name: Run benchmarks
        run: |
          make BIT_SIZES_SET="${PRECISIONS_SET}" BENCH_OP_FLAVOR="${OP_FLAVOR}" BENCH_TYPE="${BENCH_TYPE}" BENCH_PARAM_TYPE="${BENCH_PARAMS_TYPE}" bench_"${BENCH_COMMAND}"_gpu
        env:
          OP_FLAVOR: ${{ matrix.op_flavor }}
          BENCH_TYPE: ${{ matrix.bench_type }}
          BENCH_PARAMS_TYPE: ${{ matrix.params_type }}
          BENCH_COMMAND: ${{ matrix.command }}
          PRECISIONS_SET: ${{ inputs.precisions_set }}

      - name: Parse results
        run: |
          python3 ./ci/benchmark_parser.py target/criterion "${RESULTS_FILENAME}" \
          --database tfhe_rs \
          --hardware "${INPUTS_HARDWARE_NAME}" \
          --backend gpu \
          --project-version "${COMMIT_HASH}" \
          --branch "${REF_NAME}" \
          --commit-date "${COMMIT_DATE}" \
          --bench-date "${BENCH_DATE}" \
          --walk-subdirs \
          --name-suffix avx512 \
          --bench-type "${BENCH_TYPE}"
        env:
          INPUTS_HARDWARE_NAME: ${{ inputs.hardware_name }}
          REF_NAME: ${{ github.ref_name }}
          BENCH_TYPE: ${{ matrix.bench_type }}

      - name: Upload parsed results artifact
        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
        with:
          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
          path: ${{ env.RESULTS_FILENAME }}

      - name: Checkout Slab repo
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
        with:
          repository: zama-ai/slab
          path: slab
          persist-credentials: 'false'
          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

      - name: Send data to Slab
        shell: bash
        run: |
          python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
          --slab-url "${SLAB_URL}"
        env:
          JOB_SECRET: ${{ secrets.JOB_SECRET }}
          SLAB_URL: ${{ secrets.SLAB_URL }}

  slack-notify:
    name: benchmark_gpu_common/slack-notify
    needs: [ setup-instance, cuda-benchmarks ]
    runs-on: ubuntu-latest
    if: ${{ always() && needs.cuda-benchmarks.result != 'skipped' && failure() }}
    continue-on-error: true
    steps:
      - name: Send message
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ needs.cuda-benchmarks.result }}
          SLACK_MESSAGE: "Cuda benchmarks (${{ inputs.profile }}) finished with status: ${{ needs.cuda-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

  teardown-instance:
    name: benchmark_gpu_common/teardown-instance
    if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
    needs: [ setup-instance, cuda-benchmarks, slack-notify ]
    runs-on: ubuntu-latest
    steps:
      - name: Stop instance
        id: stop-instance
        uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
        with:
          mode: stop
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          label: ${{ needs.setup-instance.outputs.runner-name }}

      - name: Slack Notification
        if: ${{ failure() }}
        uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
        env:
          SLACK_COLOR: ${{ job.status }}
          SLACK_MESSAGE: "Instance teardown (cuda-${{ inputs.profile }}-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"