chore(ci): run ml benchmarks in a matrix with slab

This CI "feature" is meant to circumvent the 6 hours hard-limit
for a job in GitHub Action.
The benchmark is done using a matrix which is handled by Slab.
Here's the workflow:

  1. ML benchmarks are started in a fire-and-forget fashion via
     start_ml_benchmarks.yml.
  2. Slab reads ci/slab.toml to get the AWS EC2 configuration and
     the matrix parameters.
  3. Slab launches at most max_parallel_jobs EC2 instances in
     parallel.
  4. Each job triggers ml_benchmark_subset.yml, which runs only one
     of the YAML files generated by make generate-mlbench, based on
     the matrix item it was given.
  5. As soon as a job completes, the next one in the matrix starts
     promptly.

This continues until all the matrix items are exhausted; a rough
sketch of this scheduling is given below.
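
To make the scheduling concrete, here is a minimal bash sketch of the
behaviour described above. trigger_subset_workflow is a hypothetical
stand-in for Slab dispatching ml_benchmark_subset.yml for one matrix
item on a fresh EC2 instance; it is not a real script in this repo.

#!/bin/bash
# Sketch only: mirrors [command.ml-bench] from ci/slab.toml.
MAX_PARALLEL_JOBS=2
MATRIX=(0 1 2 3 4 5 6 7 8 9 10)

for item in "${MATRIX[@]}"; do
    # Keep at most MAX_PARALLEL_JOBS subsets in flight at any time.
    while (( $(jobs -rp | wc -l) >= MAX_PARALLEL_JOBS )); do
        wait -n  # returns as soon as any one running job completes
    done
    trigger_subset_workflow "$item" &  # hypothetical dispatch helper
done
wait  # let the last jobs drain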
commit 3c2a75186f (parent 8e4be10eb9)
Author: David Testé
Date:   2022-11-21 10:13:30 +01:00

6 changed files with 191 additions and 8 deletions


@@ -15,13 +15,6 @@ on:
       options:
         - m6i.metal
         - c6a.metal
-  # Have a weekly benchmark run on main branch to be available on Monday morning (Paris time)
-  # TODO: uncomment this section once MLBenchmarks are implemented
-  # schedule:
-  #   # * is a special character in YAML so you have to quote this string
-  #   # At 1:00 every Thursday
-  #   # Timezone is UTC, so Paris time is +2 during the summer and +1 during winter
-  #   - cron: '0 1 * * THU'
 env:
   CARGO_TERM_COLOR: always


ml_benchmark_subset.yml (new file)
@@ -0,0 +1,135 @@
# Run one of the ML benchmarks on an AWS instance and return parsed results to Slab CI bot.
name: Application benchmarks
on:
  workflow_dispatch:
    inputs:
      instance_id:
        description: 'Instance ID'
        type: string
      instance_image_id:
        description: 'Instance AMI ID'
        type: string
      instance_type:
        description: 'Instance product type'
        type: string
      runner_name:
        description: 'Action runner name'
        type: string
      request_id:
        description: 'Slab request ID'
        type: string
      matrix_item:
        description: 'Build matrix item'
        type: string

env:
  CARGO_TERM_COLOR: always
  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json

jobs:
  run-ml-benchmarks:
    name: Execute ML benchmarks subset in EC2
    runs-on: ${{ github.event.inputs.runner_name }}
    if: ${{ !cancelled() }}
    steps:
      - name: Instance configuration used
        run: |
          echo "IDs: ${{ inputs.instance_id }}"
          echo "AMI: ${{ inputs.instance_image_id }}"
          echo "Type: ${{ inputs.instance_type }}"
          echo "Request ID: ${{ inputs.request_id }}"
          echo "Matrix item: ${{ inputs.matrix_item }}"
      - name: Get benchmark date
        run: |
          echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
      # An SSH private key is required as some dependencies come from private repos.
      - uses: webfactory/ssh-agent@v0.5.2
        with:
          ssh-private-key: ${{ secrets.CONCRETE_COMPILER_CI_SSH_PRIVATE }}
      - name: Fetch submodules
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          submodules: recursive
          token: ${{ secrets.GH_TOKEN }}
      - name: Set up home
        # The "Install rust" step requires the root user to have a HOME directory, which is not set.
        run: |
          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
      - name: Install rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      - name: Build compiler and ML benchmarks
        run: |
          set -e
          cd compiler
          make BINDINGS_PYTHON_ENABLED=OFF build-mlbench
      - name: Download KeySetCache
        if: ${{ !contains(github.head_ref, 'newkeysetcache') }}
        continue-on-error: true
        run: |
          cd compiler
          GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} make keysetcache_ci_populated
      - name: Run ML benchmarks
        run: |
          set -e
          cd compiler
          make BINDINGS_PYTHON_ENABLED=OFF ML_BENCH_SUBSET_ID=${{ inputs.matrix_item }} run-mlbench-subset
      - name: Upload raw results artifact
        uses: actions/upload-artifact@v3
        with:
          name: ${{ github.sha }}_raw
          path: compiler/benchmarks_results.json
      - name: Parse results
        shell: bash
        run: |
          COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
          COMMIT_HASH="$(git describe --tags --dirty)"
          python3 ./ci/benchmark_parser.py compiler/benchmarks_results.json ${{ env.RESULTS_FILENAME }} \
            --schema compiler_benchmarks \
            --hardware ${{ inputs.instance_type }} \
            --project-version "${COMMIT_HASH}" \
            --branch ${{ github.ref_name }} \
            --commit-date "${COMMIT_DATE}" \
            --bench-date "${{ env.BENCH_DATE }}"
      - name: Upload parsed results artifact
        uses: actions/upload-artifact@v3
        with:
          name: ${{ github.sha }}
          path: ${{ env.RESULTS_FILENAME }}
      - name: Checkout Slab repo
        uses: actions/checkout@v3
        with:
          repository: zama-ai/slab
          path: slab
          token: ${{ secrets.GH_TOKEN }}
      - name: Send data to Slab
        shell: bash
        run: |
          echo "Computing HMAC of the results file"
          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
          echo "Sending results to Slab..."
          curl -v -k \
            -H "Content-Type: application/json" \
            -H "X-Slab-Repository: ${{ github.repository }}" \
            -H "X-Slab-Command: store_data" \
            -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
            -d @${{ env.RESULTS_FILENAME }} \
            ${{ secrets.SLAB_URL }}
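
This workflow is normally dispatched by Slab, but it can also be
exercised by hand. A sketch using the GitHub CLI; every input value
below is a placeholder, not a real resource:

# Manual dispatch sketch; in CI, Slab fills these inputs itself.
gh workflow run ml_benchmark_subset.yml \
    --ref main \
    -f instance_id=i-0123456789abcdef0 \
    -f instance_image_id=ami-0a24aaee029d1295c \
    -f instance_type=m6i.metal \
    -f runner_name=some-ec2-runner \
    -f request_id=manual-test \
    -f matrix_item=0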


start_ml_benchmarks.yml (new file)
@@ -0,0 +1,30 @@
# Start the ML benchmarks job on the Slab CI bot.
name: Start ML benchmarks
on:
  workflow_dispatch:
  # Have a weekly benchmark run on main branch to be available on Monday morning (Paris time)
  # TODO: uncomment this section once MLBenchmarks are implemented
  # schedule:
  #   # * is a special character in YAML so you have to quote this string
  #   # At 1:00 every Thursday
  #   # Timezone is UTC, so Paris time is +2 during the summer and +1 during winter
  #   - cron: '0 1 * * THU'

jobs:
  start-ml-benchmarks:
    # A runner must be specified for the job to be valid; ubuntu-latest is assumed here.
    runs-on: ubuntu-latest
    steps:
      # hmac_calculator.sh lives in the Slab repo, so it must be checked out first.
      - name: Checkout Slab repo
        uses: actions/checkout@v3
        with:
          repository: zama-ai/slab
          path: slab
          token: ${{ secrets.GH_TOKEN }}
      - name: Start AWS job in Slab
        shell: bash
        # TODO: step result must be correlated to HTTP return code.
        run: |
          PAYLOAD='{"command": "ml-bench", "git_ref": "${{ github.ref }}", "sha": "${{ github.sha }}"}'
          echo -n "${PAYLOAD}" > payload.json
          SIGNATURE="$(slab/scripts/hmac_calculator.sh payload.json '${{ secrets.JOB_SECRET }}')"
          curl -v -k \
            -H "Content-Type: application/json" \
            -H "X-Slab-Repository: ${{ github.repository }}" \
            -H "X-Slab-Command: start_data" \
            -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
            -d @payload.json \
            ${{ secrets.SLAB_URL }}
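
Both workflows sign their payloads with slab/scripts/hmac_calculator.sh,
which is not part of this diff. A minimal sketch of what it is assumed
to compute, given the GitHub-webhook-style X-Hub-Signature-256 header:
an HMAC-SHA256 hex digest of the file contents, keyed with the job secret.

#!/bin/bash
# Assumed equivalent of: slab/scripts/hmac_calculator.sh <file> <secret>
# Prints the hex digest that goes after "sha256=" in X-Hub-Signature-256.
file="$1"
secret="$2"
openssl dgst -sha256 -hmac "${secret}" "${file}" | awk '{print $NF}'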

ci/slab.toml (new file, 13 lines)

@@ -0,0 +1,13 @@
[profile.m6i-bench]
region = "eu-west-3"
image_id = "ami-0a24aaee029d1295c"
instance_type = "m6i.metal"
subnet_id = "subnet-a886b4c1"
security_group = ["sg-0bf1c1d79c97bc88f"]

# Trigger ML benchmarks by running each use-case subset in parallel.
[command.ml-bench]
workflow = "ml_benchmark_subset.yml"
profile = "m6i-bench"
matrix = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
max_parallel_jobs = 2
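
The matrix values must match the subset IDs of the files emitted by
make generate-mlbench (named end_to_end_mlbench_<ID>.yaml, as used by
the Makefile below). A hypothetical one-liner to list the generated
IDs for comparison against the matrix above:

# List generated subset IDs; compare against `matrix` above.
ls tests/end_to_end_benchmarks/mlbench/end_to_end_mlbench_*.yaml \
    | sed -E 's/.*end_to_end_mlbench_([0-9]+)\.yaml$/\1/' \
    | sort -n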


compiler/Makefile
@@ -23,6 +23,8 @@ HPX_INSTALL_DIR?=$(HPX_LOCAL_DIR)/build
 CONCRETE_CORE_FFI_VERSION?=0.2.0
+ML_BENCH_SUBSET_ID=
+
 OS=undefined
 ifeq ($(shell uname), Linux)
 OS=linux
@@ -290,6 +292,10 @@ generate-mlbench:
 run-mlbench: build-mlbench generate-mlbench
 	tests/end_to_end_benchmarks/end_to_end_mlbench.sh tests/end_to_end_benchmarks/mlbench/ $(BUILD_DIR)/bin/end_to_end_mlbench
 
+run-mlbench-subset: build-mlbench generate-mlbench
+	@[ "${ML_BENCH_SUBSET_ID}" ] || ( echo "ML_BENCH_SUBSET_ID is not set"; exit 1 )
+	tests/end_to_end_benchmarks/end_to_end_mlbench.sh tests/end_to_end_benchmarks/mlbench/end_to_end_mlbench_$(ML_BENCH_SUBSET_ID).yaml $(BUILD_DIR)/bin/end_to_end_mlbench
+
 show-stress-tests-summary:
 	@echo '------ Stress tests summary ------'
 	@echo

tests/end_to_end_benchmarks/end_to_end_mlbench.sh
@@ -1,3 +1,9 @@
 #!/bin/bash
-find $1 -name "*mlbench_*.yaml" -exec bash -c "BENCHMARK_FILE={} BENCHMARK_STACK=1000000000 BENCHMARK_NAME=MLBench $2" \;
+if [[ -d $1 ]]; then
+    # Execute all the generated YAML files sequentially.
+    find "$1" -name "*mlbench_*.yaml" -exec bash -c "BENCHMARK_FILE={} BENCHMARK_STACK=1000000000 BENCHMARK_NAME=MLBench $2" \;
+else
+    # Execute only one of the YAML files.
+    bash -c "BENCHMARK_FILE=$1 BENCHMARK_STACK=1000000000 BENCHMARK_NAME=MLBench $2"
+fi
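
The two call shapes, as wired up by the Makefile targets above
(build/bin below stands in for the actual $(BUILD_DIR)/bin value):

# Directory argument: run every generated subset (run-mlbench).
tests/end_to_end_benchmarks/end_to_end_mlbench.sh \
    tests/end_to_end_benchmarks/mlbench/ build/bin/end_to_end_mlbench

# File argument: run a single subset (run-mlbench-subset).
tests/end_to_end_benchmarks/end_to_end_mlbench.sh \
    tests/end_to_end_benchmarks/mlbench/end_to_end_mlbench_0.yaml build/bin/end_to_end_mlbench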